//===-- X86FrameLowering.cpp - X86 Frame Information ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "X86FrameLowering.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Target/TargetOptions.h"
#include <cstdlib>

#define DEBUG_TYPE "x86-fl"

STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
STATISTIC(NumFrameExtraProbe,
          "Number of extra stack probes generated in prologue");
STATISTIC(NumFunctionUsingPush2Pop2, "Number of functions using push2/pop2");

using namespace llvm;

X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
                                   MaybeAlign StackAlignOverride)
    : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(),
                          STI.is64Bit() ? -8 : -4),
      STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {
  // Cache a bunch of frame-related predicates for this subtarget.
  SlotSize = TRI->getSlotSize();
  Is64Bit = STI.is64Bit();
  IsLP64 = STI.isTarget64BitLP64();
  // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
  Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
  StackPtr = TRI->getStackRegister();
}

bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo().hasVarSizedObjects() &&
         !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() &&
         !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall();
}

/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
/// call frame pseudos can be simplified. Having a FP, as in the default
/// implementation, is not sufficient here since we can't always use it.
/// Use a more nuanced condition.
bool X86FrameLowering::canSimplifyCallFramePseudos(
    const MachineFunction &MF) const {
  return hasReservedCallFrame(MF) ||
         MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
         (hasFP(MF) && !TRI->hasStackRealignment(MF)) ||
         TRI->hasBasePointer(MF);
}

// needsFrameIndexResolution - Do we need to perform FI resolution for
// this function. Normally, this is required only when the function
// has any stack objects. However, FI resolution actually has another job,
// not apparent from the title - it resolves callframesetup/destroy
// that were not simplified earlier.
// So, this is required for x86 functions that have push sequences even
// when there are no stack objects.
bool X86FrameLowering::needsFrameIndexResolution(
    const MachineFunction &MF) const {
  return MF.getFrameInfo().hasStackObjects() ||
         MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
}

/// hasFPImpl - Return true if the specified function should have a dedicated
/// frame pointer register. This is true if the function has variable sized
/// allocas or if frame pointer elimination is disabled.
bool X86FrameLowering::hasFPImpl(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
          TRI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
          MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() ||
          MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
          MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
          MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
          MFI.hasStackMap() || MFI.hasPatchPoint() ||
          (isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment()));
}

static unsigned getSUBriOpcode(bool IsLP64) {
  return IsLP64 ? X86::SUB64ri32 : X86::SUB32ri;
}

static unsigned getADDriOpcode(bool IsLP64) {
  return IsLP64 ? X86::ADD64ri32 : X86::ADD32ri;
}

static unsigned getSUBrrOpcode(bool IsLP64) {
  return IsLP64 ? X86::SUB64rr : X86::SUB32rr;
}

static unsigned getADDrrOpcode(bool IsLP64) {
  return IsLP64 ? X86::ADD64rr : X86::ADD32rr;
}

static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
  return IsLP64 ? X86::AND64ri32 : X86::AND32ri;
}

static unsigned getLEArOpcode(bool IsLP64) {
  return IsLP64 ? X86::LEA64r : X86::LEA32r;
}

static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm) {
  if (Use64BitReg) {
    if (isUInt<32>(Imm))
      return X86::MOV32ri64;
    if (isInt<32>(Imm))
      return X86::MOV64ri32;
    return X86::MOV64ri;
  }
  return X86::MOV32ri;
}
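// Illustrative mapping (not from the source tree): getMOVriOpcode(true,
// 0xFFFFFFFF) selects MOV32ri64, since a 32-bit move implicitly zero-extends;
// getMOVriOpcode(true, -8) selects MOV64ri32 (sign-extended 32-bit
// immediate); an immediate needing all 64 bits, e.g. (1LL << 40), selects
// MOV64ri; and with Use64BitReg == false the result is always MOV32ri.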

// Push-Pop Acceleration (PPX) hint is used to indicate that the POP reads the
// value written by the PUSH from the stack. The processor tracks these marked
// instructions internally and fast-forwards register data between matching
// PUSH and POP instructions, without going through memory or through the
// training loop of the Fast Store Forwarding Predictor (FSFP). Instead, a more
// efficient memory-renaming optimization can be used.
//
// The PPX hint is purely a performance hint. Instructions with this hint have
// the same functional semantics as those without. PPX hints set by the
// compiler that violate the balancing rule may turn off the PPX optimization,
// but they will not affect program semantics.
//
// Hence, PPX is used for balanced spill/reloads (Exceptions and setjmp/longjmp
// are not considered).
//
// PUSH2 and POP2 are instructions for (respectively) pushing/popping 2
// GPRs at a time to/from the stack.
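//
// A hypothetical balanced pair using these helpers (illustrative assembly
// only, operand order not guaranteed):
//   push2p %r15, %r14    ; one PPX-hinted spill of two GPRs
//   ...
//   pop2p  %r14, %r15    ; the matching PPX-hinted reload
// Every PUSH(2) is balanced by exactly one POP(2), which is what keeps the
// PPX hint profitable.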
static unsigned getPUSHOpcode(const X86Subtarget &ST) {
  return ST.is64Bit() ? (ST.hasPPX() ? X86::PUSHP64r : X86::PUSH64r)
                      : X86::PUSH32r;
}
static unsigned getPOPOpcode(const X86Subtarget &ST) {
  return ST.is64Bit() ? (ST.hasPPX() ? X86::POPP64r : X86::POP64r)
                      : X86::POP32r;
}
static unsigned getPUSH2Opcode(const X86Subtarget &ST) {
  return ST.hasPPX() ? X86::PUSH2P : X86::PUSH2;
}
static unsigned getPOP2Opcode(const X86Subtarget &ST) {
  return ST.hasPPX() ? X86::POP2P : X86::POP2;
}

static bool isEAXLiveIn(MachineBasicBlock &MBB) {
  for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) {
    MCRegister Reg = RegMask.PhysReg;

    if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
        Reg == X86::AH || Reg == X86::AL)
      return true;
  }

  return false;
}

/// Check if the flags need to be preserved before the terminators.
/// This would be the case, if the eflags is live-in of the region
/// composed by the terminators or live-out of that region, without
/// being defined by a terminator.
static bool
flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) {
  for (const MachineInstr &MI : MBB.terminators()) {
    bool BreakNext = false;
    for (const MachineOperand &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (Reg != X86::EFLAGS)
        continue;

      // This terminator needs an eflags that is not defined
      // by a previous terminator:
      // EFLAGS is live-in of the region composed by the terminators.
      if (!MO.isDef())
        return true;
      // This terminator defines the eflags, i.e., we don't need to preserve it.
      // However, we still need to check this specific terminator does not
      // read a live-in value.
      BreakNext = true;
    }
    // We found a definition of the eflags, no need to preserve them.
    if (BreakNext)
      return false;
  }

  // None of the terminators use or define the eflags.
  // Check if they are live-out, that would imply we need to preserve them.
  for (const MachineBasicBlock *Succ : MBB.successors())
    if (Succ->isLiveIn(X86::EFLAGS))
      return true;

  return false;
}
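// Example: in a block ending "CMP ...; JCC_1 ...", the CMP is usually not a
// terminator, so the first terminator (JCC_1) reads EFLAGS without a prior
// terminator defining it -- the region is live-in and this returns true. If
// the first terminator instead defined EFLAGS, this would return false.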

/// emitSPUpdate - Emit a series of instructions to increment / decrement the
/// stack pointer by a constant value.
void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator &MBBI,
                                    const DebugLoc &DL, int64_t NumBytes,
                                    bool InEpilogue) const {
  bool isSub = NumBytes < 0;
  uint64_t Offset = isSub ? -NumBytes : NumBytes;
  MachineInstr::MIFlag Flag =
      isSub ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy;

  if (!Uses64BitFramePtr && !isUInt<32>(Offset)) {
    // We're being asked to adjust a 32-bit stack pointer by 4 GiB or more.
    // This might be unreachable code, so don't complain now; just trap if
    // it's reached at runtime.
    BuildMI(MBB, MBBI, DL, TII.get(X86::TRAP));
    return;
  }

  uint64_t Chunk = (1LL << 31) - 1;

  MachineFunction &MF = *MBB.getParent();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);

  // It's ok to not take into account large chunks when probing, as the
  // allocation is split in smaller chunks anyway.
  if (EmitInlineStackProbe && !InEpilogue) {

    // This pseudo-instruction is going to be expanded, potentially using a
    // loop, by inlineStackProbe().
    BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING)).addImm(Offset);
    return;
  } else if (Offset > Chunk) {
    // Rather than emit a long series of instructions for large offsets,
    // load the offset into a register and do one sub/add.
    unsigned Reg = 0;
    unsigned Rax = (unsigned)(Uses64BitFramePtr ? X86::RAX : X86::EAX);

    if (isSub && !isEAXLiveIn(MBB))
      Reg = Rax;
    else
      Reg = getX86SubSuperRegister(TRI->findDeadCallerSavedReg(MBB, MBBI),
                                   Uses64BitFramePtr ? 64 : 32);

    unsigned AddSubRROpc = isSub ? getSUBrrOpcode(Uses64BitFramePtr)
                                 : getADDrrOpcode(Uses64BitFramePtr);
    if (Reg) {
      BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Uses64BitFramePtr, Offset)),
              Reg)
          .addImm(Offset)
          .setMIFlag(Flag);
      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr)
                             .addReg(StackPtr)
                             .addReg(Reg);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
      return;
    } else if (Offset > 8 * Chunk) {
      // If we would need more than 8 add or sub instructions (a >16GB stack
      // frame), it's worth spilling RAX to materialize this immediate.
      //   pushq %rax
      //   movabsq +-$Offset+-SlotSize, %rax
      //   addq %rsp, %rax
      //   xchg %rax, (%rsp)
      //   movq (%rsp), %rsp
      assert(Uses64BitFramePtr && "can't have 32-bit 16GB stack frame");
      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
          .addReg(Rax, RegState::Kill)
          .setMIFlag(Flag);
      // Subtract is not commutative, so negate the offset and always use add.
      // Subtract 8 less and add 8 more to account for the PUSH we just did.
      if (isSub)
        Offset = -(Offset - SlotSize);
      else
        Offset = Offset + SlotSize;
      BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Uses64BitFramePtr, Offset)),
              Rax)
          .addImm(Offset)
          .setMIFlag(Flag);
      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax)
                             .addReg(Rax)
                             .addReg(StackPtr);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
      // Exchange the new SP in RAX with the top of the stack.
      addRegOffset(
          BuildMI(MBB, MBBI, DL, TII.get(X86::XCHG64rm), Rax).addReg(Rax),
          StackPtr, false, 0);
      // Load new SP from the top of the stack into RSP.
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), StackPtr),
                   StackPtr, false, 0);
      return;
    }
  }

  while (Offset) {
    uint64_t ThisVal = std::min(Offset, Chunk);
    if (ThisVal == SlotSize) {
      // Use push / pop for slot sized adjustments as a size optimization. We
      // need to find a dead register when using pop.
      unsigned Reg = isSub ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
                           : TRI->findDeadCallerSavedReg(MBB, MBBI);
      if (Reg) {
        unsigned Opc = isSub ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
                             : (Is64Bit ? X86::POP64r : X86::POP32r);
        BuildMI(MBB, MBBI, DL, TII.get(Opc))
            .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub))
            .setMIFlag(Flag);
        Offset -= ThisVal;
        continue;
      }
    }

    BuildStackAdjustment(MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue)
        .setMIFlag(Flag);

    Offset -= ThisVal;
  }
}

MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, int64_t Offset, bool InEpilogue) const {
  assert(Offset != 0 && "zero offset stack adjustment requested");

  // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue
  // is tricky.
  bool UseLEA;
  if (!InEpilogue) {
    // Check if inserting the prologue at the beginning
    // of MBB would require to use LEA operations.
    // We need to use LEA operations if EFLAGS is live in, because
    // it means an instruction will read it before it gets defined.
    UseLEA = STI.useLeaForSP() || MBB.isLiveIn(X86::EFLAGS);
  } else {
    // If we can use LEA for SP but we shouldn't, check that none
    // of the terminators uses the eflags. Otherwise we will insert
    // a ADD that will redefine the eflags and break the condition.
    // Alternatively, we could move the ADD, but this may not be possible
    // and is an optimization anyway.
    UseLEA = canUseLEAForSPInEpilogue(*MBB.getParent());
    if (UseLEA && !STI.useLeaForSP())
      UseLEA = flagsNeedToBePreservedBeforeTheTerminators(MBB);
    // If that assert breaks, that means we do not do the right thing
    // in canUseAsEpilogue.
    assert((UseLEA || !flagsNeedToBePreservedBeforeTheTerminators(MBB)) &&
           "We shouldn't have allowed this insertion point");
  }

  MachineInstrBuilder MI;
  if (UseLEA) {
    MI = addRegOffset(BuildMI(MBB, MBBI, DL,
                              TII.get(getLEArOpcode(Uses64BitFramePtr)),
                              StackPtr),
                      StackPtr, false, Offset);
  } else {
    bool IsSub = Offset < 0;
    uint64_t AbsOffset = IsSub ? -Offset : Offset;
    const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr)
                               : getADDriOpcode(Uses64BitFramePtr);
    MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
             .addReg(StackPtr)
             .addImm(AbsOffset);
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  }
  return MI;
}
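// Illustrative lowering of BuildStackAdjustment(MBB, MBBI, DL, -40, false)
// on a 64-bit target (assumed registers/offsets):
//   subq $40, %rsp            ; default form, EFLAGS def marked dead
//   leaq -40(%rsp), %rsp      ; UseLEA form, leaves EFLAGS untouched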

int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator &MBBI,
                                     bool doMergeWithPrevious) const {
  if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
      (!doMergeWithPrevious && MBBI == MBB.end()))
    return 0;

  MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;

  PI = skipDebugInstructionsBackward(PI, MBB.begin());
  // It is assumed that ADD/SUB/LEA instruction is succeeded by one CFI
  // instruction, and that there are no DBG_VALUE or other instructions between
  // ADD/SUB/LEA and its corresponding CFI instruction.
  /* TODO: Add support for the case where there are multiple CFI instructions
    below the ADD/SUB/LEA, e.g.:
    ...
    add
    cfi_def_cfa_offset
    cfi_offset
    ...
  */
  if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
    PI = std::prev(PI);

  unsigned Opc = PI->getOpcode();
  int Offset = 0;

  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) &&
      PI->getOperand(0).getReg() == StackPtr) {
    assert(PI->getOperand(1).getReg() == StackPtr);
    Offset = PI->getOperand(2).getImm();
  } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
             PI->getOperand(0).getReg() == StackPtr &&
             PI->getOperand(1).getReg() == StackPtr &&
             PI->getOperand(2).getImm() == 1 &&
             PI->getOperand(3).getReg() == X86::NoRegister &&
             PI->getOperand(5).getReg() == X86::NoRegister) {
    // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
    Offset = PI->getOperand(4).getImm();
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) &&
             PI->getOperand(0).getReg() == StackPtr) {
    assert(PI->getOperand(1).getReg() == StackPtr);
    Offset = -PI->getOperand(2).getImm();
  } else
    return 0;

  PI = MBB.erase(PI);
  if (PI != MBB.end() && PI->isCFIInstruction()) {
    auto CIs = MBB.getParent()->getFrameInstructions();
    MCCFIInstruction CI = CIs[PI->getOperand(0).getCFIIndex()];
    if (CI.getOperation() == MCCFIInstruction::OpDefCfaOffset ||
        CI.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
      PI = MBB.erase(PI);
  }
  if (!doMergeWithPrevious)
    MBBI = skipDebugInstructionsForward(PI, MBB.end());

  return Offset;
}
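// Illustrative merge (hypothetical input): if the instruction preceding MBBI
// is "subq $16, %rsp", calling mergeSPUpdates(MBB, MBBI, true) erases that
// SUB (plus its single def_cfa_offset/adjust_cfa_offset CFI, if present) and
// returns -16, which the caller can fold into one bigger SP adjustment.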

void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                const DebugLoc &DL,
                                const MCCFIInstruction &CFIInst,
                                MachineInstr::MIFlag Flag) const {
  MachineFunction &MF = *MBB.getParent();
  unsigned CFIIndex = MF.addFrameInst(CFIInst);

  if (CFIInst.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
    MF.getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true);

  BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex)
      .setMIFlag(Flag);
}

/// Emits Dwarf Info specifying offsets of callee saved registers and
/// frame pointer. This is called only when basic block sections are enabled.
void X86FrameLowering::emitCalleeSavedFrameMovesFullCFA(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  MachineFunction &MF = *MBB.getParent();
  if (!hasFP(MF)) {
    emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
    return;
  }
  const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
  const Register FramePtr = TRI->getFrameRegister(MF);
  const Register MachineFramePtr =
      STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(FramePtr, 64))
                               : FramePtr;
  unsigned DwarfReg = MRI->getDwarfRegNum(MachineFramePtr, true);
  // Offset = space for return address + size of the frame pointer itself.
  int64_t Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 8 : 4);
  BuildCFI(MBB, MBBI, DebugLoc{},
           MCCFIInstruction::createOffset(nullptr, DwarfReg, -Offset));
  emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
}

void X86FrameLowering::emitCalleeSavedFrameMoves(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, bool IsPrologue) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

  // Calculate offsets.
  for (const CalleeSavedInfo &I : CSI) {
    int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
    Register Reg = I.getReg();
    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);

    if (IsPrologue) {
      if (X86FI->getStackPtrSaveMI()) {
        // +2*SlotSize because there is return address and ebp at the bottom
        // of the stack.
        // | retaddr |
        // | ebp     |
        // |         |<--ebp
        Offset += 2 * SlotSize;
        SmallString<64> CfaExpr;
        CfaExpr.push_back(dwarf::DW_CFA_expression);
        uint8_t buffer[16];
        CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
        CfaExpr.push_back(2);
        Register FramePtr = TRI->getFrameRegister(MF);
        const Register MachineFramePtr =
            STI.isTarget64BitILP32()
                ? Register(getX86SubSuperRegister(FramePtr, 64))
                : FramePtr;
        unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
        CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
        CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
        BuildCFI(MBB, MBBI, DL,
                 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
                 MachineInstr::FrameSetup);
      } else {
        BuildCFI(MBB, MBBI, DL,
                 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
      }
    } else {
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createRestore(nullptr, DwarfReg));
    }
  }
  if (auto *MI = X86FI->getStackPtrSaveMI()) {
    int FI = MI->getOperand(1).getIndex();
    int64_t Offset = MFI.getObjectOffset(FI) + 2 * SlotSize;
    SmallString<64> CfaExpr;
    Register FramePtr = TRI->getFrameRegister(MF);
    const Register MachineFramePtr =
        STI.isTarget64BitILP32()
            ? Register(getX86SubSuperRegister(FramePtr, 64))
            : FramePtr;
    unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
    CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
    uint8_t buffer[16];
    CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
    CfaExpr.push_back(dwarf::DW_OP_deref);

    SmallString<64> DefCfaExpr;
    DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
    DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
    DefCfaExpr.append(CfaExpr.str());
    // DW_CFA_def_cfa_expression: DW_OP_breg5 offset, DW_OP_deref
    BuildCFI(MBB, MBBI, DL,
             MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
             MachineInstr::FrameSetup);
  }
}

void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
                                            MachineBasicBlock &MBB) const {
  const MachineFunction &MF = *MBB.getParent();

  // Insertion point.
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();

  // Fake a debug loc.
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();

  // Zero out FP stack if referenced. Do this outside of the loop below so that
  // it's done only once.
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  for (MCRegister Reg : RegsToZero.set_bits()) {
    if (!X86::RFP80RegClass.contains(Reg))
      continue;

    unsigned NumFPRegs = ST.is64Bit() ? 8 : 7;
    for (unsigned i = 0; i != NumFPRegs; ++i)
      BuildMI(MBB, MBBI, DL, TII.get(X86::LD_F0));

    for (unsigned i = 0; i != NumFPRegs; ++i)
      BuildMI(MBB, MBBI, DL, TII.get(X86::ST_FPrr)).addReg(X86::ST0);
    break;
  }

  // For GPRs, we only care to clear out the 32-bit register.
  BitVector GPRsToZero(TRI->getNumRegs());
  for (MCRegister Reg : RegsToZero.set_bits())
    if (TRI->isGeneralPurposeRegister(MF, Reg)) {
      GPRsToZero.set(getX86SubSuperRegister(Reg, 32));
      RegsToZero.reset(Reg);
    }

  // Zero out the GPRs first.
  for (MCRegister Reg : GPRsToZero.set_bits())
    TII.buildClearRegister(Reg, MBB, MBBI, DL);

  // Zero out the remaining registers.
  for (MCRegister Reg : RegsToZero.set_bits())
    TII.buildClearRegister(Reg, MBB, MBBI, DL);
}
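// Illustrative result (the exact instructions depend on buildClearRegister):
// for RegsToZero = {RAX, XMM0} this typically becomes something like
//   xorl  %eax, %eax          ; clearing the 32-bit sub-register suffices
//   xorps %xmm0, %xmm0
// plus the FP-stack push/pop dance above when an RFP80 register is in the set.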

void X86FrameLowering::emitStackProbe(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
    std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  if (TLI.hasInlineStackProbe(MF)) {
    if (InProlog) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING))
          .addImm(0 /* no explicit stack size */);
    } else {
      emitStackProbeInline(MF, MBB, MBBI, DL, false);
    }
  } else {
    emitStackProbeCall(MF, MBB, MBBI, DL, InProlog, InstrNum);
  }
}

bool X86FrameLowering::stackProbeFunctionModifiesSP() const {
  return STI.isOSWindows() && !STI.isTargetWin64();
}

void X86FrameLowering::inlineStackProbe(MachineFunction &MF,
                                        MachineBasicBlock &PrologMBB) const {
  auto Where = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
    return MI.getOpcode() == X86::STACKALLOC_W_PROBING;
  });
  if (Where != PrologMBB.end()) {
    DebugLoc DL = PrologMBB.findDebugLoc(Where);
    emitStackProbeInline(MF, PrologMBB, Where, DL, true);
    Where->eraseFromParent();
  }
}

void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
                                            MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MBBI,
                                            const DebugLoc &DL,
                                            bool InProlog) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  if (STI.isTargetWindowsCoreCLR() && STI.is64Bit())
    emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog);
  else
    emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog);
}

void X86FrameLowering::emitStackProbeInlineGeneric(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
  MachineInstr &AllocWithProbe = *MBBI;
  uint64_t Offset = AllocWithProbe.getOperand(0).getImm();

  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) &&
         "different expansion expected for CoreCLR 64 bit");

  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  uint64_t ProbeChunk = StackProbeSize * 8;

  uint64_t MaxAlign =
      TRI->hasStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0;

  // Synthesize a loop or unroll it, depending on the number of iterations.
  // BuildStackAlignAND ensures that at most MaxAlign % StackProbeSize bytes
  // are left between the unaligned rsp and the current rsp.
  if (Offset > ProbeChunk) {
    emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset,
                                    MaxAlign % StackProbeSize);
  } else {
    emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset,
                                     MaxAlign % StackProbeSize);
  }
}

void X86FrameLowering::emitStackProbeInlineGenericBlock(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
    uint64_t AlignOffset) const {

  const bool NeedsDwarfCFI = needsDwarfCFI(MF);
  const bool HasFP = hasFP(MF);
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);

  uint64_t CurrentOffset = 0;

  assert(AlignOffset < StackProbeSize);

  // If the offset is so small it fits within a page, there's nothing to do.
  if (StackProbeSize < Offset + AlignOffset) {

    uint64_t StackAdjustment = StackProbeSize - AlignOffset;
    BuildStackAdjustment(MBB, MBBI, DL, -StackAdjustment, /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
    if (!HasFP && NeedsDwarfCFI) {
      BuildCFI(
          MBB, MBBI, DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
    }

    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                     .setMIFlag(MachineInstr::FrameSetup),
                 StackPtr, false, 0)
        .addImm(0)
        .setMIFlag(MachineInstr::FrameSetup);
    NumFrameExtraProbe++;
    CurrentOffset = StackProbeSize - AlignOffset;
  }

  // For the next N - 1 pages, just probe. I tried to take advantage of
  // natural probes but it implies much more logic and there were very few
  // interesting natural probes to interleave.
  while (CurrentOffset + StackProbeSize < Offset) {
    BuildStackAdjustment(MBB, MBBI, DL, -StackProbeSize, /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);

    if (!HasFP && NeedsDwarfCFI) {
      BuildCFI(
          MBB, MBBI, DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, StackProbeSize));
    }
    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                     .setMIFlag(MachineInstr::FrameSetup),
                 StackPtr, false, 0)
        .addImm(0)
        .setMIFlag(MachineInstr::FrameSetup);
    NumFrameExtraProbe++;
    CurrentOffset += StackProbeSize;
  }

  // No need to probe the tail, it is smaller than a Page.
  uint64_t ChunkSize = Offset - CurrentOffset;
  if (ChunkSize == SlotSize) {
    // Use push for slot sized adjustments as a size optimization,
    // like emitSPUpdate does when not probing.
    unsigned Reg = Is64Bit ? X86::RAX : X86::EAX;
    unsigned Opc = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
    BuildMI(MBB, MBBI, DL, TII.get(Opc))
        .addReg(Reg, RegState::Undef)
        .setMIFlag(MachineInstr::FrameSetup);
  } else {
    BuildStackAdjustment(MBB, MBBI, DL, -ChunkSize, /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
  }
  // No need to adjust Dwarf CFA offset here, the last position of the stack has
  // been defined
}
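// Sketch of the unrolled expansion for Offset = 10240 with a 4096-byte probe
// size and AlignOffset = 0 (illustrative, 64-bit):
//   subq $4096, %rsp ; movl $0, (%rsp)    # first page
//   subq $4096, %rsp ; movl $0, (%rsp)    # second page
//   subq $2048, %rsp                      # tail, smaller than a page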

void X86FrameLowering::emitStackProbeInlineGenericLoop(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
    uint64_t AlignOffset) const {
  assert(Offset && "null offset");

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Inline stack probe loop will clobber live EFLAGS.");

  const bool NeedsDwarfCFI = needsDwarfCFI(MF);
  const bool HasFP = hasFP(MF);
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);

  if (AlignOffset) {
    if (AlignOffset < StackProbeSize) {
      // Perform a first smaller allocation followed by a probe.
      BuildStackAdjustment(MBB, MBBI, DL, -AlignOffset, /*InEpilogue=*/false)
          .setMIFlag(MachineInstr::FrameSetup);

      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                       .setMIFlag(MachineInstr::FrameSetup),
                   StackPtr, false, 0)
          .addImm(0)
          .setMIFlag(MachineInstr::FrameSetup);
      NumFrameExtraProbe++;
      Offset -= AlignOffset;
    }
  }

  // Synthesize a loop
  NumFrameLoopProbe++;
  const BasicBlock *LLVM_BB = MBB.getBasicBlock();

  MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator MBBIter = ++MBB.getIterator();
  MF.insert(MBBIter, testMBB);
  MF.insert(MBBIter, tailMBB);

  Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
                              : Is64Bit         ? X86::R11D
                                                : X86::EAX;

  // save loop bound
  {
    const uint64_t BoundOffset = alignDown(Offset, StackProbeSize);

    // Can we calculate the loop bound using SUB with a 32-bit immediate?
    // Note that the immediate gets sign-extended when used with a 64-bit
    // register, so in that case we only have 31 bits to work with.
    bool canUseSub =
        Uses64BitFramePtr ? isUInt<31>(BoundOffset) : isUInt<32>(BoundOffset);

    if (canUseSub) {
      const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);

      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
          .addReg(StackPtr)
          .setMIFlag(MachineInstr::FrameSetup);
      BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
          .addReg(FinalStackProbed)
          .addImm(BoundOffset)
          .setMIFlag(MachineInstr::FrameSetup);
    } else if (Uses64BitFramePtr) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), FinalStackProbed)
          .addImm(-BoundOffset)
          .setMIFlag(MachineInstr::FrameSetup);
      BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), FinalStackProbed)
          .addReg(FinalStackProbed)
          .addReg(StackPtr)
          .setMIFlag(MachineInstr::FrameSetup);
    } else {
      llvm_unreachable("Offset too large for 32-bit stack pointer");
    }

    // while in the loop, use loop-invariant reg for CFI,
    // instead of the stack pointer, which changes during the loop
    if (!HasFP && NeedsDwarfCFI) {
      // x32 uses the same DWARF register numbers as x86-64,
      // so there isn't a register number for r11d, we must use r11 instead
      const Register DwarfFinalStackProbed =
          STI.isTarget64BitILP32()
              ? Register(getX86SubSuperRegister(FinalStackProbed, 64))
              : FinalStackProbed;

      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createDefCfaRegister(
                   nullptr, TRI->getDwarfRegNum(DwarfFinalStackProbed, true)));
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createAdjustCfaOffset(nullptr, BoundOffset));
    }
  }

  // allocate a page
  BuildStackAdjustment(*testMBB, testMBB->end(), DL, -StackProbeSize,
                       /*InEpilogue=*/false)
      .setMIFlag(MachineInstr::FrameSetup);

  // touch the page
  addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc))
                   .setMIFlag(MachineInstr::FrameSetup),
               StackPtr, false, 0)
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);

  // cmp with stack pointer bound
  BuildMI(testMBB, DL, TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
      .addReg(StackPtr)
      .addReg(FinalStackProbed)
      .setMIFlag(MachineInstr::FrameSetup);

  // jump
  BuildMI(testMBB, DL, TII.get(X86::JCC_1))
      .addMBB(testMBB)
      .addImm(X86::COND_NE)
      .setMIFlag(MachineInstr::FrameSetup);
  testMBB->addSuccessor(testMBB);
  testMBB->addSuccessor(tailMBB);

  // BB management
  tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end());
  tailMBB->transferSuccessorsAndUpdatePHIs(&MBB);
  MBB.addSuccessor(testMBB);

  // handle tail
  const uint64_t TailOffset = Offset % StackProbeSize;
  MachineBasicBlock::iterator TailMBBIter = tailMBB->begin();
  if (TailOffset) {
    BuildStackAdjustment(*tailMBB, TailMBBIter, DL, -TailOffset,
                         /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // after the loop, switch back to stack pointer for CFI
  if (!HasFP && NeedsDwarfCFI) {
    // x32 uses the same DWARF register numbers as x86-64,
    // so there isn't a register number for esp, we must use rsp instead
    const Register DwarfStackPtr =
        STI.isTarget64BitILP32()
            ? Register(getX86SubSuperRegister(StackPtr, 64))
            : Register(StackPtr);

    BuildCFI(*tailMBB, TailMBBIter, DL,
             MCCFIInstruction::createDefCfaRegister(
                 nullptr, TRI->getDwarfRegNum(DwarfStackPtr, true)));
  }

  // Update Live In information
  fullyRecomputeLiveIns({tailMBB, testMBB});
}
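// Rough shape of the synthesized loop (illustrative, 64-bit, 4 KiB probes,
// r11 standing in for FinalStackProbed):
//   movq %rsp, %r11
//   subq $BoundOffset, %r11     # BoundOffset = alignDown(Offset, 4096)
// test:
//   subq $4096, %rsp
//   movl $0, (%rsp)             # touch the page
//   cmpq %r11, %rsp
//   jne  test
//   subq $TailOffset, %rsp      # TailOffset = Offset % 4096, in tailMBB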

void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  assert(STI.is64Bit() && "different expansion needed for 32 bit");
  assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");
  const BasicBlock *LLVM_BB = MBB.getBasicBlock();

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Inline stack probe loop will clobber live EFLAGS.");

  // RAX contains the number of bytes of desired stack adjustment.
  // The handling here assumes this value has already been updated so as to
  // maintain stack alignment.
  //
  // We need to exit with RSP modified by this amount and execute suitable
  // page touches to notify the OS that we're growing the stack responsibly.
  // All stack probing must be done without modifying RSP.
  //
  // MBB:
  //    SizeReg = RAX;
  //    ZeroReg = 0
  //    CopyReg = RSP
  //    Flags, TestReg = CopyReg - SizeReg
  //    FinalReg = !Flags.Ovf ? TestReg : ZeroReg
  //    LimitReg = gs magic thread env access
  //    if FinalReg >= LimitReg goto ContinueMBB
  // RoundBB:
  //    RoundReg = page address of FinalReg
  // LoopMBB:
  //    LoopReg = PHI(LimitReg,ProbeReg)
  //    ProbeReg = LoopReg - PageSize
  //    [ProbeReg] = 0
  //    if (ProbeReg > RoundReg) goto LoopMBB
  // ContinueMBB:
  //    RSP = RSP - RAX
  //    [rest of original MBB]

  // Set up the new basic blocks
  MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator MBBIter = std::next(MBB.getIterator());
  MF.insert(MBBIter, RoundMBB);
  MF.insert(MBBIter, LoopMBB);
  MF.insert(MBBIter, ContinueMBB);

  // Split MBB and move the tail portion down to ContinueMBB.
  MachineBasicBlock::iterator BeforeMBBI = std::prev(MBBI);
  ContinueMBB->splice(ContinueMBB->begin(), &MBB, MBBI, MBB.end());
  ContinueMBB->transferSuccessorsAndUpdatePHIs(&MBB);

  // Some useful constants
  const int64_t ThreadEnvironmentStackLimit = 0x10;
  const int64_t PageSize = 0x1000;
  const int64_t PageMask = ~(PageSize - 1);

  // Registers we need. For the normal case we use virtual
  // registers. For the prolog expansion we use RAX, RCX and RDX.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetRegisterClass *RegClass = &X86::GR64RegClass;
  const Register
      SizeReg = InProlog ? X86::RAX : MRI.createVirtualRegister(RegClass),
      ZeroReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      CopyReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      TestReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      FinalReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      RoundedReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      LimitReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      JoinReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      ProbeReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass);

  // SP-relative offsets where we can save RCX and RDX.
  int64_t RCXShadowSlot = 0;
  int64_t RDXShadowSlot = 0;

  // If inlining in the prolog, save RCX and RDX.
  if (InProlog) {
    // Compute the offsets. We need to account for things already
    // pushed onto the stack at this point: return address, frame
    // pointer (if used), and callee saves.
    X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
    const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();
    const bool HasFP = hasFP(MF);

    // Check if we need to spill RCX and/or RDX.
    // Here we assume that no earlier prologue instruction changes RCX and/or
    // RDX, so checking the block live-ins is enough.
    const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX);
    const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX);
    int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
    // Assign the initial slot to both registers, then change RDX's slot if both
    // need to be spilled.
    if (IsRCXLiveIn)
      RCXShadowSlot = InitSlot;
    if (IsRDXLiveIn)
      RDXShadowSlot = InitSlot;
    if (IsRDXLiveIn && IsRCXLiveIn)
      RDXShadowSlot += 8;
    // Emit the saves if needed.
    if (IsRCXLiveIn)
      addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
                   RCXShadowSlot)
          .addReg(X86::RCX);
    if (IsRDXLiveIn)
      addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
                   RDXShadowSlot)
          .addReg(X86::RDX);
  } else {
    // Not in the prolog. Copy RAX to a virtual reg.
    BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX);
  }

  // Add code to MBB to check for overflow and set the new target stack pointer
  // to zero if so.
  BuildMI(&MBB, DL, TII.get(X86::XOR64rr), ZeroReg)
      .addReg(ZeroReg, RegState::Undef)
      .addReg(ZeroReg, RegState::Undef);
  BuildMI(&MBB, DL, TII.get(X86::MOV64rr), CopyReg).addReg(X86::RSP);
  BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg)
      .addReg(CopyReg)
      .addReg(SizeReg);
  BuildMI(&MBB, DL, TII.get(X86::CMOV64rr), FinalReg)
      .addReg(TestReg)
      .addReg(ZeroReg)
      .addImm(X86::COND_B);

  // FinalReg now holds final stack pointer value, or zero if
  // allocation would overflow. Compare against the current stack
  // limit from the thread environment block. Note this limit is the
  // lowest touched page on the stack, not the point at which the OS
  // will cause an overflow exception, so this is just an optimization
  // to avoid unnecessarily touching pages that are below the current
  // SP but already committed to the stack by the OS.
  BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg)
      .addReg(0)
      .addImm(1)
      .addReg(0)
      .addImm(ThreadEnvironmentStackLimit)
      .addReg(X86::GS);
  BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg);
  // Jump if the desired stack pointer is at or above the stack limit.
  BuildMI(&MBB, DL, TII.get(X86::JCC_1))
      .addMBB(ContinueMBB)
      .addImm(X86::COND_AE);

  // Add code to roundMBB to round the final stack pointer to a page boundary.
  if (InProlog)
    RoundMBB->addLiveIn(FinalReg);
  BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg)
      .addReg(FinalReg)
      .addImm(PageMask);
  BuildMI(RoundMBB, DL, TII.get(X86::JMP_1)).addMBB(LoopMBB);

  // LimitReg now holds the current stack limit, and RoundedReg the
  // page-rounded final RSP value. Add code to loopMBB to decrement LimitReg
  // page-by-page and probe until we reach RoundedReg.
  if (!InProlog) {
    BuildMI(LoopMBB, DL, TII.get(X86::PHI), JoinReg)
        .addReg(LimitReg)
        .addMBB(RoundMBB)
        .addReg(ProbeReg)
        .addMBB(LoopMBB);
  }

  if (InProlog)
    LoopMBB->addLiveIn(JoinReg);
  addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg,
               false, -PageSize);

  // Probe by storing a byte onto the stack.
  BuildMI(LoopMBB, DL, TII.get(X86::MOV8mi))
      .addReg(ProbeReg)
      .addImm(1)
      .addReg(0)
      .addImm(0)
      .addReg(0)
      .addImm(0);

  if (InProlog)
    LoopMBB->addLiveIn(RoundedReg);
  BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
      .addReg(RoundedReg)
      .addReg(ProbeReg);
  BuildMI(LoopMBB, DL, TII.get(X86::JCC_1))
      .addMBB(LoopMBB)
      .addImm(X86::COND_B);

  MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI();

  // If in prolog, restore RDX and RCX.
  if (InProlog) {
    if (RCXShadowSlot) // It means we spilled RCX in the prologue.
      addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
                           TII.get(X86::MOV64rm), X86::RCX),
                   X86::RSP, false, RCXShadowSlot);
    if (RDXShadowSlot) // It means we spilled RDX in the prologue.
      addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
                           TII.get(X86::MOV64rm), X86::RDX),
                   X86::RSP, false, RDXShadowSlot);
  }

  // Now that the probing is done, add code to continueMBB to update
  // the stack pointer for real.
  BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
      .addReg(X86::RSP)
      .addReg(SizeReg);

  // Add the control flow edges we need.
  MBB.addSuccessor(ContinueMBB);
  MBB.addSuccessor(RoundMBB);
  RoundMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(ContinueMBB);
  LoopMBB->addSuccessor(LoopMBB);

  if (InProlog) {
    LivePhysRegs LiveRegs;
    computeAndAddLiveIns(LiveRegs, *ContinueMBB);
  }

  // Mark all the instructions added to the prolog as frame setup.
  if (InProlog) {
    for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) {
      BeforeMBBI->setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI : *RoundMBB) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI : *LoopMBB) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI :
         llvm::make_range(ContinueMBB->begin(), ContinueMBBI)) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
  }
}

void X86FrameLowering::emitStackProbeCall(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
    std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
  bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;

  // FIXME: Add indirect thunk support and remove this.
  if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls())
    report_fatal_error("Emitting stack probe calls on 64-bit with the large "
                       "code model and indirect thunks not yet implemented.");

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Stack probe calls will clobber live EFLAGS.");

  unsigned CallOp;
  if (Is64Bit)
    CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
  else
    CallOp = X86::CALLpcrel32;

  StringRef Symbol = STI.getTargetLowering()->getStackProbeSymbolName(MF);

  MachineInstrBuilder CI;
  MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI);

  // All current stack probes take AX and SP as input, clobber flags, and
  // preserve all registers. x86_64 probes leave RSP unmodified.
  if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
    // For the large code model, we have to call through a register. Use R11,
    // as it is scratch in all supported calling conventions.
    BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
        .addExternalSymbol(MF.createExternalSymbolName(Symbol));
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
  } else {
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp))
             .addExternalSymbol(MF.createExternalSymbolName(Symbol));
  }

  unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX;
  unsigned SP = Uses64BitFramePtr ? X86::RSP : X86::ESP;
  CI.addReg(AX, RegState::Implicit)
      .addReg(SP, RegState::Implicit)
      .addReg(AX, RegState::Define | RegState::Implicit)
      .addReg(SP, RegState::Define | RegState::Implicit)
      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);

  MachineInstr *ModInst = CI;
  if (STI.isTargetWin64() || !STI.isOSWindows()) {
    // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
    // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
    // themselves. They also do not clobber %rax so we can reuse it when
    // adjusting %rsp.
    // All other platforms do not specify a particular ABI for the stack probe
    // function, so we arbitrarily define it to not adjust %esp/%rsp itself.
    ModInst =
        BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Uses64BitFramePtr)), SP)
            .addReg(SP)
            .addReg(AX);
  }

  // DebugInfo variable locations -- if there's an instruction number for the
  // allocation (i.e., DYN_ALLOC_*), substitute it for the instruction that
  // modifies SP.
  if (InstrNum) {
    if (STI.isTargetWin64() || !STI.isOSWindows()) {
      // Label destination operand of the subtract.
      MF.makeDebugValueSubstitution(*InstrNum,
                                    {ModInst->getDebugInstrNum(), 0});
    } else {
      // Label the call. The operand number is the penultimate operand, zero
      // based.
      unsigned SPDefOperand = ModInst->getNumOperands() - 2;
      MF.makeDebugValueSubstitution(
          *InstrNum, {ModInst->getDebugInstrNum(), SPDefOperand});
    }
  }

  if (InProlog) {
    // Apply the frame setup flag to all inserted instrs.
    for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI)
      ExpansionMBBI->setFlag(MachineInstr::FrameSetup);
  }
}
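// Illustrative expansion on Win64 (small code model), assuming the caller
// already moved the allocation size into %rax:
//   callq __chkstk        # probes the pages but leaves %rsp unmodified
//   subq  %rax, %rsp      # the SUB added above actually allocates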

static unsigned calculateSetFPREG(uint64_t SPAdjust) {
  // Win64 ABI has a less restrictive limitation of 240; 128 works equally well
  // and might require smaller successive adjustments.
  const uint64_t Win64MaxSEHOffset = 128;
  uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset);
  // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode.
  return SEHFrameOffset & -16;
}
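// For example, calculateSetFPREG(40) returns 32 (rounded down to a 16-byte
// boundary) and calculateSetFPREG(1000) returns 128 (clamped to
// Win64MaxSEHOffset before rounding).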

// If we're forcing a stack realignment we can't rely on just the frame
// info, we need to know the ABI stack alignment as well in case we
// have a call out. Otherwise just make sure we have some alignment - we'll
// go with the minimum SlotSize.
uint64_t
X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment.
  Align StackAlign = getStackAlign();
  bool HasRealign = MF.getFunction().hasFnAttribute("stackrealign");
  if (HasRealign) {
    if (MFI.hasCalls())
      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
    else if (MaxAlign < SlotSize)
      MaxAlign = Align(SlotSize);
  }

  if (!Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR) {
    if (HasRealign)
      MaxAlign = (MaxAlign > 16) ? MaxAlign : Align(16);
    else
      MaxAlign = Align(16);
  }
  return MaxAlign.value();
}
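// E.g. a "stackrealign" function on x86-64 that makes calls and only has
// 8-byte-aligned locals still returns the 16-byte ABI stack alignment here,
// because StackAlign wins whenever the function has calls.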

void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI,
                                          const DebugLoc &DL, unsigned Reg,
                                          uint64_t MaxAlign) const {
  uint64_t Val = -MaxAlign;
  unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);

  MachineFunction &MF = *MBB.getParent();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);

  // We want to make sure that (in worst case) less than StackProbeSize bytes
  // are not probed after the AND. This assumption is used in
  // emitStackProbeInlineGeneric.
  if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) {
    {
      NumFrameLoopProbe++;
      MachineBasicBlock *entryMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *headMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *bodyMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *footMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());

      MachineFunction::iterator MBBIter = MBB.getIterator();
      MF.insert(MBBIter, entryMBB);
      MF.insert(MBBIter, headMBB);
      MF.insert(MBBIter, bodyMBB);
      MF.insert(MBBIter, footMBB);
      const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
      Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
                                  : Is64Bit         ? X86::R11D
                                                    : X86::EAX;

      // Setup entry block
      {

        entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI);
        BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);
        MachineInstr *MI =
            BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed)
                .addReg(FinalStackProbed)
                .addImm(Val)
                .setMIFlag(MachineInstr::FrameSetup);

        // The EFLAGS implicit def is dead.
        MI->getOperand(3).setIsDead();

        BuildMI(entryMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);
        BuildMI(entryMBB, DL, TII.get(X86::JCC_1))
            .addMBB(&MBB)
            .addImm(X86::COND_E)
            .setMIFlag(MachineInstr::FrameSetup);
        entryMBB->addSuccessor(headMBB);
        entryMBB->addSuccessor(&MBB);
      }

      // Loop entry block

      {
        const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
        BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr)
            .addReg(StackPtr)
            .addImm(StackProbeSize)
            .setMIFlag(MachineInstr::FrameSetup);

        BuildMI(headMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(StackPtr)
            .addReg(FinalStackProbed)
            .setMIFlag(MachineInstr::FrameSetup);

        // jump to the footer if StackPtr < FinalStackProbed
        BuildMI(headMBB, DL, TII.get(X86::JCC_1))
            .addMBB(footMBB)
            .addImm(X86::COND_B)
            .setMIFlag(MachineInstr::FrameSetup);

        headMBB->addSuccessor(bodyMBB);
        headMBB->addSuccessor(footMBB);
      }

      // setup loop body
      {
        addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc))
                         .setMIFlag(MachineInstr::FrameSetup),
                     StackPtr, false, 0)
            .addImm(0)
            .setMIFlag(MachineInstr::FrameSetup);

        const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
        BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr)
            .addReg(StackPtr)
            .addImm(StackProbeSize)
            .setMIFlag(MachineInstr::FrameSetup);

        // cmp with stack pointer bound
        BuildMI(bodyMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);

        // jump back while FinalStackProbed < StackPtr
        BuildMI(bodyMBB, DL, TII.get(X86::JCC_1))
            .addMBB(bodyMBB)
            .addImm(X86::COND_B)
            .setMIFlag(MachineInstr::FrameSetup);
        bodyMBB->addSuccessor(bodyMBB);
        bodyMBB->addSuccessor(footMBB);
      }

      // setup loop footer
      {
        BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr)
            .addReg(FinalStackProbed)
            .setMIFlag(MachineInstr::FrameSetup);
        addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc))
                         .setMIFlag(MachineInstr::FrameSetup),
                     StackPtr, false, 0)
            .addImm(0)
            .setMIFlag(MachineInstr::FrameSetup);
        footMBB->addSuccessor(&MBB);
      }

      fullyRecomputeLiveIns({footMBB, bodyMBB, headMBB, &MBB});
    }
  } else {
    MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
                           .addReg(Reg)
                           .addImm(Val)
                           .setMIFlag(MachineInstr::FrameSetup);

    // The EFLAGS implicit def is dead.
    MI->getOperand(3).setIsDead();
  }
}
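// Rough shape of the probed-realignment path (illustrative, 64-bit, r11
// standing in for FinalStackProbed):
//   movq %rsp, %r11
//   andq $-MaxAlign, %r11      # aligned target, computed off to the side
//   cmpq %rsp, %r11
//   je   done                  # entryMBB: already aligned, skip the loop
//   ...                        # headMBB/bodyMBB: drop %rsp a page at a time,
//                              # touching each page, until it passes %r11
//   movq %r11, %rsp            # footMBB: commit the aligned stack pointer
//   movl $0, (%rsp)
// done: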

bool X86FrameLowering::has128ByteRedZone(const MachineFunction &MF) const {
  // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be
  // clobbered by any interrupt handler.
  assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
         "MF used frame lowering for wrong subtarget");
  const Function &Fn = MF.getFunction();
  const bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv());
  return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone);
}

/// Return true if we need to use the restricted Windows x64 prologue and
/// epilogue code patterns that can be described with WinCFI (.seh_*
/// directives).
bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const {
  return MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
}

bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
  return !isWin64Prologue(MF) && MF.needsFrameMoves();
}

/// Return true if an opcode is part of the REP group of instructions
static bool isOpcodeRep(unsigned Opcode) {
  switch (Opcode) {
  case X86::REPNE_PREFIX:
  case X86::REP_MOVSB_32:
  case X86::REP_MOVSB_64:
  case X86::REP_MOVSD_32:
  case X86::REP_MOVSD_64:
  case X86::REP_MOVSQ_32:
  case X86::REP_MOVSQ_64:
  case X86::REP_MOVSW_32:
  case X86::REP_MOVSW_64:
  case X86::REP_PREFIX:
  case X86::REP_STOSB_32:
  case X86::REP_STOSB_64:
  case X86::REP_STOSD_32:
  case X86::REP_STOSD_64:
  case X86::REP_STOSQ_32:
  case X86::REP_STOSQ_64:
  case X86::REP_STOSW_32:
  case X86::REP_STOSW_64:
    return true;
  default:
    break;
  }
  return false;
}

/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjusts the stack pointer. Adjust the stack pointer to
/// allocate space for local variables. Also emit labels used by the exception
/// handler to generate the exception handling frames.

/*
  Here's a gist of what gets emitted:

  ; Establish frame pointer, if needed
  [if needs FP]
      push %rbp
      .cfi_def_cfa_offset 16
      .cfi_offset %rbp, -16
      .seh_pushreg %rbp
      mov %rsp, %rbp
      .cfi_def_cfa_register %rbp

  ; Spill general-purpose registers
  [for all callee-saved GPRs]
      pushq %<reg>
      [if not needs FP]
          .cfi_def_cfa_offset (offset from RETADDR)
      .seh_pushreg %<reg>

  ; If the required stack alignment > default stack alignment
  ; rsp needs to be re-aligned. This creates a "re-alignment gap"
  ; of unknown size in the stack frame.
  [if stack needs re-alignment]
      and $MASK, %rsp

  ; Allocate space for locals
  [if target is Windows and allocated space > 4096 bytes]
      ; Windows needs special care for allocations larger
      ; than one page.
      mov $NNN, %rax
      call ___chkstk_ms/___chkstk
      sub %rax, %rsp
  [else]
      sub $NNN, %rsp

  [if needs FP]
      .seh_stackalloc (size of XMM spill slots)
      .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
  [else]
      .seh_stackalloc NNN

  ; Spill XMMs
  ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved,
  ; they may get spilled on any platform, if the current function
  ; calls @llvm.eh.unwind.init
  [if needs FP]
      [for all callee-saved XMM registers]
          movaps %<xmm reg>, -MMM(%rbp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
          ; i.e. the offset relative to (%rbp - SEHFrameOffset)
  [else]
      [for all callee-saved XMM registers]
          movaps %<xmm reg>, KKK(%rsp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, KKK

  .seh_endprologue

  [if needs base pointer]
      mov %rsp, %rbx
      [if needs to restore base pointer]
          mov %rsp, -MMM(%rbp)

  ; Emit CFI info
  [if needs FP]
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rbp)
  [else]
      .cfi_def_cfa_offset (offset from RETADDR)
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rsp)

  Notes:
  - .seh directives are emitted only for Windows 64 ABI
  - .cv_fpo directives are emitted on win32 when emitting CodeView
  - .cfi directives are emitted for all other ABIs
  - for 32-bit code, substitute %e?? registers for %r??
*/
1561
1563 MachineBasicBlock &MBB) const {
1564 assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
1565 "MF used frame lowering for wrong subtarget");
1567 MachineFrameInfo &MFI = MF.getFrameInfo();
1568 const Function &Fn = MF.getFunction();
1570 uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
1571 uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate.
1572 bool IsFunclet = MBB.isEHFuncletEntry();
1574 if (Fn.hasPersonalityFn())
1575 Personality = classifyEHPersonality(Fn.getPersonalityFn());
1576 bool FnHasClrFunclet =
1577 MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR;
1578 bool IsClrFunclet = IsFunclet && FnHasClrFunclet;
1579 bool HasFP = hasFP(MF);
1580 bool IsWin64Prologue = isWin64Prologue(MF);
1581 bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry();
1582 // FIXME: Emit FPO data for EH funclets.
1583 bool NeedsWinFPO = !IsFunclet && STI.isTargetWin32() &&
1585 bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO;
1586 bool NeedsDwarfCFI = needsDwarfCFI(MF);
1588 const Register MachineFramePtr =
1590 : FramePtr;
1591 Register BasePtr = TRI->getBaseRegister();
1592 bool HasWinCFI = false;
1593
1594 // Debug location must be unknown since the first debug location is used
1595 // to determine the end of the prologue.
1596 DebugLoc DL;
1597 Register ArgBaseReg;
1598
1599 // Emit extra prolog for argument stack slot reference.
1600 if (auto *MI = X86FI->getStackPtrSaveMI()) {
1601 // MI is lea instruction that created in X86ArgumentStackSlotPass.
1602 // Creat extra prolog for stack realignment.
1603 ArgBaseReg = MI->getOperand(0).getReg();
1604 // leal 4(%esp), %basereg
1605 // .cfi_def_cfa %basereg, 0
1606 // andl $-128, %esp
1607 // pushl -4(%basereg)
1608 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::LEA64r : X86::LEA32r),
1609 ArgBaseReg)
1611 .addImm(1)
1612 .addUse(X86::NoRegister)
1614 .addUse(X86::NoRegister)
1616 if (NeedsDwarfCFI) {
1617 // .cfi_def_cfa %basereg, 0
1618 unsigned DwarfStackPtr = TRI->getDwarfRegNum(ArgBaseReg, true);
1619 BuildCFI(MBB, MBBI, DL,
1620 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, 0),
1622 }
1623 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1624 int64_t Offset = -(int64_t)SlotSize;
1625 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm))
1626 .addReg(ArgBaseReg)
1627 .addImm(1)
1628 .addReg(X86::NoRegister)
1629 .addImm(Offset)
1630 .addReg(X86::NoRegister)
1632 }
1633
1634 // Space reserved for stack-based arguments when making a (ABI-guaranteed)
1635 // tail call.
1636 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
1637 if (TailCallArgReserveSize && IsWin64Prologue)
1638 report_fatal_error("Can't handle guaranteed tail call under win64 yet");
1639
1640 const bool EmitStackProbeCall =
1642 unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
1643
1644 if (HasFP && X86FI->hasSwiftAsyncContext()) {
1648 // The special symbol below is absolute and has a *value* suitable to be
1649 // combined with the frame pointer directly.
1650 BuildMI(MBB, MBBI, DL, TII.get(X86::OR64rm), MachineFramePtr)
1651 .addUse(MachineFramePtr)
1652 .addUse(X86::RIP)
1653 .addImm(1)
1654 .addUse(X86::NoRegister)
1655 .addExternalSymbol("swift_async_extendedFramePointerFlags",
1657 .addUse(X86::NoRegister);
1658 break;
1659 }
1660 [[fallthrough]];
1661
1663 assert(
1664 !IsWin64Prologue &&
1665 "win64 prologue does not set the bit 60 in the saved frame pointer");
1666 BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8), MachineFramePtr)
1667 .addUse(MachineFramePtr)
1668 .addImm(60)
1670 break;
1671
1673 break;
1674 }
1675 }
1676
1677 // Re-align the stack on 64-bit if the x86-interrupt calling convention is
1678 // used and an error code was pushed, since the x86-64 ABI requires a 16-byte
1679 // stack alignment.
1681 Fn.arg_size() == 2) {
1682 StackSize += 8;
1683 MFI.setStackSize(StackSize);
1684
1685 // Update the stack pointer by pushing a register. This is the instruction
1686 // emitted that would be end up being emitted by a call to `emitSPUpdate`.
1687 // Hard-coding the update to a push avoids emitting a second
1688 // `STACKALLOC_W_PROBING` instruction in the save block: We know that stack
1689 // probing isn't needed anyways for an 8-byte update.
1690 // Pushing a register leaves us in a similar situation to a regular
1691 // function call where we know that the address at (rsp-8) is writeable.
1692 // That way we avoid any off-by-ones with stack probing for additional
1693 // stack pointer updates later on.
1694 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1695 .addReg(X86::RAX, RegState::Undef)
1697 }
1698
1699 // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
1700 // function, and use up to 128 bytes of stack space, don't have a frame
1701 // pointer, calls, or dynamic alloca then we do not need to adjust the
1702 // stack pointer (we fit in the Red Zone). We also check that we don't
1703 // push and pop from the stack.
1704 if (has128ByteRedZone(MF) && !TRI->hasStackRealignment(MF) &&
1705 !MFI.hasVarSizedObjects() && // No dynamic alloca.
1706 !MFI.adjustsStack() && // No calls.
1707 !EmitStackProbeCall && // No stack probes.
1708 !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
1709 !MF.shouldSplitStack()) { // Regular stack
1710 uint64_t MinSize =
1712 if (HasFP)
1713 MinSize += SlotSize;
1714 X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0);
1715 StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
1716 MFI.setStackSize(StackSize);
1717 }
1718
1719 // Insert stack pointer adjustment for later moving of return addr. Only
1720 // applies to tail call optimized functions where the callee argument stack
1721 // size is bigger than the callers.
1722 if (TailCallArgReserveSize != 0) {
1723 BuildStackAdjustment(MBB, MBBI, DL, -(int)TailCallArgReserveSize,
1724 /*InEpilogue=*/false)
1726 }
1727
1728 // Mapping for machine moves:
1729 //
1730 // DST: VirtualFP AND
1731 // SRC: VirtualFP => DW_CFA_def_cfa_offset
1732 // ELSE => DW_CFA_def_cfa
1733 //
1734 // SRC: VirtualFP AND
1735 // DST: Register => DW_CFA_def_cfa_register
1736 //
1737 // ELSE
1738 // OFFSET < 0 => DW_CFA_offset_extended_sf
1739 // REG < 64 => DW_CFA_offset + Reg
1740 // ELSE => DW_CFA_offset_extended
1741
1742 uint64_t NumBytes = 0;
1743 int stackGrowth = -SlotSize;
1744
1745 // Find the funclet establisher parameter
1746 Register Establisher = X86::NoRegister;
1747 if (IsClrFunclet)
1748 Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX;
1749 else if (IsFunclet)
1750 Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX;
1751
1752 if (IsWin64Prologue && IsFunclet && !IsClrFunclet) {
1753 // Immediately spill establisher into the home slot.
1754 // The runtime cares about this.
1755 // MOV64mr %rdx, 16(%rsp)
1756 unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
1757 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16)
1758 .addReg(Establisher)
1760 MBB.addLiveIn(Establisher);
1761 }
1762
1763 if (HasFP) {
1764 assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved");
1765
1766 // Calculate required stack adjustment.
1767 uint64_t FrameSize = StackSize - SlotSize;
1768 NumBytes =
1769 FrameSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1770
1771 // Callee-saved registers are pushed on stack before the stack is realigned.
1772 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
1773 NumBytes = alignTo(NumBytes, MaxAlign);
1774
1775 // Save EBP/RBP into the appropriate stack slot.
1776 BuildMI(MBB, MBBI, DL,
1777 TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
1778 .addReg(MachineFramePtr, RegState::Kill)
1779 .setMIFlag(MachineInstr::FrameSetup);
1780
1781 if (NeedsDwarfCFI && !ArgBaseReg.isValid()) {
1782 // Mark the place where EBP/RBP was saved.
1783 // Define the current CFA rule to use the provided offset.
1784 assert(StackSize);
1785 BuildCFI(MBB, MBBI, DL,
1786 MCCFIInstruction::cfiDefCfaOffset(
1787 nullptr, -2 * stackGrowth + (int)TailCallArgReserveSize),
1788 MachineInstr::FrameSetup);
1789
1790 // Change the rule for the FramePtr to be an "offset" rule.
1791 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1792 BuildCFI(MBB, MBBI, DL,
1793 MCCFIInstruction::createOffset(nullptr, DwarfFramePtr,
1794 2 * stackGrowth -
1795 (int)TailCallArgReserveSize),
1796 MachineInstr::FrameSetup);
1797 }
1798
1799 if (NeedsWinCFI) {
1800 HasWinCFI = true;
1801 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1802 .addImm(FramePtr)
1803 .setMIFlag(MachineInstr::FrameSetup);
1804 }
1805
1806 if (!IsFunclet) {
1807 if (X86FI->hasSwiftAsyncContext()) {
1808 assert(!IsWin64Prologue &&
1809 "win64 prologue does not store async context right below rbp");
1810 const auto &Attrs = MF.getFunction().getAttributes();
1811
1812 // Before we update the live frame pointer we have to ensure there's a
1813 // valid (or null) asynchronous context in its slot just before FP in
1814 // the frame record, so store it now.
1815 if (Attrs.hasAttrSomewhere(Attribute::SwiftAsync)) {
1816 // We have an initial context in r14, store it just before the frame
1817 // pointer.
1818 MBB.addLiveIn(X86::R14);
1819 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1820 .addReg(X86::R14)
1821 .setMIFlag(MachineInstr::FrameSetup);
1822 } else {
1823 // No initial context, store null so that there's no pointer that
1824 // could be misused.
1825 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i32))
1826 .addImm(0)
1827 .setMIFlag(MachineInstr::FrameSetup);
1828 }
1829
1830 if (NeedsWinCFI) {
1831 HasWinCFI = true;
1832 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1833 .addImm(X86::R14)
1834 .setMIFlag(MachineInstr::FrameSetup);
1835 }
1836
1837 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr)
1838 .addUse(X86::RSP)
1839 .addImm(1)
1840 .addUse(X86::NoRegister)
1841 .addImm(8)
1842 .addUse(X86::NoRegister)
1843 .setMIFlag(MachineInstr::FrameSetup);
1844 BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri32), X86::RSP)
1845 .addUse(X86::RSP)
1846 .addImm(8)
1847 .setMIFlag(MachineInstr::FrameSetup);
1848 }
1849
1850 if (!IsWin64Prologue && !IsFunclet) {
1851 // Update EBP with the new base value.
1852 if (!X86FI->hasSwiftAsyncContext())
1853 BuildMI(MBB, MBBI, DL,
1854 TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
1855 FramePtr)
1856 .addReg(StackPtr)
1857 .setMIFlag(MachineInstr::FrameSetup);
1858
1859 if (NeedsDwarfCFI) {
1860 if (ArgBaseReg.isValid()) {
1861 SmallString<64> CfaExpr;
1862 CfaExpr.push_back(dwarf::DW_CFA_expression);
1863 uint8_t buffer[16];
1864 unsigned DwarfReg = TRI->getDwarfRegNum(MachineFramePtr, true);
1865 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
1866 CfaExpr.push_back(2);
1867 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
1868 CfaExpr.push_back(0);
1869 // DW_CFA_expression: reg5 DW_OP_breg5 +0
1870 BuildCFI(MBB, MBBI, DL,
1871 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
1872 MachineInstr::FrameSetup);
1873 } else {
1874 // Mark effective beginning of when frame pointer becomes valid.
1875 // Define the current CFA to use the EBP/RBP register.
1876 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1877 BuildCFI(
1878 MBB, MBBI, DL,
1879 MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
1880 MachineInstr::FrameSetup);
1881 }
1882 }
1883
1884 if (NeedsWinFPO) {
1885 // .cv_fpo_setframe $FramePtr
1886 HasWinCFI = true;
1887 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
1888 .addImm(FramePtr)
1889 .addImm(0)
1890 .setMIFlag(MachineInstr::FrameSetup);
1891 }
1892 }
1893 }
1894 } else {
1895 assert(!IsFunclet && "funclets without FPs not yet implemented");
1896 NumBytes =
1897 StackSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1898 }
1899
1900 // Update the offset adjustment, which is mainly used by codeview to translate
1901 // from ESP to VFRAME relative local variable offsets.
1902 if (!IsFunclet) {
1903 if (HasFP && TRI->hasStackRealignment(MF))
1904 MFI.setOffsetAdjustment(-NumBytes);
1905 else
1906 MFI.setOffsetAdjustment(-StackSize);
1907 }
1908
1909 // For EH funclets, only allocate enough space for outgoing calls. Save the
1910 // NumBytes value that we would've used for the parent frame.
1911 unsigned ParentFrameNumBytes = NumBytes;
1912 if (IsFunclet)
1913 NumBytes = getWinEHFuncletFrameSize(MF);
1914
1915 // Skip the callee-saved push instructions.
1916 bool PushedRegs = false;
1917 int StackOffset = 2 * stackGrowth;
1918 MachineBasicBlock::iterator LastCSPush = MBBI;
1919 auto IsCSPush = [&](const MachineBasicBlock::iterator &MBBI) {
1920 if (MBBI == MBB.end() || !MBBI->getFlag(MachineInstr::FrameSetup))
1921 return false;
1922 unsigned Opc = MBBI->getOpcode();
1923 return Opc == X86::PUSH32r || Opc == X86::PUSH64r || Opc == X86::PUSHP64r ||
1924 Opc == X86::PUSH2 || Opc == X86::PUSH2P;
1925 };
1926
1927 while (IsCSPush(MBBI)) {
1928 PushedRegs = true;
1929 Register Reg = MBBI->getOperand(0).getReg();
1930 LastCSPush = MBBI;
1931 ++MBBI;
1932 unsigned Opc = LastCSPush->getOpcode();
1933
1934 if (!HasFP && NeedsDwarfCFI) {
1935 // Mark callee-saved push instruction.
1936 // Define the current CFA rule to use the provided offset.
1937 assert(StackSize);
1938 // Compared to a single push, push2 moves the stack by one extra
1939 // register slot.
1940 if (Opc == X86::PUSH2 || Opc == X86::PUSH2P)
1941 StackOffset += stackGrowth;
1942 BuildCFI(MBB, MBBI, DL,
1943 MCCFIInstruction::cfiDefCfaOffset(nullptr, -StackOffset),
1944 MachineInstr::FrameSetup);
1945 StackOffset += stackGrowth;
1946 }
1947
1948 if (NeedsWinCFI) {
1949 HasWinCFI = true;
1950 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1951 .addImm(Reg)
1952 .setMIFlag(MachineInstr::FrameSetup);
1953 if (Opc == X86::PUSH2 || Opc == X86::PUSH2P)
1954 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1955 .addImm(LastCSPush->getOperand(1).getReg())
1956 .setMIFlag(MachineInstr::FrameSetup);
1957 }
1958 }
1959
1960 // Realign stack after we pushed callee-saved registers (so that we'll be
1961 // able to calculate their offsets from the frame pointer).
1962 // Don't do this for Win64, it needs to realign the stack after the prologue.
1963 if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF) &&
1964 !ArgBaseReg.isValid()) {
1965 assert(HasFP && "There should be a frame pointer if stack is realigned.");
1966 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1967
1968 if (NeedsWinCFI) {
1969 HasWinCFI = true;
1970 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlign))
1971 .addImm(MaxAlign)
1972 .setMIFlag(MachineInstr::FrameSetup);
1973 }
1974 }
1975
1976 // If there is a SUB32ri of ESP immediately before this instruction, merge
1977 // the two. This can be the case when tail call elimination is enabled and
1978 // the callee has more arguments than the caller.
1979 NumBytes -= mergeSPUpdates(MBB, MBBI, true);
1980
1981 // Adjust stack pointer: ESP -= numbytes.
1982
1983 // Windows and cygwin/mingw require a prologue helper routine when allocating
1984 // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
1985 // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
1986 // stack and adjust the stack pointer in one go. The 64-bit version of
1987 // __chkstk is only responsible for probing the stack. The 64-bit prologue is
1988 // responsible for adjusting the stack pointer. Touching the stack at 4K
1989 // increments is necessary to ensure that the guard pages used by the OS
1990 // virtual memory manager are allocated in correct sequence.
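// Illustrative 64-bit Windows sequence for a large allocation (approximate):
// movq $NumBytes, %rax
// callq __chkstk ; probes the guard pages, leaves RSP unchanged
// subq %rax, %rsp ; the prologue performs the actual adjustment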
1991 uint64_t AlignedNumBytes = NumBytes;
1992 if (IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF))
1993 AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign);
1994 if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) {
1995 assert(!X86FI->getUsesRedZone() &&
1996 "The Red Zone is not accounted for in stack probes");
1997
1998 // Check whether EAX is livein for this block.
1999 bool isEAXAlive = isEAXLiveIn(MBB);
2000
2001 if (isEAXAlive) {
2002 if (Is64Bit) {
2003 // Save RAX
2004 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
2005 .addReg(X86::RAX, RegState::Kill)
2006 .setMIFlag(MachineInstr::FrameSetup);
2007 } else {
2008 // Save EAX
2009 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
2010 .addReg(X86::EAX, RegState::Kill)
2011 .setMIFlag(MachineInstr::FrameSetup);
2012 }
2013 }
2014
2015 if (Is64Bit) {
2016 // Handle the 64-bit Windows ABI case where we need to call __chkstk.
2017 // Function prologue is responsible for adjusting the stack pointer.
2018 int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes;
2019 BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Alloc)), X86::RAX)
2020 .addImm(Alloc)
2021 .setMIFlag(MachineInstr::FrameSetup);
2022 } else {
2023 // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
2024 // We'll also use 4 already allocated bytes for EAX.
2025 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
2026 .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
2027 .setMIFlag(MachineInstr::FrameSetup);
2028 }
2029
2030 // Call __chkstk, __chkstk_ms, or __alloca.
2031 emitStackProbe(MF, MBB, MBBI, DL, true);
2032
2033 if (isEAXAlive) {
2034 // Restore RAX/EAX
2035 MachineInstr *MI;
2036 if (Is64Bit)
2037 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV64rm), X86::RAX),
2038 StackPtr, false, NumBytes - 8);
2039 else
2040 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX),
2041 StackPtr, false, NumBytes - 4);
2042 MI->setFlag(MachineInstr::FrameSetup);
2043 MBB.insert(MBBI, MI);
2044 }
2045 } else if (NumBytes) {
2046 emitSPUpdate(MBB, MBBI, DL, -(int64_t)NumBytes, /*InEpilogue=*/false);
2047 }
2048
2049 if (NeedsWinCFI && NumBytes) {
2050 HasWinCFI = true;
2051 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
2052 .addImm(NumBytes)
2053 .setMIFlag(MachineInstr::FrameSetup);
2054 }
2055
2056 int SEHFrameOffset = 0;
2057 unsigned SPOrEstablisher;
2058 if (IsFunclet) {
2059 if (IsClrFunclet) {
2060 // The establisher parameter passed to a CLR funclet is actually a pointer
2061 // to the (mostly empty) frame of its nearest enclosing funclet; we have
2062 // to find the root function establisher frame by loading the PSPSym from
2063 // the intermediate frame.
2064 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2065 MachinePointerInfo NoInfo;
2066 MBB.addLiveIn(Establisher);
2067 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher),
2068 Establisher, false, PSPSlotOffset)
2069 .addMemOperand(MF.getMachineMemOperand(
2070 NoInfo, MachineMemOperand::MOLoad, SlotSize, Align(SlotSize)));
2072 // Save the root establisher back into the current funclet's (mostly
2073 // empty) frame, in case a sub-funclet or the GC needs it.
2074 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr,
2075 false, PSPSlotOffset)
2076 .addReg(Establisher)
2077 .addMemOperand(MF.getMachineMemOperand(
2078 NoInfo,
2079 MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
2080 SlotSize, Align(SlotSize)));
2081 }
2082 SPOrEstablisher = Establisher;
2083 } else {
2084 SPOrEstablisher = StackPtr;
2085 }
2086
2087 if (IsWin64Prologue && HasFP) {
2088 // Set RBP to a small fixed offset from RSP. In the funclet case, we base
2089 // this calculation on the incoming establisher, which holds the value of
2090 // RSP from the parent frame at the end of the prologue.
2091 SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes);
2092 if (SEHFrameOffset)
2093 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
2094 SPOrEstablisher, false, SEHFrameOffset);
2095 else
2096 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr)
2097 .addReg(SPOrEstablisher);
2098
2099 // If this is not a funclet, emit the CFI describing our frame pointer.
2100 if (NeedsWinCFI && !IsFunclet) {
2101 assert(!NeedsWinFPO && "this setframe incompatible with FPO data");
2102 HasWinCFI = true;
2103 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
2104 .addImm(FramePtr)
2105 .addImm(SEHFrameOffset)
2106 .setMIFlag(MachineInstr::FrameSetup);
2107 if (isAsynchronousEHPersonality(Personality))
2108 MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset;
2109 }
2110 } else if (IsFunclet && STI.is32Bit()) {
2111 // Reset EBP / ESI to something good for funclets.
2112 MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL);
2113 // If we're a catch funclet, we can be returned to via catchret. Save ESP
2114 // into the registration node so that the runtime will restore it for us.
2115 if (!MBB.isCleanupFuncletEntry()) {
2116 assert(Personality == EHPersonality::MSVC_CXX);
2117 Register FrameReg;
2118 int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex;
2119 int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg).getFixed();
2120 // ESP is the first field, so no extra displacement is needed.
2121 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg,
2122 false, EHRegOffset)
2123 .addReg(X86::ESP);
2124 }
2125 }
2126
2127 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
2128 const MachineInstr &FrameInstr = *MBBI;
2129 ++MBBI;
2130
2131 if (NeedsWinCFI) {
2132 int FI;
2133 if (Register Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
2134 if (X86::FR64RegClass.contains(Reg)) {
2135 int Offset;
2136 Register IgnoredFrameReg;
2137 if (IsWin64Prologue && IsFunclet)
2138 Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg);
2139 else
2140 Offset =
2141 getFrameIndexReference(MF, FI, IgnoredFrameReg).getFixed() +
2142 SEHFrameOffset;
2143
2144 HasWinCFI = true;
2145 assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data");
2146 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
2147 .addImm(Reg)
2148 .addImm(Offset)
2149 .setMIFlag(MachineInstr::FrameSetup);
2150 }
2151 }
2152 }
2153 }
2154
2155 if (NeedsWinCFI && HasWinCFI)
2156 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
2157 .setMIFlag(MachineInstr::FrameSetup);
2158
2159 if (FnHasClrFunclet && !IsFunclet) {
2160 // Save the so-called Initial-SP (i.e. the value of the stack pointer
2161 // immediately after the prolog) into the PSPSlot so that funclets
2162 // and the GC can recover it.
2163 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2164 auto PSPInfo = MachinePointerInfo::getFixedStack(
2165 MF, MF.getWinEHFuncInfo()->PSPSymFrameIdx);
2166 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, false,
2167 PSPSlotOffset)
2168 .addReg(StackPtr)
2169 .addMemOperand(MF.getMachineMemOperand(
2170 PSPInfo, MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
2171 SlotSize, Align(SlotSize)));
2172 }
2173
2174 // Realign stack after we spilled callee-saved registers (so that we'll be
2175 // able to calculate their offsets from the frame pointer).
2176 // Win64 requires aligning the stack after the prologue.
2177 if (IsWin64Prologue && TRI->hasStackRealignment(MF)) {
2178 assert(HasFP && "There should be a frame pointer if stack is realigned.");
2179 BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign);
2180 }
2181
2182 // We already dealt with stack realignment and funclets above.
2183 if (IsFunclet && STI.is32Bit())
2184 return;
2185
2186 // If we need a base pointer, set it up here. It's whatever the value
2187 // of the stack pointer is at this point. Any variable size objects
2188 // will be allocated after this, so we can still use the base pointer
2189 // to reference locals.
2190 if (TRI->hasBasePointer(MF)) {
2191 // Update the base pointer with the current stack pointer.
2192 unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
2193 BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
2194 .addReg(SPOrEstablisher)
2195 .setMIFlag(MachineInstr::FrameSetup);
2196 if (X86FI->getRestoreBasePointer()) {
2197 // Stash value of base pointer. Saving RSP instead of EBP shortens
2198 // dependence chain. Used by SjLj EH.
2199 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2200 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), FramePtr, true,
2201 X86FI->getRestoreBasePointerOffset())
2202 .addReg(SPOrEstablisher)
2203 .setMIFlag(MachineInstr::FrameSetup);
2204 }
2205
2206 if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) {
2207 // Stash the value of the frame pointer relative to the base pointer for
2208 // Win32 EH. This supports Win32 EH, which does the inverse of the above:
2209 // it recovers the frame pointer from the base pointer rather than the
2210 // other way around.
2211 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2212 Register UsedReg;
2213 int Offset =
2214 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
2215 .getFixed();
2216 assert(UsedReg == BasePtr);
2217 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset)
2218 .addReg(FramePtr)
2219 .setMIFlag(MachineInstr::FrameSetup);
2220 }
2221 }
2222 if (ArgBaseReg.isValid()) {
2223 // Save argument base pointer.
2224 auto *MI = X86FI->getStackPtrSaveMI();
2225 int FI = MI->getOperand(1).getIndex();
2226 unsigned MOVmr = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
2227 // movl %basereg, offset(%ebp)
2228 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), FI)
2229 .addReg(ArgBaseReg)
2230 .setMIFlag(MachineInstr::FrameSetup);
2231 }
2232
2233 if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
2234 // Mark end of stack pointer adjustment.
2235 if (!HasFP && NumBytes) {
2236 // Define the current CFA rule to use the provided offset.
2237 assert(StackSize);
2238 BuildCFI(
2239 MBB, MBBI, DL,
2240 MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize - stackGrowth),
2241 MachineInstr::FrameSetup);
2242 }
2243
2244 // Emit DWARF info specifying the offsets of the callee-saved registers.
2245 emitCalleeSavedFrameMoves(MBB, MBBI, DL, true);
2246 }
2247
2248 // X86 Interrupt handling function cannot assume anything about the direction
2249 // flag (DF in EFLAGS register). Clear this flag by creating "cld" instruction
2250 // in each prologue of interrupt handler function.
2251 //
2252 // Create "cld" instruction only in these cases:
2253 // 1. The interrupt handling function uses any of the "rep" instructions.
2254 // 2. Interrupt handling function calls another function.
2255 // 3. If there are any inline asm blocks, as we do not know what they do
2256 //
2257 // TODO: We should also emit cld if we detect the use of std, but as of now,
2258 // the compiler does not even emit or define that instruction, so in
2259 // practice, this would only happen with inline asm, which we cover anyway.
2260 if (Fn.getCallingConv() == CallingConv::X86_INTR) {
2261 bool NeedsCLD = false;
2262
2263 for (const MachineBasicBlock &B : MF) {
2264 for (const MachineInstr &MI : B) {
2265 if (MI.isCall()) {
2266 NeedsCLD = true;
2267 break;
2268 }
2269
2270 if (isOpcodeRep(MI.getOpcode())) {
2271 NeedsCLD = true;
2272 break;
2273 }
2274
2275 if (MI.isInlineAsm()) {
2276 // TODO: Parse asm for rep instructions or call sites?
2277 // For now, let's play it safe and emit a cld instruction
2278 // just in case.
2279 NeedsCLD = true;
2280 break;
2281 }
2282 }
2283 }
2284
2285 if (NeedsCLD) {
2286 BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
2287 .setMIFlag(MachineInstr::FrameSetup);
2288 }
2289 }
2290
2291 // At this point we know if the function has WinCFI or not.
2292 MF.setHasWinCFI(HasWinCFI);
2293}
2294
2295 bool X86FrameLowering::canUseLEAForSPInEpilogue(
2296 const MachineFunction &MF) const {
2297 // We can't use LEA instructions for adjusting the stack pointer if we don't
2298 // have a frame pointer in the Win64 ABI. Only ADD instructions may be used
2299 // to deallocate the stack.
2300 // This means that we can use LEA for SP in two situations:
2301 // 1. We *aren't* using the Win64 ABI which means we are free to use LEA.
2302 // 2. We *have* a frame pointer which means we are permitted to use LEA.
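// Illustrative only: with a frame pointer the epilogue may use
// `lea N(%rbp), %rsp`; under Win64 CFI without a frame pointer it must
// use `add $N, %rsp` instead.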
2303 return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF);
2304}
2305
2306 static bool isFuncletReturnInstr(const MachineInstr &MI) {
2307 switch (MI.getOpcode()) {
2308 case X86::CATCHRET:
2309 case X86::CLEANUPRET:
2310 return true;
2311 default:
2312 return false;
2313 }
2314 llvm_unreachable("impossible");
2315}
2316
2317// CLR funclets use a special "Previous Stack Pointer Symbol" slot on the
2318// stack. It holds a pointer to the bottom of the root function frame. The
2319// establisher frame pointer passed to a nested funclet may point to the
2320// (mostly empty) frame of its parent funclet, but it will need to find
2321// the frame of the root function to access locals. To facilitate this,
2322// every funclet copies the pointer to the bottom of the root function
2323// frame into a PSPSym slot in its own (mostly empty) stack frame. Using the
2324// same offset for the PSPSym in the root function frame that's used in the
2325// funclets' frames allows each funclet to dynamically accept any ancestor
2326// frame as its establisher argument (the runtime doesn't guarantee the
2327// immediate parent for some reason lost to history), and also allows the GC,
2328// which uses the PSPSym for some bookkeeping, to find it in any funclet's
2329// frame with only a single offset reported for the entire method.
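// Illustratively (a sketch): a funclet F2 entered with funclet F1's mostly
// empty frame as establisher loads [F1-frame + PSPSlot] to recover the root
// frame, then stores the same pointer at [F2-frame + PSPSlot], so nested
// funclets and the GC can repeat the lookup with one common offset.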
2330unsigned
2331X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const {
2332 const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo();
2333 Register SPReg;
2334 int Offset = getFrameIndexReferencePreferSP(MF, Info.PSPSymFrameIdx, SPReg,
2335 /*IgnoreSPUpdates*/ true)
2336 .getFixed();
2337 assert(Offset >= 0 && SPReg == TRI->getStackRegister());
2338 return static_cast<unsigned>(Offset);
2339}
2340
2341unsigned
2342 X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
2343 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2344 // This is the size of the pushed CSRs.
2345 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2346 // This is the size of callee saved XMMs.
2347 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2348 unsigned XMMSize =
2349 WinEHXMMSlotInfo.size() * TRI->getSpillSize(X86::VR128RegClass);
2350 // This is the amount of stack a funclet needs to allocate.
2351 unsigned UsedSize;
2352 EHPersonality Personality =
2353 classifyEHPersonality(MF.getFunction().getPersonalityFn());
2354 if (Personality == EHPersonality::CoreCLR) {
2355 // CLR funclets need to hold enough space to include the PSPSym, at the
2356 // same offset from the stack pointer (immediately after the prolog) as it
2357 // resides at in the main function.
2358 UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize;
2359 } else {
2360 // Other funclets just need enough stack for outgoing call arguments.
2361 UsedSize = MF.getFrameInfo().getMaxCallFrameSize();
2362 }
2363 // RBP is not included in the callee saved register block. After pushing RBP,
2364 // everything is 16 byte aligned. Everything we allocate before an outgoing
2365 // call must also be 16 byte aligned.
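// Worked example (illustrative numbers only): CSSize = 24 and UsedSize = 32
// give alignTo(56, 16) = 64, so the funclet allocates 64 + XMMSize - 24
// = 40 + XMMSize bytes.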
2366 unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlign());
2367 // Subtract out the size of the callee saved registers. This is how much stack
2368 // each funclet will allocate.
2369 return FrameSizeMinusRBP + XMMSize - CSSize;
2370}
2371
2372static bool isTailCallOpcode(unsigned Opc) {
2373 return Opc == X86::TCRETURNri || Opc == X86::TCRETURNdi ||
2374 Opc == X86::TCRETURNmi || Opc == X86::TCRETURNri64 ||
2375 Opc == X86::TCRETURNdi64 || Opc == X86::TCRETURNmi64;
2376}
2377
2378 void X86FrameLowering::emitEpilogue(MachineFunction &MF,
2379 MachineBasicBlock &MBB) const {
2380 const MachineFrameInfo &MFI = MF.getFrameInfo();
2381 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2382 MachineBasicBlock::iterator Terminator = MBB.getFirstTerminator();
2383 MachineBasicBlock::iterator MBBI = Terminator;
2384 DebugLoc DL;
2385 if (MBBI != MBB.end())
2386 DL = MBBI->getDebugLoc();
2387 // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
2388 const bool Is64BitILP32 = STI.isTarget64BitILP32();
2389 Register FramePtr = TRI->getFrameRegister(MF);
2390 Register MachineFramePtr =
2391 Is64BitILP32 ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr;
2392
2393 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2394 bool NeedsWin64CFI =
2395 IsWin64Prologue && MF.getFunction().needsUnwindTableEntry();
2396 bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(*MBBI);
2397
2398 // Get the number of bytes to allocate from the FrameInfo.
2399 uint64_t StackSize = MFI.getStackSize();
2400 uint64_t MaxAlign = calculateMaxStackAlign(MF);
2401 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2402 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
2403 bool HasFP = hasFP(MF);
2404 uint64_t NumBytes = 0;
2405
2406 bool NeedsDwarfCFI = (!MF.getTarget().getTargetTriple().isOSDarwin() &&
2407 !MF.getTarget().getTargetTriple().isOSWindows()) &&
2408 MF.needsFrameMoves();
2409
2410 Register ArgBaseReg;
2411 if (auto *MI = X86FI->getStackPtrSaveMI()) {
2412 unsigned Opc = X86::LEA32r;
2413 Register StackReg = X86::ESP;
2414 ArgBaseReg = MI->getOperand(0).getReg();
2415 if (STI.is64Bit()) {
2416 Opc = X86::LEA64r;
2417 StackReg = X86::RSP;
2418 }
2419 // leal -4(%basereg), %esp
2420 // .cfi_def_cfa %esp, 4
2421 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackReg)
2422 .addUse(ArgBaseReg)
2423 .addImm(1)
2424 .addUse(X86::NoRegister)
2425 .addImm(-(int64_t)SlotSize)
2426 .addUse(X86::NoRegister)
2427 .setMIFlag(MachineInstr::FrameDestroy);
2428 if (NeedsDwarfCFI) {
2429 unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackReg, true);
2430 BuildCFI(MBB, MBBI, DL,
2431 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2432 MachineInstr::FrameDestroy);
2433 --MBBI;
2434 }
2435 --MBBI;
2436 }
2437
2438 if (IsFunclet) {
2439 assert(HasFP && "EH funclets without FP not yet implemented");
2440 NumBytes = getWinEHFuncletFrameSize(MF);
2441 } else if (HasFP) {
2442 // Calculate required stack adjustment.
2443 uint64_t FrameSize = StackSize - SlotSize;
2444 NumBytes = FrameSize - CSSize - TailCallArgReserveSize;
2445
2446 // Callee-saved registers were pushed on stack before the stack was
2447 // realigned.
2448 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
2449 NumBytes = alignTo(FrameSize, MaxAlign);
2450 } else {
2451 NumBytes = StackSize - CSSize - TailCallArgReserveSize;
2452 }
2453 uint64_t SEHStackAllocAmt = NumBytes;
2454
2455 // AfterPop is the position to insert .cfi_restore.
2456 MachineBasicBlock::iterator AfterPop = MBBI;
2457 if (HasFP) {
2458 if (X86FI->hasSwiftAsyncContext()) {
2459 // Discard the context.
2460 int Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
2461 emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/ true);
2462 }
2463 // Pop EBP.
2464 BuildMI(MBB, MBBI, DL,
2465 TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())),
2466 MachineFramePtr)
2467 .setMIFlag(MachineInstr::FrameDestroy);
2468
2469 // We need to reset FP to its untagged state on return. Bit 60 is currently
2470 // used to show the presence of an extended frame.
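// Illustratively, assuming RBP is the frame pointer, the instruction below
// is `btrq $60, %rbp`, which clears the extended-frame marker bit.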
2471 if (X86FI->hasSwiftAsyncContext()) {
2472 BuildMI(MBB, MBBI, DL, TII.get(X86::BTR64ri8), MachineFramePtr)
2473 .addUse(MachineFramePtr)
2474 .addImm(60)
2475 .setMIFlag(MachineInstr::FrameDestroy);
2476 }
2477
2478 if (NeedsDwarfCFI) {
2479 if (!ArgBaseReg.isValid()) {
2480 unsigned DwarfStackPtr =
2481 TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
2482 BuildCFI(MBB, MBBI, DL,
2483 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2484 MachineInstr::FrameDestroy);
2485 }
2486 if (!MBB.succ_empty() && !MBB.isReturnBlock()) {
2487 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
2488 BuildCFI(MBB, AfterPop, DL,
2489 MCCFIInstruction::createRestore(nullptr, DwarfFramePtr),
2490 MachineInstr::FrameDestroy);
2491 --MBBI;
2492 --AfterPop;
2493 }
2494 --MBBI;
2495 }
2496 }
2497
2498 MachineBasicBlock::iterator FirstCSPop = MBBI;
2499 // Skip the callee-saved pop instructions.
2500 while (MBBI != MBB.begin()) {
2501 MachineBasicBlock::iterator PI = std::prev(MBBI);
2502 unsigned Opc = PI->getOpcode();
2503
2504 if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
2505 if (!PI->getFlag(MachineInstr::FrameDestroy) ||
2506 (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::BTR64ri8 &&
2507 Opc != X86::ADD64ri32 && Opc != X86::POPP64r && Opc != X86::POP2 &&
2508 Opc != X86::POP2P && Opc != X86::LEA64r))
2509 break;
2510 FirstCSPop = PI;
2511 }
2512
2513 --MBBI;
2514 }
2515 if (ArgBaseReg.isValid()) {
2516 // Restore argument base pointer.
2517 auto *MI = X86FI->getStackPtrSaveMI();
2518 int FI = MI->getOperand(1).getIndex();
2519 unsigned MOVrm = Is64Bit ? X86::MOV64rm : X86::MOV32rm;
2520 // movl offset(%ebp), %basereg
2521 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVrm), ArgBaseReg), FI)
2522 .setMIFlag(MachineInstr::FrameDestroy);
2523 }
2524 MBBI = FirstCSPop;
2525
2526 if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET)
2527 emitCatchRetReturnValue(MBB, FirstCSPop, &*Terminator);
2528
2529 if (MBBI != MBB.end())
2530 DL = MBBI->getDebugLoc();
2531 // If there is an ADD32ri or SUB32ri of ESP immediately before this
2532 // instruction, merge the two instructions.
2533 if (NumBytes || MFI.hasVarSizedObjects())
2534 NumBytes += mergeSPUpdates(MBB, MBBI, true);
2535
2536 // If dynamic alloca is used, then reset esp to point to the last callee-saved
2537 // slot before popping them off! Same applies for the case, when stack was
2538 // realigned. Don't do this if this was a funclet epilogue, since the funclets
2539 // will not do realignment or dynamic stack allocation.
2540 if (((TRI->hasStackRealignment(MF)) || MFI.hasVarSizedObjects()) &&
2541 !IsFunclet) {
2542 if (TRI->hasStackRealignment(MF))
2543 MBBI = FirstCSPop;
2544 unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
2545 uint64_t LEAAmount =
2546 IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
2547
2548 if (X86FI->hasSwiftAsyncContext())
2549 LEAAmount -= 16;
2550
2551 // There are only two legal forms of epilogue:
2552 // - add SEHAllocationSize, %rsp
2553 // - lea SEHAllocationSize(%FramePtr), %rsp
2554 //
2555 // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence.
2556 // However, we may use this sequence if we have a frame pointer because the
2557 // effects of the prologue can safely be undone.
2558 if (LEAAmount != 0) {
2559 unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
2560 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), FramePtr,
2561 false, LEAAmount);
2562 --MBBI;
2563 } else {
2564 unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
2565 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr).addReg(FramePtr);
2566 --MBBI;
2567 }
2568 } else if (NumBytes) {
2569 // Adjust stack pointer back: ESP += numbytes.
2570 emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true);
2571 if (!HasFP && NeedsDwarfCFI) {
2572 // Define the current CFA rule to use the provided offset.
2573 BuildCFI(MBB, MBBI, DL,
2574 MCCFIInstruction::cfiDefCfaOffset(
2575 nullptr, CSSize + TailCallArgReserveSize + SlotSize),
2576 MachineInstr::FrameDestroy);
2577 }
2578 --MBBI;
2579 }
2580
2581 // Windows unwinder will not invoke function's exception handler if IP is
2582 // either in prologue or in epilogue. This behavior causes a problem when a
2583 // call immediately precedes an epilogue, because the return address points
2584 // into the epilogue. To cope with that, we insert an epilogue marker here,
2585 // then replace it with a 'nop' if it ends up immediately after a CALL in the
2586 // final emitted code.
2587 if (NeedsWin64CFI && MF.hasWinCFI())
2588 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
2589
2590 if (!HasFP && NeedsDwarfCFI) {
2591 MBBI = FirstCSPop;
2592 int64_t Offset = -(int64_t)CSSize - SlotSize;
2593 // Mark callee-saved pop instruction.
2594 // Define the current CFA rule to use the provided offset.
2595 while (MBBI != MBB.end()) {
2596 MachineBasicBlock::iterator PI = MBBI;
2597 unsigned Opc = PI->getOpcode();
2598 ++MBBI;
2599 if (Opc == X86::POP32r || Opc == X86::POP64r || Opc == X86::POPP64r ||
2600 Opc == X86::POP2 || Opc == X86::POP2P) {
2601 Offset += SlotSize;
2602 // Compared to a single pop, pop2 moves the stack by one extra
2603 // register slot.
2604 if (Opc == X86::POP2 || Opc == X86::POP2P)
2605 Offset += SlotSize;
2606 BuildCFI(MBB, MBBI, DL,
2607 MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset),
2608 MachineInstr::FrameDestroy);
2609 }
2610 }
2611 }
2612
2613 // Emit DWARF info specifying the restores of the callee-saved registers.
2614 // For an epilogue that returns, or for any other block without successors,
2615 // there is no need to generate .cfi_restore for callee-saved registers.
2616 if (NeedsDwarfCFI && !MBB.succ_empty())
2617 emitCalleeSavedFrameMoves(MBB, AfterPop, DL, false);
2618
2619 if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
2620 // Add the return addr area delta back since we are not tail calling.
2621 int Offset = -1 * X86FI->getTCReturnAddrDelta();
2622 assert(Offset >= 0 && "TCDelta should never be positive");
2623 if (Offset) {
2624 // Check for possible merge with preceding ADD instruction.
2625 Offset += mergeSPUpdates(MBB, Terminator, true);
2626 emitSPUpdate(MBB, Terminator, DL, Offset, /*InEpilogue=*/true);
2627 }
2628 }
2629
2630 // Emit tilerelease for AMX kernel.
2631 if (X86FI->getAMXProgModel() == AMXProgModelEnum::ManagedRA)
2632 BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
2633}
2634
2635 StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,
2636 int FI,
2637 Register &FrameReg) const {
2638 const MachineFrameInfo &MFI = MF.getFrameInfo();
2639
2640 bool IsFixed = MFI.isFixedObjectIndex(FI);
2641 // We can't calculate offset from frame pointer if the stack is realigned,
2642 // so enforce usage of stack/base pointer. The base pointer is used when we
2643 // have dynamic allocas in addition to dynamic realignment.
2644 if (TRI->hasBasePointer(MF))
2645 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister();
2646 else if (TRI->hasStackRealignment(MF))
2647 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister();
2648 else
2649 FrameReg = TRI->getFrameRegister(MF);
2650
2651 // Offset will hold the offset from the stack pointer at function entry to the
2652 // object.
2653 // We need to factor in additional offsets applied during the prologue to the
2654 // frame, base, and stack pointer depending on which is used.
2655 int64_t Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
2656 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2657 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2658 uint64_t StackSize = MFI.getStackSize();
2659 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2660 int64_t FPDelta = 0;
2661
2662 // In an x86 interrupt, remove the offset we added to account for the return
2663 // address from any stack object allocated in the caller's frame. Interrupts
2664 // do not have a standard return address. Fixed objects in the current frame,
2665 // such as SSE register spills, should not get this treatment.
2666 if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR &&
2667 Offset >= 0) {
2668 Offset += getOffsetOfLocalArea();
2669 }
2670
2671 if (IsWin64Prologue) {
2672 assert(!MFI.hasCalls() || (StackSize % 16) == 8);
2673
2674 // Calculate required stack adjustment.
2675 uint64_t FrameSize = StackSize - SlotSize;
2676 // If required, include space for extra hidden slot for stashing base
2677 // pointer.
2678 if (X86FI->getRestoreBasePointer())
2679 FrameSize += SlotSize;
2680 uint64_t NumBytes = FrameSize - CSSize;
2681
2682 uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
2683 if (FI && FI == X86FI->getFAIndex())
2684 return StackOffset::getFixed(-SEHFrameOffset);
2685
2686 // FPDelta is the offset from the "traditional" FP location of the old base
2687 // pointer followed by return address and the location required by the
2688 // restricted Win64 prologue.
2689 // Add FPDelta to all offsets below that go through the frame pointer.
2690 FPDelta = FrameSize - SEHFrameOffset;
2691 assert((!MFI.hasCalls() || (FPDelta % 16) == 0) &&
2692 "FPDelta isn't aligned per the Win64 ABI!");
2693 }
2694
2695 if (FrameReg == TRI->getFramePtr()) {
2696 // Skip saved EBP/RBP
2697 Offset += SlotSize;
2698
2699 // Account for restricted Windows prologue.
2700 Offset += FPDelta;
2701
2702 // Skip the RETADDR move area
2703 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2704 if (TailCallReturnAddrDelta < 0)
2705 Offset -= TailCallReturnAddrDelta;
2706
2708 }
2709
2710 // FrameReg is either the stack pointer or a base pointer. But the base is
2711 // located at the end of the statically known StackSize so the distinction
2712 // doesn't really matter.
2713 if (TRI->hasStackRealignment(MF) || TRI->hasBasePointer(MF))
2714 assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
2715 return StackOffset::getFixed(Offset + StackSize);
2716}
2717
2718 int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, int FI,
2719 Register &FrameReg) const {
2720 const MachineFrameInfo &MFI = MF.getFrameInfo();
2721 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2722 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2723 const auto it = WinEHXMMSlotInfo.find(FI);
2724
2725 if (it == WinEHXMMSlotInfo.end())
2726 return getFrameIndexReference(MF, FI, FrameReg).getFixed();
2727
2728 FrameReg = TRI->getStackRegister();
2729 return alignDown(MFI.getMaxCallFrameSize(), getStackAlign().value()) +
2730 it->second;
2731}
2732
2733 StackOffset
2734 X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF, int FI,
2735 Register &FrameReg,
2736 int Adjustment) const {
2737 const MachineFrameInfo &MFI = MF.getFrameInfo();
2738 FrameReg = TRI->getStackRegister();
2739 return StackOffset::getFixed(MFI.getObjectOffset(FI) -
2740 getOffsetOfLocalArea() + Adjustment);
2741}
2742
2743 StackOffset
2744 X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
2745 int FI, Register &FrameReg,
2746 bool IgnoreSPUpdates) const {
2747
2748 const MachineFrameInfo &MFI = MF.getFrameInfo();
2749 // Does not include any dynamic realign.
2750 const uint64_t StackSize = MFI.getStackSize();
2751 // LLVM arranges the stack as follows:
2752 // ...
2753 // ARG2
2754 // ARG1
2755 // RETADDR
2756 // PUSH RBP <-- RBP points here
2757 // PUSH CSRs
2758 // ~~~~~~~ <-- possible stack realignment (non-win64)
2759 // ...
2760 // STACK OBJECTS
2761 // ... <-- RSP after prologue points here
2762 // ~~~~~~~ <-- possible stack realignment (win64)
2763 //
2764 // if (hasVarSizedObjects()):
2765 // ... <-- "base pointer" (ESI/RBX) points here
2766 // DYNAMIC ALLOCAS
2767 // ... <-- RSP points here
2768 //
2769 // Case 1: In the simple case of no stack realignment and no dynamic
2770 // allocas, both "fixed" stack objects (arguments and CSRs) are addressable
2771 // with fixed offsets from RSP.
2772 //
2773 // Case 2: In the case of stack realignment with no dynamic allocas, fixed
2774 // stack objects are addressed with RBP and regular stack objects with RSP.
2775 //
2776 // Case 3: In the case of dynamic allocas and stack realignment, RSP is used
2777 // to address stack arguments for outgoing calls and nothing else. The "base
2778 // pointer" points to local variables, and RBP points to fixed objects.
2779 //
2780 // In cases 2 and 3, we can only answer for non-fixed stack objects, and the
2781 // answer we give is relative to the SP after the prologue, and not the
2782 // SP in the middle of the function.
2783
2784 if (MFI.isFixedObjectIndex(FI) && TRI->hasStackRealignment(MF) &&
2785 !STI.isTargetWin64())
2786 return getFrameIndexReference(MF, FI, FrameReg);
2787
2788 // If !hasReservedCallFrame the function might have SP adjustment in the
2789 // body. So, even though the offset is statically known, it depends on where
2790 // we are in the function.
2791 if (!IgnoreSPUpdates && !hasReservedCallFrame(MF))
2792 return getFrameIndexReference(MF, FI, FrameReg);
2793
2794 // We don't handle tail calls, and shouldn't be seeing them either.
2795 assert(MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta() >= 0 &&
2796 "we don't handle this case!");
2797
2798 // This is how the math works out:
2799 //
2800 // %rsp grows (i.e. gets lower) left to right. Each box below is
2801 // one word (eight bytes). Obj0 is the stack slot we're trying to
2802 // get to.
2803 //
2804 // ----------------------------------
2805 // | BP | Obj0 | Obj1 | ... | ObjN |
2806 // ----------------------------------
2807 // ^ ^ ^ ^
2808 // A B C E
2809 //
2810 // A is the incoming stack pointer.
2811 // (B - A) is the local area offset (-8 for x86-64) [1]
2812 // (C - A) is the Offset returned by MFI.getObjectOffset for Obj0 [2]
2813 //
2814 // |(E - B)| is the StackSize (absolute value, positive). For a
2815 // stack that grows down, this works out to be (B - E). [3]
2816 //
2817 // E is also the value of %rsp after stack has been set up, and we
2818 // want (C - E) -- the value we can add to %rsp to get to Obj0. Now
2819 // (C - E) == (C - A) - (B - A) + (B - E)
2820 // { Using [1], [2] and [3] above }
2821 // == getObjectOffset - LocalAreaOffset + StackSize
2822
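// Worked example (illustrative numbers only): with getObjectOffset == -16,
// LocalAreaOffset == -8 and StackSize == 40, the SP-relative offset is
// -16 - (-8) + 40 == 32.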
2823 return getFrameIndexReferenceSP(MF, FI, FrameReg, StackSize);
2824}
2825
2826 bool X86FrameLowering::assignCalleeSavedSpillSlots(
2827 MachineFunction &MF, const TargetRegisterInfo *TRI,
2828 std::vector<CalleeSavedInfo> &CSI) const {
2829 MachineFrameInfo &MFI = MF.getFrameInfo();
2830 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2831
2832 unsigned CalleeSavedFrameSize = 0;
2833 unsigned XMMCalleeSavedFrameSize = 0;
2834 auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2835 int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
2836
2837 int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2838
2839 if (TailCallReturnAddrDelta < 0) {
2840 // create RETURNADDR area
2841 // arg
2842 // arg
2843 // RETADDR
2844 // { ...
2845 // RETADDR area
2846 // ...
2847 // }
2848 // [EBP]
2849 MFI.CreateFixedObject(-TailCallReturnAddrDelta,
2850 TailCallReturnAddrDelta - SlotSize, true);
2851 }
2852
2853 // Spill the BasePtr if it's used.
2854 if (this->TRI->hasBasePointer(MF)) {
2855 // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
2856 if (MF.hasEHFunclets()) {
2857 int FI = MFI.CreateSpillStackObject(SlotSize, Align(SlotSize));
2858 X86FI->setHasSEHFramePtrSave(true);
2859 X86FI->setSEHFramePtrSaveIndex(FI);
2860 }
2861 }
2862
2863 if (hasFP(MF)) {
2864 // emitPrologue always spills frame register the first thing.
2865 SpillSlotOffset -= SlotSize;
2866 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2867
2868 // The async context lives directly before the frame pointer, and we
2869 // allocate a second slot to preserve stack alignment.
2870 if (X86FI->hasSwiftAsyncContext()) {
2871 SpillSlotOffset -= SlotSize;
2872 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2873 SpillSlotOffset -= SlotSize;
2874 }
2875
2876 // Since emitPrologue and emitEpilogue will handle spilling and restoring of
2877 // the frame register, we can delete it from CSI list and not have to worry
2878 // about avoiding it later.
2879 Register FPReg = TRI->getFrameRegister(MF);
2880 for (unsigned i = 0; i < CSI.size(); ++i) {
2881 if (TRI->regsOverlap(CSI[i].getReg(), FPReg)) {
2882 CSI.erase(CSI.begin() + i);
2883 break;
2884 }
2885 }
2886 }
2887
2888 // Strategy:
2889 // 1. Use push2 when
2890 // a) number of CSR > 1 if no need padding
2891 // b) number of CSR > 2 if need padding
2892 // 2. When the number of CSR push is odd
2893 // a. Start to use push2 from the 1st push if stack is 16B aligned.
2894 // b. Start to use push2 from the 2nd push if stack is not 16B aligned.
2895 // 3. When the number of CSR push is even, start to use push2 from the 1st
2896 // push and make the stack 16B aligned before the push
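// Example (illustrative): with three GPR CSRs and a 16B-aligned first slot,
// rule 2a pairs the first two saves into a single push2 and the remaining
// register is pushed normally.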
2897 unsigned NumRegsForPush2 = 0;
2898 if (STI.hasPush2Pop2()) {
2899 unsigned NumCSGPR = llvm::count_if(CSI, [](const CalleeSavedInfo &I) {
2900 return X86::GR64RegClass.contains(I.getReg());
2901 });
2902 bool NeedPadding = (SpillSlotOffset % 16 != 0) && (NumCSGPR % 2 == 0);
2903 bool UsePush2Pop2 = NeedPadding ? NumCSGPR > 2 : NumCSGPR > 1;
2904 X86FI->setPadForPush2Pop2(NeedPadding && UsePush2Pop2);
2905 NumRegsForPush2 = UsePush2Pop2 ? alignDown(NumCSGPR, 2) : 0;
2906 if (X86FI->padForPush2Pop2()) {
2907 SpillSlotOffset -= SlotSize;
2908 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2909 }
2910 }
2911
2912 // Assign slots for GPRs. It increases frame size.
2913 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2914 Register Reg = I.getReg();
2915
2916 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
2917 continue;
2918
2919 // A CSR is a candidate for push2/pop2 when its slot offset is 16B aligned
2920 // or when the number of candidates collected so far is odd.
2921 if (X86FI->getNumCandidatesForPush2Pop2() < NumRegsForPush2 &&
2922 (SpillSlotOffset % 16 == 0 ||
2923 X86FI->getNumCandidatesForPush2Pop2() % 2))
2924 X86FI->addCandidateForPush2Pop2(Reg);
2925
2926 SpillSlotOffset -= SlotSize;
2927 CalleeSavedFrameSize += SlotSize;
2928
2929 int SlotIndex = MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2930 I.setFrameIdx(SlotIndex);
2931 }
2932
2933 // Adjust the offset of spill slot as we know the accurate callee saved frame
2934 // size.
2935 if (X86FI->getRestoreBasePointer()) {
2936 SpillSlotOffset -= SlotSize;
2937 CalleeSavedFrameSize += SlotSize;
2938
2939 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2940 // TODO: would saving the slot index be better?
2941 X86FI->setRestoreBasePointer(CalleeSavedFrameSize);
2942 }
2943 assert(X86FI->getNumCandidatesForPush2Pop2() % 2 == 0 &&
2944 "Expect even candidates for push2/pop2");
2945 if (X86FI->getNumCandidatesForPush2Pop2())
2946 ++NumFunctionUsingPush2Pop2;
2947 X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
2948 MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
2949
2950 // Assign slots for XMMs.
2951 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2952 Register Reg = I.getReg();
2953 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
2954 continue;
2955
2956 // If this is k-register make sure we lookup via the largest legal type.
2957 MVT VT = MVT::Other;
2958 if (X86::VK16RegClass.contains(Reg))
2959 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
2960
2961 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
2962 unsigned Size = TRI->getSpillSize(*RC);
2963 Align Alignment = TRI->getSpillAlign(*RC);
2964 // ensure alignment
2965 assert(SpillSlotOffset < 0 && "SpillSlotOffset should always be < 0 on X86");
2966 SpillSlotOffset = -alignTo(-SpillSlotOffset, Alignment);
2967
2968 // spill into slot
2969 SpillSlotOffset -= Size;
2970 int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
2971 I.setFrameIdx(SlotIndex);
2972 MFI.ensureMaxAlignment(Alignment);
2973
2974 // Save the start offset and size of XMM in stack frame for funclets.
2975 if (X86::VR128RegClass.contains(Reg)) {
2976 WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize;
2977 XMMCalleeSavedFrameSize += Size;
2978 }
2979 }
2980
2981 return true;
2982}
2983
2984 bool X86FrameLowering::spillCalleeSavedRegisters(
2985 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2986 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2987 DebugLoc DL = MBB.findDebugLoc(MI);
2988
2989 // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI
2990 // for us, and there are no XMM CSRs on Win32.
2991 if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows())
2992 return true;
2993
2994 // Push GPRs. It increases frame size.
2995 const MachineFunction &MF = *MBB.getParent();
2996 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2997 if (X86FI->padForPush2Pop2())
2998 emitSPUpdate(MBB, MI, DL, -(int64_t)SlotSize, /*InEpilogue=*/false);
2999
3000 // Update LiveIn of the basic block and decide whether we can add a kill flag
3001 // to the use.
3002 auto UpdateLiveInCheckCanKill = [&](Register Reg) {
3003 const MachineRegisterInfo &MRI = MF.getRegInfo();
3004 // Do not set a kill flag on values that are also marked as live-in. This
3005 // happens with the @llvm.returnaddress intrinsic and with arguments
3006 // passed in callee saved registers.
3007 // Omitting the kill flags is conservatively correct even if the live-in
3008 // is not used after all.
3009 if (MRI.isLiveIn(Reg))
3010 return false;
3011 MBB.addLiveIn(Reg);
3012 // Check if any subregister is live-in
3013 for (MCRegAliasIterator AReg(Reg, TRI, false); AReg.isValid(); ++AReg)
3014 if (MRI.isLiveIn(*AReg))
3015 return false;
3016 return true;
3017 };
3018 auto UpdateLiveInGetKillRegState = [&](Register Reg) {
3019 return getKillRegState(UpdateLiveInCheckCanKill(Reg));
3020 };
3021
3022 for (auto RI = CSI.rbegin(), RE = CSI.rend(); RI != RE; ++RI) {
3023 Register Reg = RI->getReg();
3024 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
3025 continue;
3026
3027 if (X86FI->isCandidateForPush2Pop2(Reg)) {
3028 Register Reg2 = (++RI)->getReg();
3029 BuildMI(MBB, MI, DL, TII.get(getPUSH2Opcode(STI)))
3030 .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
3031 .addReg(Reg2, UpdateLiveInGetKillRegState(Reg2))
3032 .setMIFlag(MachineInstr::FrameSetup);
3033 } else {
3034 BuildMI(MBB, MI, DL, TII.get(getPUSHOpcode(STI)))
3035 .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
3036 .setMIFlag(MachineInstr::FrameSetup);
3037 }
3038 }
3039
3040 if (X86FI->getRestoreBasePointer()) {
3041 unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
3042 Register BaseReg = this->TRI->getBaseRegister();
3043 BuildMI(MBB, MI, DL, TII.get(Opc))
3044 .addReg(BaseReg, getKillRegState(true))
3045 .setMIFlag(MachineInstr::FrameSetup);
3046 }
3047
3048 // Make XMM regs spilled. X86 does not have ability of push/pop XMM.
3049 // It can be done by spilling XMMs to stack frame.
3050 for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
3051 Register Reg = I.getReg();
3052 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3053 continue;
3054
3055 // If this is k-register make sure we lookup via the largest legal type.
3056 MVT VT = MVT::Other;
3057 if (X86::VK16RegClass.contains(Reg))
3058 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
3059
3060 // Add the callee-saved register as live-in. It's killed at the spill.
3061 MBB.addLiveIn(Reg);
3062 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
3063
3064 TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, TRI,
3065 Register());
3066 --MI;
3067 MI->setFlag(MachineInstr::FrameSetup);
3068 ++MI;
3069 }
3070
3071 return true;
3072}
3073
3074void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB,
3075 MachineBasicBlock::iterator MBBI,
3076 MachineInstr *CatchRet) const {
3077 // SEH shouldn't use catchret.
3078 assert(!isAsynchronousEHPersonality(classifyEHPersonality(
3079 MBB.getParent()->getFunction().getPersonalityFn())) &&
3080 "SEH should not use CATCHRET");
3081 const DebugLoc &DL = CatchRet->getDebugLoc();
3082 MachineBasicBlock *CatchRetTarget = CatchRet->getOperand(0).getMBB();
3083
3084 // Fill EAX/RAX with the address of the target block.
3085 if (STI.is64Bit()) {
3086 // LEA64r CatchRetTarget(%rip), %rax
3087 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), X86::RAX)
3088 .addReg(X86::RIP)
3089 .addImm(0)
3090 .addReg(0)
3091 .addMBB(CatchRetTarget)
3092 .addReg(0);
3093 } else {
3094 // MOV32ri $CatchRetTarget, %eax
3095 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
3096 .addMBB(CatchRetTarget);
3097 }
3098
3099 // Record that we've taken the address of CatchRetTarget and no longer just
3100 // reference it in a terminator.
3101 CatchRetTarget->setMachineBlockAddressTaken();
3102}
3103
3104 bool X86FrameLowering::restoreCalleeSavedRegisters(
3105 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
3106 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
3107 if (CSI.empty())
3108 return false;
3109
3110 if (MI != MBB.end() && isFuncletReturnInstr(*MI) && STI.isOSWindows()) {
3111 // Don't restore CSRs in 32-bit EH funclets. Matches
3112 // spillCalleeSavedRegisters.
3113 if (STI.is32Bit())
3114 return true;
3115 // Don't restore CSRs before an SEH catchret. SEH except blocks do not form
3116 // funclets. emitEpilogue transforms these to normal jumps.
3117 if (MI->getOpcode() == X86::CATCHRET) {
3118 const Function &F = MBB.getParent()->getFunction();
3119 bool IsSEH = isAsynchronousEHPersonality(
3120 classifyEHPersonality(F.getPersonalityFn()));
3121 if (IsSEH)
3122 return true;
3123 }
3124 }
3125
3126 DebugLoc DL = MBB.findDebugLoc(MI);
3127
3128 // Reload XMMs from stack frame.
3129 for (const CalleeSavedInfo &I : CSI) {
3130 Register Reg = I.getReg();
3131 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3132 continue;
3133
3134 // If this is k-register make sure we lookup via the largest legal type.
3135 MVT VT = MVT::Other;
3136 if (X86::VK16RegClass.contains(Reg))
3137 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
3138
3139 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
3140 TII.loadRegFromStackSlot(MBB, MI, Reg, I.getFrameIdx(), RC, TRI,
3141 Register());
3142 }
3143
3144 // Clear the stack slot for spill base pointer register.
3145 MachineFunction &MF = *MBB.getParent();
3146 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3147 if (X86FI->getRestoreBasePointer()) {
3148 unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
3149 Register BaseReg = this->TRI->getBaseRegister();
3150 BuildMI(MBB, MI, DL, TII.get(Opc), BaseReg)
3151 .setMIFlag(MachineInstr::FrameDestroy);
3152 }
3153
3154 // POP GPRs.
3155 for (auto I = CSI.begin(), E = CSI.end(); I != E; ++I) {
3156 Register Reg = I->getReg();
3157 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
3158 continue;
3159
3160 if (X86FI->isCandidateForPush2Pop2(Reg))
3161 BuildMI(MBB, MI, DL, TII.get(getPOP2Opcode(STI)), Reg)
3162 .addReg((++I)->getReg(), RegState::Define)
3163 .setMIFlag(MachineInstr::FrameDestroy);
3164 else
3165 BuildMI(MBB, MI, DL, TII.get(getPOPOpcode(STI)), Reg)
3166 .setMIFlag(MachineInstr::FrameDestroy);
3167 }
3168 if (X86FI->padForPush2Pop2())
3169 emitSPUpdate(MBB, MI, DL, SlotSize, /*InEpilogue=*/true);
3170
3171 return true;
3172}
3173
3174 void X86FrameLowering::determineCalleeSaves(MachineFunction &MF,
3175 BitVector &SavedRegs,
3176 RegScavenger *RS) const {
3177 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
3178
3179 // Spill the BasePtr if it's used.
3180 if (TRI->hasBasePointer(MF)) {
3181 Register BasePtr = TRI->getBaseRegister();
3182 if (STI.isTarget64BitILP32())
3183 BasePtr = getX86SubSuperRegister(BasePtr, 64);
3184 SavedRegs.set(BasePtr);
3185 }
3186}
3187
3188static bool HasNestArgument(const MachineFunction *MF) {
3189 const Function &F = MF->getFunction();
3190 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
3191 I++) {
3192 if (I->hasNestAttr() && !I->use_empty())
3193 return true;
3194 }
3195 return false;
3196}
3197
3198/// GetScratchRegister - Get a temp register for performing work in the
3199/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
3200/// and the properties of the function either one or two registers will be
3201/// needed. Set primary to true for the first register, false for the second.
3202static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64,
3203 const MachineFunction &MF, bool Primary) {
3204 CallingConv::ID CallingConvention = MF.getFunction().getCallingConv();
3205
3206 // Erlang stuff.
3207 if (CallingConvention == CallingConv::HiPE) {
3208 if (Is64Bit)
3209 return Primary ? X86::R14 : X86::R13;
3210 else
3211 return Primary ? X86::EBX : X86::EDI;
3212 }
3213
3214 if (Is64Bit) {
3215 if (IsLP64)
3216 return Primary ? X86::R11 : X86::R12;
3217 else
3218 return Primary ? X86::R11D : X86::R12D;
3219 }
3220
3221 bool IsNested = HasNestArgument(&MF);
3222
3223 if (CallingConvention == CallingConv::X86_FastCall ||
3224 CallingConvention == CallingConv::Fast ||
3225 CallingConvention == CallingConv::Tail) {
3226 if (IsNested)
3227 report_fatal_error("Segmented stacks do not support fastcall with "
3228 "nested functions.");
3229 return Primary ? X86::EAX : X86::ECX;
3230 }
3231 if (IsNested)
3232 return Primary ? X86::EDX : X86::EAX;
3233 return Primary ? X86::ECX : X86::EAX;
3234}
3235
3236// The stack limit in the TCB is set to this many bytes above the actual stack
3237 // limit.
3238 static const uint64_t kSplitStackAvailable = 256;
3239
3240 void X86FrameLowering::adjustForSegmentedStacks(
3241 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3242 MachineFrameInfo &MFI = MF.getFrameInfo();
3243 uint64_t StackSize;
3244 unsigned TlsReg, TlsOffset;
3245 DebugLoc DL;
3246
3247 // To support shrink-wrapping we would need to insert the new blocks
3248 // at the right place and update the branches to PrologueMBB.
3249 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3250
3251 unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3252 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3253 "Scratch register is live-in");
3254
3255 if (MF.getFunction().isVarArg())
3256 report_fatal_error("Segmented stacks do not support vararg functions.");
3257 if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
3258 !STI.isTargetWin64() && !STI.isTargetFreeBSD() &&
3259 !STI.isTargetDragonFly())
3260 report_fatal_error("Segmented stacks not supported on this platform.");
3261
3262 // Eventually StackSize will be calculated by a link-time pass, which will
3263 // also decide whether checking code needs to be injected into this particular
3264 // prologue.
3265 StackSize = MFI.getStackSize();
3266
3267 if (!MFI.needsSplitStackProlog())
3268 return;
3269
3270 MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
3271 MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
3272 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3273 bool IsNested = false;
3274
3275 // We need to know if the function has a nest argument only in 64 bit mode.
3276 if (Is64Bit)
3277 IsNested = HasNestArgument(&MF);
3278
3279 // The MOV R10, RAX needs to be in a different block, since the RET we emit in
3280 // allocMBB needs to be the last (terminating) instruction.
3281
3282 for (const auto &LI : PrologueMBB.liveins()) {
3283 allocMBB->addLiveIn(LI);
3284 checkMBB->addLiveIn(LI);
3285 }
3286
3287 if (IsNested)
3288 allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
3289
3290 MF.push_front(allocMBB);
3291 MF.push_front(checkMBB);
3292
3293 // When the frame size is less than 256 we just compare the stack
3294 // boundary directly to the value of the stack pointer, per gcc.
3295 bool CompareStackPointer = StackSize < kSplitStackAvailable;
3296
3297 // Read the limit off the current stacklet off the stack_guard location.
3298 if (Is64Bit) {
3299 if (STI.isTargetLinux()) {
3300 TlsReg = X86::FS;
3301 TlsOffset = IsLP64 ? 0x70 : 0x40;
3302 } else if (STI.isTargetDarwin()) {
3303 TlsReg = X86::GS;
3304 TlsOffset = 0x60 + 90 * 8; // See pthread_machdep.h. Steal TLS slot 90.
3305 } else if (STI.isTargetWin64()) {
3306 TlsReg = X86::GS;
3307 TlsOffset = 0x28; // pvArbitrary, reserved for application use
3308 } else if (STI.isTargetFreeBSD()) {
3309 TlsReg = X86::FS;
3310 TlsOffset = 0x18;
3311 } else if (STI.isTargetDragonFly()) {
3312 TlsReg = X86::FS;
3313 TlsOffset = 0x20; // use tls_tcb.tcb_segstack
3314 } else {
3315 report_fatal_error("Segmented stacks not supported on this platform.");
3316 }
3317
3318 if (CompareStackPointer)
3319 ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
3320 else
3321 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r),
3322 ScratchReg)
3323 .addReg(X86::RSP)
3324 .addImm(1)
3325 .addReg(0)
3326 .addImm(-StackSize)
3327 .addReg(0);
3328
3329 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm))
3330 .addReg(ScratchReg)
3331 .addReg(0)
3332 .addImm(1)
3333 .addReg(0)
3334 .addImm(TlsOffset)
3335 .addReg(TlsReg);
3336 } else {
3337 if (STI.isTargetLinux()) {
3338 TlsReg = X86::GS;
3339 TlsOffset = 0x30;
3340 } else if (STI.isTargetDarwin()) {
3341 TlsReg = X86::GS;
3342 TlsOffset = 0x48 + 90 * 4;
3343 } else if (STI.isTargetWin32()) {
3344 TlsReg = X86::FS;
3345 TlsOffset = 0x14; // pvArbitrary, reserved for application use
3346 } else if (STI.isTargetDragonFly()) {
3347 TlsReg = X86::FS;
3348 TlsOffset = 0x10; // use tls_tcb.tcb_segstack
3349 } else if (STI.isTargetFreeBSD()) {
3350 report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
3351 } else {
3352 report_fatal_error("Segmented stacks not supported on this platform.");
3353 }
3354
3355 if (CompareStackPointer)
3356 ScratchReg = X86::ESP;
3357 else
3358 BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg)
3359 .addReg(X86::ESP)
3360 .addImm(1)
3361 .addReg(0)
3362 .addImm(-StackSize)
3363 .addReg(0);
3364
3365 if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() ||
3366 STI.isTargetDragonFly()) {
3367 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3368 .addReg(ScratchReg)
3369 .addReg(0)
3370 .addImm(0)
3371 .addReg(0)
3372 .addImm(TlsOffset)
3373 .addReg(TlsReg);
3374 } else if (STI.isTargetDarwin()) {
3375
3376 // TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
3377 unsigned ScratchReg2;
3378 bool SaveScratch2;
3379 if (CompareStackPointer) {
3380 // The primary scratch register is available for holding the TLS offset.
3381 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3382 SaveScratch2 = false;
3383 } else {
3384 // Need to use a second register to hold the TLS offset
3385 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
3386
3387 // Unfortunately, with fastcc the second scratch register may hold an
3388 // argument.
3389 SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
3390 }
3391
3392 // If Scratch2 is live-in then it needs to be saved.
3393 assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
3394 "Scratch register is live-in and not saved");
3395
3396 if (SaveScratch2)
3397 BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
3398 .addReg(ScratchReg2, RegState::Kill);
3399
3400 BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
3401 .addImm(TlsOffset);
3402 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3403 .addReg(ScratchReg)
3404 .addReg(ScratchReg2)
3405 .addImm(1)
3406 .addReg(0)
3407 .addImm(0)
3408 .addReg(TlsReg);
3409
3410 if (SaveScratch2)
3411 BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
3412 }
3413 }
3414
3415 // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
3416 // It jumps to normal execution of the function body.
3417 BuildMI(checkMBB, DL, TII.get(X86::JCC_1))
3418 .addMBB(&PrologueMBB)
3419 .addImm(X86::COND_AE);
3420
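// Illustrative sketch (not from the source): on Linux x86-64 (LP64), and
// assuming %r11 was picked as the scratch register, the check built above
// amounts to
//   leaq -StackSize(%rsp), %r11   # SP as it would be after the allocation
//   cmpq %fs:0x70, %r11           # compare against the stacklet limit
//   jae  <function body>          # enough stack: skip __morestack
// The five trailing LEA/CMP operands encode the usual X86 memory reference:
// base, scale, index, displacement and segment.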
3421 // On 32 bit we first push the arguments size and then the frame size. On 64
3422 // bit, we pass the stack frame size in r10 and the argument size in r11.
3423 if (Is64Bit) {
3424 // Functions with nested arguments use R10, so it needs to be saved across
3425 // the call to __morestack.
3426
3427 const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
3428 const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
3429 const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
3430 const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
3431
3432 if (IsNested)
3433 BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
3434
3435 BuildMI(allocMBB, DL, TII.get(getMOVriOpcode(IsLP64, StackSize)), Reg10)
3436 .addImm(StackSize);
3437 BuildMI(allocMBB, DL,
3438 TII.get(getMOVriOpcode(IsLP64, X86FI->getArgumentStackSize())),
3439 Reg11)
3440 .addImm(X86FI->getArgumentStackSize());
3441 } else {
3442 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i))
3443 .addImm(X86FI->getArgumentStackSize());
3444 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i)).addImm(StackSize);
3445 }
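// A minimal sketch of allocMBB under the assumptions above (LP64, not
// nested, small code model; sizes are hypothetical):
//   movq  $StackSize, %r10            # frame size
//   movq  $ArgumentStackSize, %r11    # incoming argument area
//   callq __morestack
//   retq                              # MORESTACK_RET
// On 32-bit targets the same two values travel on the stack instead:
//   pushl $ArgumentStackSize
//   pushl $StackSize
//   calll __morestack
//   retl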
3446
3447 // __morestack is in libgcc
3448 if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
3449 // Under the large code model, we cannot assume that __morestack lives
3450 // within 2^31 bytes of the call site, so we cannot use pc-relative
3451 // addressing. We cannot perform the call via a temporary register,
3452 // as the rax register may be used to store the static chain, and all
3453 // other suitable registers may be either callee-save or used for
3454 // parameter passing. We cannot use the stack at this point either
3455 // because __morestack manipulates the stack directly.
3456 //
3457 // To avoid these issues, perform an indirect call via a read-only memory
3458 // location containing the address.
3459 //
3460 // This solution is not perfect, as it assumes that the .rodata section
3461 // is laid out within 2^31 bytes of each function body, but this seems
3462 // to be sufficient for JIT.
3463 // FIXME: Add retpoline support and remove the error here.
3464 if (STI.useIndirectThunkCalls())
3465 report_fatal_error("Emitting morestack calls on 64-bit with the large "
3466 "code model and thunks not yet implemented.");
3467 BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
3468 .addReg(X86::RIP)
3469 .addImm(0)
3470 .addReg(0)
3471 .addExternalSymbol("__morestack_addr")
3472 .addReg(0);
3473 } else {
3474 if (Is64Bit)
3475 BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
3476 .addExternalSymbol("__morestack");
3477 else
3478 BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
3479 .addExternalSymbol("__morestack");
3480 }
3481
3482 if (IsNested)
3483 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
3484 else
3485 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
3486
3487 allocMBB->addSuccessor(&PrologueMBB);
3488
3489 checkMBB->addSuccessor(allocMBB, BranchProbability::getZero());
3490 checkMBB->addSuccessor(&PrologueMBB, BranchProbability::getOne());
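// Note: the zero/one weights above tell later passes that the __morestack
// path is essentially never taken, so the common case (enough stack) falls
// through straight into the function body.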
3491
3492#ifdef EXPENSIVE_CHECKS
3493 MF.verify();
3494#endif
3495}
3496
3497/// Lookup an ERTS parameter in the !hipe.literals named metadata node.
3498/// HiPE provides Erlang Runtime System-internal parameters, such as PCB offsets
3499/// to fields it needs, through a named metadata node "hipe.literals" containing
3500/// name-value pairs.
3501static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD,
3502 const StringRef LiteralName) {
3503 for (int i = 0, e = HiPELiteralsMD->getNumOperands(); i != e; ++i) {
3504 MDNode *Node = HiPELiteralsMD->getOperand(i);
3505 if (Node->getNumOperands() != 2)
3506 continue;
3507 MDString *NodeName = dyn_cast<MDString>(Node->getOperand(0));
3508 ValueAsMetadata *NodeVal = dyn_cast<ValueAsMetadata>(Node->getOperand(1));
3509 if (!NodeName || !NodeVal)
3510 continue;
3511 ConstantInt *ValConst = dyn_cast_or_null<ConstantInt>(NodeVal->getValue());
3512 if (ValConst && NodeName->getString() == LiteralName) {
3513 return ValConst->getZExtValue();
3514 }
3515 }
3516
3517 report_fatal_error("HiPE literal " + LiteralName +
3518 " required but not provided");
3519}
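// For illustration only (hypothetical values): a module prepared for HiPE
// might provide the literals consumed here as named metadata, e.g.
//   !hipe.literals = !{!0, !1, !2}
//   !0 = !{!"P_NSP_LIMIT", i32 160}
//   !1 = !{!"X86_LEAF_WORDS", i32 24}
//   !2 = !{!"AMD64_LEAF_WORDS", i32 24}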
3520
3521// Return true if there are no non-ehpad successors to MBB and there are no
3522// non-meta instructions between MBBI and MBB.end().
3523 static bool blockEndIsUnreachable(const MachineBasicBlock &MBB,
3524 MachineBasicBlock::const_iterator MBBI) {
3525 return llvm::all_of(
3526 MBB.successors(),
3527 [](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) &&
3528 std::all_of(MBBI, MBB.end(), [](const MachineInstr &MI) {
3529 return MI.isMetaInstruction();
3530 });
3531}
3532
3533/// Erlang programs may need a special prologue to handle the stack size they
3534 /// might need at runtime. That is because Erlang/OTP does not implement a C
3535 /// stack but uses a custom hybrid stack/heap architecture.
3536 /// (for more information see Erik Stenman's Ph.D. thesis:
3537/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
3538///
3539/// CheckStack:
3540/// temp0 = sp - MaxStack
3541/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3542/// OldStart:
3543/// ...
3544/// IncStack:
3545/// call inc_stack # doubles the stack space
3546/// temp0 = sp - MaxStack
3547/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3548 void X86FrameLowering::adjustForHiPEPrologue(
3549 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3550 MachineFrameInfo &MFI = MF.getFrameInfo();
3551 DebugLoc DL;
3552
3553 // To support shrink-wrapping we would need to insert the new blocks
3554 // at the right place and update the branches to PrologueMBB.
3555 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3556
3557 // HiPE-specific values
3558 NamedMDNode *HiPELiteralsMD =
3559 MF.getFunction().getParent()->getNamedMetadata("hipe.literals");
3560 if (!HiPELiteralsMD)
3561 report_fatal_error(
3562 "Can't generate HiPE prologue without runtime parameters");
3563 const unsigned HipeLeafWords = getHiPELiteral(
3564 HiPELiteralsMD, Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS");
3565 const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
3566 const unsigned Guaranteed = HipeLeafWords * SlotSize;
3567 unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs
3568 ? MF.getFunction().arg_size() - CCRegisteredArgs
3569 : 0;
3570 unsigned MaxStack = MFI.getStackSize() + CallerStkArity * SlotSize + SlotSize;
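// For example (hypothetical sizes): on x86-64, SlotSize == 8 and
// CCRegisteredArgs == 6, so a function with a 40-byte frame and 8 formal
// arguments gets CallerStkArity = 8 - 6 = 2 and
// MaxStack = 40 + 2 * 8 + 8 = 64 bytes.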
3571
3572 assert(STI.isTargetLinux() &&
3573 "HiPE prologue is only supported on Linux operating systems.");
3574
3575 // Compute the largest caller's frame that is needed to fit the callees'
3576 // frames. This 'MaxStack' is computed from:
3577 //
3578 // a) the fixed frame size, which is the space needed for all spilled temps,
3579 // b) outgoing on-stack parameter areas, and
3580 // c) the minimum stack space this function needs to make available for the
3581 // functions it calls (a tunable ABI property).
3582 if (MFI.hasCalls()) {
3583 unsigned MoreStackForCalls = 0;
3584
3585 for (auto &MBB : MF) {
3586 for (auto &MI : MBB) {
3587 if (!MI.isCall())
3588 continue;
3589
3590 // Get callee operand.
3591 const MachineOperand &MO = MI.getOperand(0);
3592
3593 // Only take account of global function calls (no closures etc.).
3594 if (!MO.isGlobal())
3595 continue;
3596
3597 const Function *F = dyn_cast<Function>(MO.getGlobal());
3598 if (!F)
3599 continue;
3600
3601 // Do not update 'MaxStack' for primitive and built-in functions
3602 // (encoded with names either starting with "erlang."/"bif_" or not
3603 // having a ".", such as a simple <Module>.<Function>.<Arity>, or an
3604 // "_", such as the BIF "suspend_0") as they are executed on another
3605 // stack.
3606 if (F->getName().contains("erlang.") || F->getName().contains("bif_") ||
3607 F->getName().find_first_of("._") == StringRef::npos)
3608 continue;
3609
3610 unsigned CalleeStkArity = F->arg_size() > CCRegisteredArgs
3611 ? F->arg_size() - CCRegisteredArgs
3612 : 0;
3613 if (HipeLeafWords - 1 > CalleeStkArity)
3614 MoreStackForCalls =
3615 std::max(MoreStackForCalls,
3616 (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
3617 }
3618 }
3619 MaxStack += MoreStackForCalls;
3620 }
3621
3622 // If the needed stack frame is larger than the guaranteed size, runtime
3623 // checks and calls to the "inc_stack_0" BIF are inserted in the prologue.
3624 if (MaxStack > Guaranteed) {
3625 MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
3626 MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
3627
3628 for (const auto &LI : PrologueMBB.liveins()) {
3629 stackCheckMBB->addLiveIn(LI);
3630 incStackMBB->addLiveIn(LI);
3631 }
3632
3633 MF.push_front(incStackMBB);
3634 MF.push_front(stackCheckMBB);
3635
3636 unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
3637 unsigned LEAop, CMPop, CALLop;
3638 SPLimitOffset = getHiPELiteral(HiPELiteralsMD, "P_NSP_LIMIT");
3639 if (Is64Bit) {
3640 SPReg = X86::RSP;
3641 PReg = X86::RBP;
3642 LEAop = X86::LEA64r;
3643 CMPop = X86::CMP64rm;
3644 CALLop = X86::CALL64pcrel32;
3645 } else {
3646 SPReg = X86::ESP;
3647 PReg = X86::EBP;
3648 LEAop = X86::LEA32r;
3649 CMPop = X86::CMP32rm;
3650 CALLop = X86::CALLpcrel32;
3651 }
3652
3653 ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3654 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3655 "HiPE prologue scratch register is live-in");
3656
3657 // Create new MBB for StackCheck:
3658 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
3659 false, -MaxStack);
3660 // SPLimitOffset is in a fixed heap location (pointed by BP).
3661 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
3662 PReg, false, SPLimitOffset);
3663 BuildMI(stackCheckMBB, DL, TII.get(X86::JCC_1))
3664 .addMBB(&PrologueMBB)
3665 .addImm(X86::COND_AE);
3666
3667 // Create new MBB for IncStack:
3668 BuildMI(incStackMBB, DL, TII.get(CALLop)).addExternalSymbol("inc_stack_0");
3669 addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
3670 false, -MaxStack);
3671 addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
3672 PReg, false, SPLimitOffset);
3673 BuildMI(incStackMBB, DL, TII.get(X86::JCC_1))
3674 .addMBB(incStackMBB)
3675 .addImm(X86::COND_LE);
3676
3677 stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100});
3678 stackCheckMBB->addSuccessor(incStackMBB, {1, 100});
3679 incStackMBB->addSuccessor(&PrologueMBB, {99, 100});
3680 incStackMBB->addSuccessor(incStackMBB, {1, 100});
3681 }
3682#ifdef EXPENSIVE_CHECKS
3683 MF.verify();
3684#endif
3685}
3686
3687bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
3688 MachineBasicBlock::iterator MBBI,
3689 const DebugLoc &DL,
3690 int Offset) const {
3691 if (Offset <= 0)
3692 return false;
3693
3694 if (Offset % SlotSize)
3695 return false;
3696
3697 int NumPops = Offset / SlotSize;
3698 // This is only worth it if we have at most 2 pops.
3699 if (NumPops != 1 && NumPops != 2)
3700 return false;
3701
3702 // Handle only the trivial case where the adjustment directly follows
3703 // a call. This is the most common one, anyway.
3704 if (MBBI == MBB.begin())
3705 return false;
3706 MachineBasicBlock::iterator Prev = std::prev(MBBI);
3707 if (!Prev->isCall() || !Prev->getOperand(1).isRegMask())
3708 return false;
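// Illustrative sketch (hypothetical registers): on a 32-bit target an
// 8-byte adjustment right after a call, e.g.
//   calll f
//   addl  $8, %esp        # 3 bytes
// can be shrunk by this function to
//   calll f
//   popl  %ecx            # 1 byte each
//   popl  %edx
// with the pop destinations chosen below from GR32_NOREX_NOSP among the
// registers the call already clobbers.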
3709
3710 unsigned Regs[2];
3711 unsigned FoundRegs = 0;
3712
3713 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3714 const MachineOperand &RegMask = Prev->getOperand(1);
3715
3716 auto &RegClass =
3717 Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass;
3718 // Try to find up to NumPops free registers.
3719 for (auto Candidate : RegClass) {
3720 // Poor man's liveness:
3721 // Since we're immediately after a call, any register that is clobbered
3722 // by the call and not defined by it can be considered dead.
3723 if (!RegMask.clobbersPhysReg(Candidate))
3724 continue;
3725
3726 // Don't clobber reserved registers
3727 if (MRI.isReserved(Candidate))
3728 continue;
3729
3730 bool IsDef = false;
3731 for (const MachineOperand &MO : Prev->implicit_operands()) {
3732 if (MO.isReg() && MO.isDef() &&
3733 TRI->isSuperOrSubRegisterEq(MO.getReg(), Candidate)) {
3734 IsDef = true;
3735 break;
3736 }
3737 }
3738
3739 if (IsDef)
3740 continue;
3741
3742 Regs[FoundRegs++] = Candidate;
3743 if (FoundRegs == (unsigned)NumPops)
3744 break;
3745 }
3746
3747 if (FoundRegs == 0)
3748 return false;
3749
3750 // If we found only one free register, but need two, reuse the same one twice.
3751 while (FoundRegs < (unsigned)NumPops)
3752 Regs[FoundRegs++] = Regs[0];
3753
3754 for (int i = 0; i < NumPops; ++i)
3755 BuildMI(MBB, MBBI, DL, TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r),
3756 Regs[i]);
3757
3758 return true;
3759}
3760
3761 MachineBasicBlock::iterator X86FrameLowering::eliminateCallFramePseudoInstr(
3762 MachineFunction &MF, MachineBasicBlock &MBB,
3763 MachineBasicBlock::iterator I) const {
3764 bool reserveCallFrame = hasReservedCallFrame(MF);
3765 unsigned Opcode = I->getOpcode();
3766 bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
3767 DebugLoc DL = I->getDebugLoc(); // copy DebugLoc as I will be erased.
3768 uint64_t Amount = TII.getFrameSize(*I);
3769 uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(*I) : 0;
3770 I = MBB.erase(I);
3771 auto InsertPos = skipDebugInstructionsForward(I, MBB.end());
3772
3773 // Try to avoid emitting dead SP adjustments if the block end is unreachable,
3774 // typically because the function is marked noreturn (abort, throw,
3775 // assert_fail, etc).
3776 if (isDestroy && blockEndIsUnreachable(MBB, I))
3777 return I;
3778
3779 if (!reserveCallFrame) {
3780 // If the stack pointer can be changed after prologue, turn the
3781 // adjcallstackup instruction into a 'sub ESP, <amt>' and the
3782 // adjcallstackdown instruction into 'add ESP, <amt>'
3783
3784 // We need to keep the stack aligned properly. To do this, we round the
3785 // amount of space needed for the outgoing arguments up to the next
3786 // alignment boundary.
3787 Amount = alignTo(Amount, getStackAlign());
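// For example, Amount = 20 with a 16-byte stack alignment is rounded up to
// 32 here.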
3788
3789 const Function &F = MF.getFunction();
3790 bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
3791 bool DwarfCFI = !WindowsCFI && MF.needsFrameMoves();
3792
3793 // If we have any exception handlers in this function, and we adjust
3794 // the SP before calls, we may need to indicate this to the unwinder
3795 // using GNU_ARGS_SIZE. Note that this may be necessary even when
3796 // Amount == 0, because the preceding function may have set a non-0
3797 // GNU_ARGS_SIZE.
3798 // TODO: We don't need to reset this between subsequent functions,
3799 // if it didn't change.
3800 bool HasDwarfEHHandlers = !WindowsCFI && !MF.getLandingPads().empty();
3801
3802 if (HasDwarfEHHandlers && !isDestroy &&
3803 MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
3804 BuildCFI(MBB, InsertPos, DL,
3805 MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
3806
3807 if (Amount == 0)
3808 return I;
3809
3810 // Factor out the amount that gets handled inside the sequence
3811 // (Pushes of argument for frame setup, callee pops for frame destroy)
3812 Amount -= InternalAmt;
3813
3814 // TODO: This is needed only if we require precise CFA.
3815 // If this is a callee-pop calling convention, emit a CFA adjust for
3816 // the amount the callee popped.
3817 if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
3818 BuildCFI(MBB, InsertPos, DL,
3819 MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
3820
3821 // Add Amount to SP to destroy a frame, or subtract to setup.
3822 int64_t StackAdjustment = isDestroy ? Amount : -Amount;
3823
3824 if (StackAdjustment) {
3825 // Merge with any previous or following adjustment instruction. Note: the
3826 // instructions merged with here do not have CFI, so their stack
3827 // adjustments do not feed into CfaAdjustment.
3828 StackAdjustment += mergeSPUpdates(MBB, InsertPos, true);
3829 StackAdjustment += mergeSPUpdates(MBB, InsertPos, false);
3830
3831 if (StackAdjustment) {
3832 if (!(F.hasMinSize() &&
3833 adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
3834 BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
3835 /*InEpilogue=*/false);
3836 }
3837 }
3838
3839 if (DwarfCFI && !hasFP(MF)) {
3840 // If we don't have FP, but need to generate unwind information,
3841 // we need to set the correct CFA offset after the stack adjustment.
3842 // How much we adjust the CFA offset depends on whether we're emitting
3843 // CFI only for EH purposes or for debugging. EH only requires the CFA
3844 // offset to be correct at each call site, while for debugging we want
3845 // it to be more precise.
3846
3847 int64_t CfaAdjustment = -StackAdjustment;
3848 // TODO: When not using precise CFA, we also need to adjust for the
3849 // InternalAmt here.
3850 if (CfaAdjustment) {
3851 BuildCFI(
3852 MBB, InsertPos, DL,
3853 MCCFIInstruction::createAdjustCfaOffset(nullptr, CfaAdjustment));
3854 }
3855 }
3856
3857 return I;
3858 }
3859
3860 if (InternalAmt) {
3861 MachineBasicBlock::iterator CI = I;
3862 MachineBasicBlock::iterator B = MBB.begin();
3863 while (CI != B && !std::prev(CI)->isCall())
3864 --CI;
3865 BuildStackAdjustment(MBB, CI, DL, -InternalAmt, /*InEpilogue=*/false);
3866 }
3867
3868 return I;
3869}
3870
3871 bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
3872 assert(MBB.getParent() && "Block is not attached to a function!");
3873 const MachineFunction &MF = *MBB.getParent();
3874 if (!MBB.isLiveIn(X86::EFLAGS))
3875 return true;
3876
3877 // If stack probes have to loop inline or call, that will clobber EFLAGS.
3878 // FIXME: we could allow cases that will use emitStackProbeInlineGenericBlock
3879 // for !in-prologue and combine that with breaking down the frame allocation.
3880 const X86TargetLowering &TLI = *STI.getTargetLowering();
3881 if (TLI.hasInlineStackProbe(MF) || TLI.hasStackProbeSymbol(MF))
3882 return false;
3883
3884 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3885 return !TRI->hasStackRealignment(MF) && !X86FI->hasSwiftAsyncContext();
3886}
3887
3888 bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
3889 assert(MBB.getParent() && "Block is not attached to a function!");
3890
3891 // Win64 has strict requirements in terms of epilogue and we are
3892 // not taking a chance at messing with them.
3893 // I.e., unless this block is already an exit block, we can't use
3894 // it as an epilogue.
3895 if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock())
3896 return false;
3897
3898 // Swift async context epilogue has a BTR instruction that clobbers parts of
3899 // EFLAGS.
3900 const MachineFunction &MF = *MBB.getParent();
3901 if (MF.getInfo<X86MachineFunctionInfo>()->hasSwiftAsyncContext())
3902 return false;
3903
3904 if (canUseLEAForSPInEpilogue(*MBB.getParent()))
3905 return true;
3906
3907 // If we cannot use LEA to adjust SP, we may need to use ADD, which
3908 // clobbers the EFLAGS. Check that we do not need to preserve it,
3909 // otherwise, conservatively assume this is not
3910 // safe to insert the epilogue here.
3911 return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
3912}
3913
3914 bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
3915 // If we may need to emit frameless compact unwind information, give
3916 // up as this is currently broken: PR25614.
3917 bool CompactUnwind =
3918 MF.getContext().getObjectFileInfo()->getCompactUnwindSection() != nullptr;
3919 return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF) ||
3920 !CompactUnwind) &&
3921 // The lowering of segmented stack and HiPE only support entry
3922 // blocks as prologue blocks: PR26107. This limitation may be
3923 // lifted if we fix:
3924 // - adjustForSegmentedStacks
3925 // - adjustForHiPEPrologue
3926 MF.getFunction().getCallingConv() != CallingConv::HiPE &&
3927 !MF.shouldSplitStack();
3928}
3929
3930 MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
3931 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
3932 const DebugLoc &DL, bool RestoreSP) const {
3933 assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env");
3934 assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32");
3935 assert(STI.is32Bit() && !Uses64BitFramePtr &&
3936 "restoring EBP/ESI on non-32-bit target");
3937
3938 MachineFunction &MF = *MBB.getParent();
3939 Register FramePtr = TRI->getFrameRegister(MF);
3940 Register BasePtr = TRI->getBaseRegister();
3941 WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo();
3942 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3943 MachineFrameInfo &MFI = MF.getFrameInfo();
3944
3945 // FIXME: Don't set FrameSetup flag in catchret case.
3946
3947 int FI = FuncInfo.EHRegNodeFrameIndex;
3948 int EHRegSize = MFI.getObjectSize(FI);
3949
3950 if (RestoreSP) {
3951 // MOV32rm -EHRegSize(%ebp), %esp
3952 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP),
3953 X86::EBP, true, -EHRegSize)
3954 .setMIFlag(MachineInstr::FrameSetup);
3955 }
3956
3957 Register UsedReg;
3958 int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg).getFixed();
3959 int EndOffset = -EHRegOffset - EHRegSize;
3960 FuncInfo.EHRegNodeEndOffset = EndOffset;
3961
3962 if (UsedReg == FramePtr) {
3963 // ADD $offset, %ebp
3964 unsigned ADDri = getADDriOpcode(false);
3965 BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr)
3966 .addReg(FramePtr)
3967 .addImm(EndOffset)
3968 .setMIFlag(MachineInstr::FrameSetup)
3969 ->getOperand(3)
3970 .setIsDead();
3971 assert(EndOffset >= 0 &&
3972 "end of registration object above normal EBP position!");
3973 } else if (UsedReg == BasePtr) {
3974 // LEA offset(%ebp), %esi
3975 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr),
3976 FramePtr, false, EndOffset)
3977 .setMIFlag(MachineInstr::FrameSetup);
3978 // MOV32rm SavedEBPOffset(%esi), %ebp
3979 assert(X86FI->getHasSEHFramePtrSave());
3980 int Offset =
3981 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
3982 .getFixed();
3983 assert(UsedReg == BasePtr);
3984 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr),
3985 UsedReg, true, Offset)
3986 .setMIFlag(MachineInstr::FrameSetup);
3987 } else {
3988 llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr");
3989 }
3990 return MBBI;
3991}
3992
3993 int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
3994 return TRI->getSlotSize();
3995}
3996
3997 Register
3998 X86FrameLowering::getInitialCFARegister(const MachineFunction &MF) const {
3999 return StackPtr;
4000}
4001
4002 TargetFrameLowering::DwarfFrameBase
4003 X86FrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
4004 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
4005 Register FrameRegister = RI->getFrameRegister(MF);
4006 if (getInitialCFARegister(MF) == FrameRegister &&
4007 MF.getInfo<X86MachineFunctionInfo>()->hasCFIAdjustCfa()) {
4008 DwarfFrameBase FrameBase;
4009 FrameBase.Kind = DwarfFrameBase::CFA;
4010 FrameBase.Location.Offset =
4011 -MF.getFrameInfo().getStackSize() - getInitialCFAOffset(MF);
4012 return FrameBase;
4013 }
4014
4015 return DwarfFrameBase{DwarfFrameBase::Register, {FrameRegister}};
4016}
4017
4018namespace {
4019// Struct used by orderFrameObjects to help sort the stack objects.
4020struct X86FrameSortingObject {
4021 bool IsValid = false; // true if we care about this Object.
4022 unsigned ObjectIndex = 0; // Index of Object into MFI list.
4023 unsigned ObjectSize = 0; // Size of Object in bytes.
4024 Align ObjectAlignment = Align(1); // Alignment of Object in bytes.
4025 unsigned ObjectNumUses = 0; // Object static number of uses.
4026};
4027
4028// The comparison function we use for std::sort to order our local
4029// stack symbols. The current algorithm is to use an estimated
4030// "density". This takes into consideration the size and number of
4031// uses each object has in order to roughly minimize code size.
4032// So, for example, an object of size 16B that is referenced 5 times
4033// will get higher priority than 4 4B objects referenced 1 time each.
4034// It's not perfect and we may be able to squeeze a few more bytes out of
4035// it (for example : 0(esp) requires fewer bytes, symbols allocated at the
4036// fringe end can have special consideration, given their size is less
4037// important, etc.), but the algorithmic complexity grows too much to be
4038// worth the extra gains we get. This gets us pretty close.
4039// The final order leaves us with objects with highest priority going
4040// at the end of our list.
4041struct X86FrameSortingComparator {
4042 inline bool operator()(const X86FrameSortingObject &A,
4043 const X86FrameSortingObject &B) const {
4044 uint64_t DensityAScaled, DensityBScaled;
4045
4046 // For consistency in our comparison, all invalid objects are placed
4047 // at the end. This also allows us to stop walking when we hit the
4048 // first invalid item after it's all sorted.
4049 if (!A.IsValid)
4050 return false;
4051 if (!B.IsValid)
4052 return true;
4053
4054 // The density is calculated by doing :
4055 // (double)DensityA = A.ObjectNumUses / A.ObjectSize
4056 // (double)DensityB = B.ObjectNumUses / B.ObjectSize
4057 // Since this approach may cause inconsistencies in
4058 // the floating point <, >, == comparisons, depending on the floating
4059 // point model with which the compiler was built, we're going
4060 // to scale both sides by multiplying with
4061 // A.ObjectSize * B.ObjectSize. This ends up factoring away
4062 // the division and, with it, the need for any floating point
4063 // arithmetic.
4064 DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) *
4065 static_cast<uint64_t>(B.ObjectSize);
4066 DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) *
4067 static_cast<uint64_t>(A.ObjectSize);
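// Worked example (hypothetical numbers): A = {5 uses, 16 bytes} and
// B = {1 use, 4 bytes} give DensityAScaled = 5 * 4 = 20 and
// DensityBScaled = 1 * 16 = 16. A therefore does not compare "less" than
// B, so it sorts later in the list, i.e. it ends up with higher priority.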
4068
4069 // If the two densities are equal, prioritize highest alignment
4070 // objects. This allows for similar alignment objects
4071 // to be packed together (given the same density).
4072 // There's room for improvement here, also, since we can pack
4073 // similar alignment (different density) objects next to each
4074 // other to save padding. This will also require further
4075 // complexity/iterations, and the overall gain isn't worth it,
4076 // in general. Something to keep in mind, though.
4077 if (DensityAScaled == DensityBScaled)
4078 return A.ObjectAlignment < B.ObjectAlignment;
4079
4080 return DensityAScaled < DensityBScaled;
4081 }
4082};
4083} // namespace
4084
4085// Order the symbols in the local stack.
4086// We want to place the local stack objects in some sort of sensible order.
4087// The heuristic we use is to try and pack them according to static number
4088// of uses and size of object in order to minimize code size.
4089 void X86FrameLowering::orderFrameObjects(
4090 const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
4091 const MachineFrameInfo &MFI = MF.getFrameInfo();
4092
4093 // Don't waste time if there's nothing to do.
4094 if (ObjectsToAllocate.empty())
4095 return;
4096
4097 // Create an array of all MFI objects. We won't need all of these
4098 // objects, but we're going to create a full array of them to make
4099 // it easier to index into when we're counting "uses" down below.
4100 // We want to be able to easily/cheaply access an object by simply
4101 // indexing into it, instead of having to search for it every time.
4102 std::vector<X86FrameSortingObject> SortingObjects(MFI.getObjectIndexEnd());
4103
4104 // Walk the objects we care about and mark them as such in our working
4105 // struct.
4106 for (auto &Obj : ObjectsToAllocate) {
4107 SortingObjects[Obj].IsValid = true;
4108 SortingObjects[Obj].ObjectIndex = Obj;
4109 SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlign(Obj);
4110 // Set the size.
4111 int ObjectSize = MFI.getObjectSize(Obj);
4112 if (ObjectSize == 0)
4113 // Variable size. Just use 4.
4114 SortingObjects[Obj].ObjectSize = 4;
4115 else
4116 SortingObjects[Obj].ObjectSize = ObjectSize;
4117 }
4118
4119 // Count the number of uses for each object.
4120 for (auto &MBB : MF) {
4121 for (auto &MI : MBB) {
4122 if (MI.isDebugInstr())
4123 continue;
4124 for (const MachineOperand &MO : MI.operands()) {
4125 // Check to see if it's a local stack symbol.
4126 if (!MO.isFI())
4127 continue;
4128 int Index = MO.getIndex();
4129 // Check to see if it falls within our range, and is tagged
4130 // to require ordering.
4131 if (Index >= 0 && Index < MFI.getObjectIndexEnd() &&
4132 SortingObjects[Index].IsValid)
4133 SortingObjects[Index].ObjectNumUses++;
4134 }
4135 }
4136 }
4137
4138 // Sort the objects using the X86FrameSortingComparator (see its comment for
4139 // info).
4140 llvm::stable_sort(SortingObjects, X86FrameSortingComparator());
4141
4142 // Now modify the original list to represent the final order that
4143 // we want. The order will depend on whether we're going to access them
4144 // from the stack pointer or the frame pointer. For SP, objects that should
4145 // get smaller offsets end up at the END of the list.
4146 // For FP, the order is flipped.
4147 int i = 0;
4148 for (auto &Obj : SortingObjects) {
4149 // All invalid items are sorted at the end, so it's safe to stop.
4150 if (!Obj.IsValid)
4151 break;
4152 ObjectsToAllocate[i++] = Obj.ObjectIndex;
4153 }
4154
4155 // Flip it if we're accessing off of the FP.
4156 if (!TRI->hasStackRealignment(MF) && hasFP(MF))
4157 std::reverse(ObjectsToAllocate.begin(), ObjectsToAllocate.end());
4158}
4159
4160unsigned
4161 X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const {
4162 // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue.
4163 unsigned Offset = 16;
4164 // RBP is immediately pushed.
4165 Offset += SlotSize;
4166 // All callee-saved registers are then pushed.
4167 Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
4168 // Every funclet allocates enough stack space for the largest outgoing call.
4169 Offset += getWinEHFuncletFrameSize(MF);
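// For example (hypothetical sizes): 16 (homed RDX) + 8 (pushed RBP)
// + 16 (two callee-saved registers) + 32 (largest outgoing call) = 72.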
4170 return Offset;
4171}
4172
4173 void X86FrameLowering::processFunctionBeforeFrameFinalized(
4174 MachineFunction &MF, RegScavenger *RS) const {
4175 // Mark the function as not having WinCFI. We will set it back to true in
4176 // emitPrologue if it gets called and emits CFI.
4177 MF.setHasWinCFI(false);
4178
4179 // If we are using Windows x64 CFI, ensure that the stack is always 8 byte
4180 // aligned. The format doesn't support misaligned stack adjustments.
4181 if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
4182 MF.getFrameInfo().ensureMaxAlignment(Align(8));
4183
4184 // If this function isn't doing Win64-style C++ EH, we don't need to do
4185 // anything.
4186 if (STI.is64Bit() && MF.hasEHFunclets() &&
4187 classifyEHPersonality(MF.getFunction().getPersonalityFn()) ==
4188 EHPersonality::MSVC_CXX) {
4189 adjustFrameForMsvcCxxEh(MF);
4190 }
4191}
4192
4193void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const {
4194 // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset
4195 // relative to RSP after the prologue. Find the offset of the last fixed
4196 // object, so that we can allocate a slot immediately following it. If there
4197 // were no fixed objects, use offset -SlotSize, which is immediately after the
4198 // return address. Fixed objects have negative frame indices.
4199 MachineFrameInfo &MFI = MF.getFrameInfo();
4200 WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
4201 int64_t MinFixedObjOffset = -SlotSize;
4202 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I)
4203 MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I));
4204
4205 for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
4206 for (WinEHHandlerType &H : TBME.HandlerArray) {
4207 int FrameIndex = H.CatchObj.FrameIndex;
4208 if (FrameIndex != INT_MAX) {
4209 // Ensure alignment.
4210 unsigned Align = MFI.getObjectAlign(FrameIndex).value();
4211 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Align;
4212 MinFixedObjOffset -= MFI.getObjectSize(FrameIndex);
4213 MFI.setObjectOffset(FrameIndex, MinFixedObjOffset);
4214 }
4215 }
4216 }
4217
4218 // Ensure alignment.
4219 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8;
4220 int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
4221 int UnwindHelpFI =
4222 MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*IsImmutable=*/false);
4223 EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
4224
4225 // Store -2 into UnwindHelp on function entry. We have to scan forwards past
4226 // other frame setup instructions.
4227 MachineBasicBlock &MBB = MF.front();
4228 auto MBBI = MBB.begin();
4229 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
4230 ++MBBI;
4231
4232 DebugLoc DL = MBB.findDebugLoc(MBBI);
4233 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)),
4234 UnwindHelpFI)
4235 .addImm(-2);
4236}
4237
4238 void X86FrameLowering::processFunctionBeforeFrameIndicesReplaced(
4239 MachineFunction &MF, RegScavenger *RS) const {
4240 auto *X86FI = MF.getInfo<X86MachineFunctionInfo>();
4241
4242 if (STI.is32Bit() && MF.hasEHFunclets())
4243 restoreWinEHStackPointersInParent(MF);
4244 // We have emitted prolog and epilog. Don't need stack pointer saving
4245 // instruction any more.
4246 if (MachineInstr *MI = X86FI->getStackPtrSaveMI()) {
4247 MI->eraseFromParent();
4248 X86FI->setStackPtrSaveMI(nullptr);
4249 }
4250}
4251
4252 void X86FrameLowering::restoreWinEHStackPointersInParent(
4253 MachineFunction &MF) const {
4254 // 32-bit functions have to restore stack pointers when control is transferred
4255 // back to the parent function. These blocks are identified as eh pads that
4256 // are not funclet entries.
4257 bool IsSEH = isAsynchronousEHPersonality(
4258 classifyEHPersonality(MF.getFunction().getPersonalityFn()));
4259 for (MachineBasicBlock &MBB : MF) {
4260 bool NeedsRestore = MBB.isEHPad() && !MBB.isEHFuncletEntry();
4261 if (NeedsRestore)
4262 restoreWin32EHStackPointers(MBB, MBB.begin(), DebugLoc(),
4263 /*RestoreSP=*/IsSEH);
4264 }
4265}
4266
4267// Compute the alignment gap between current SP after spilling FP/BP and the
4268// next properly aligned stack offset.
4269 static int computeFPBPAlignmentGap(MachineFunction &MF,
4270 const TargetRegisterClass *RC,
4271 unsigned NumSpilledRegs) {
4272 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
4273 unsigned AllocSize = TRI->getSpillSize(*RC) * NumSpilledRegs;
4274 Align StackAlign = MF.getSubtarget().getFrameLowering()->getStackAlign();
4275 unsigned AlignedSize = alignTo(AllocSize, StackAlign);
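// For example (hypothetical values): one spilled GR64 register gives
// AllocSize = 8; with a 16-byte StackAlign, AlignedSize = 16 and the
// returned gap is 8 bytes.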
4276 return AlignedSize - AllocSize;
4277}
4278
4279void X86FrameLowering::spillFPBPUsingSP(MachineFunction &MF,
4280 MachineBasicBlock::iterator BeforeMI,
4281 Register FP, Register BP,
4282 int SPAdjust) const {
4283 assert(FP.isValid() || BP.isValid());
4284
4285 MachineBasicBlock *MBB = BeforeMI->getParent();
4286 DebugLoc DL = BeforeMI->getDebugLoc();
4287
4288 // Spill FP.
4289 if (FP.isValid()) {
4290 BuildMI(*MBB, BeforeMI, DL,
4291 TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
4292 .addReg(FP);
4293 }
4294
4295 // Spill BP.
4296 if (BP.isValid()) {
4297 BuildMI(*MBB, BeforeMI, DL,
4298 TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
4299 .addReg(BP);
4300 }
4301
4302 // Make sure SP is aligned.
4303 if (SPAdjust)
4304 emitSPUpdate(*MBB, BeforeMI, DL, -SPAdjust, false);
4305
4306 // Emit unwinding information.
4307 if (FP.isValid() && needsDwarfCFI(MF)) {
4308 // Emit .cfi_remember_state to remember old frame.
4309 unsigned CFIIndex =
4310 MF.addFrameInst(MCCFIInstruction::createRememberState(nullptr));
4311 BuildMI(*MBB, BeforeMI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
4312 .addCFIIndex(CFIIndex);
4313
4314 // Setup new CFA value with DW_CFA_def_cfa_expression:
4315 // DW_OP_breg7+offset, DW_OP_deref, DW_OP_consts 16, DW_OP_plus
4316 SmallString<64> CfaExpr;
4317 uint8_t buffer[16];
4318 int Offset = SPAdjust;
4319 if (BP.isValid())
4320 Offset += TRI->getSpillSize(*TRI->getMinimalPhysRegClass(BP));
4321 // If BeforeMI is a frame setup instruction, we need to adjust the position
4322 // and offset of the new cfi instruction.
4323 if (TII.isFrameSetup(*BeforeMI)) {
4324 Offset += alignTo(TII.getFrameSize(*BeforeMI), getStackAlign());
4325 BeforeMI = std::next(BeforeMI);
4326 }
4327 Register StackPtr = TRI->getStackRegister();
4328 if (STI.isTarget64BitILP32())
4329 StackPtr = Register(getX86SubSuperRegister(StackPtr, 64));
4330 unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackPtr, true);
4331 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfStackPtr));
4332 CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
4333 CfaExpr.push_back(dwarf::DW_OP_deref);
4334 CfaExpr.push_back(dwarf::DW_OP_consts);
4335 CfaExpr.append(buffer, buffer + encodeSLEB128(SlotSize * 2, buffer));
4336 CfaExpr.push_back((uint8_t)dwarf::DW_OP_plus);
4337
4338 SmallString<64> DefCfaExpr;
4339 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
4340 DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
4341 DefCfaExpr.append(CfaExpr.str());
4342 BuildCFI(*MBB, BeforeMI, DL,
4343 MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
4344 MachineInstr::FrameSetup);
4345 }
4346}
4347
4348void X86FrameLowering::restoreFPBPUsingSP(MachineFunction &MF,
4350 Register FP, Register BP,
4351 int SPAdjust) const {
4352 assert(FP.isValid() || BP.isValid());
4353
4354 // Adjust SP so it points to spilled FP or BP.
4355 MachineBasicBlock *MBB = AfterMI->getParent();
4356 MachineBasicBlock::iterator Pos = std::next(AfterMI);
4357 DebugLoc DL = AfterMI->getDebugLoc();
4358 if (SPAdjust)
4359 emitSPUpdate(*MBB, Pos, DL, SPAdjust, false);
4360
4361 // Restore BP.
4362 if (BP.isValid()) {
4363 BuildMI(*MBB, Pos, DL,
4364 TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())), BP);
4365 }
4366
4367 // Restore FP.
4368 if (FP.isValid()) {
4369 BuildMI(*MBB, Pos, DL,
4370 TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())), FP);
4371
4372 // Emit unwinding information.
4373 if (needsDwarfCFI(MF)) {
4374 // Restore original frame with .cfi_restore_state.
4375 unsigned CFIIndex =
4376 MF.addFrameInst(MCCFIInstruction::createRestoreState(nullptr));
4377 BuildMI(*MBB, Pos, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
4378 .addCFIIndex(CFIIndex);
4379 }
4380 }
4381}
4382
4383void X86FrameLowering::saveAndRestoreFPBPUsingSP(
4384 MachineFunction &MF, MachineBasicBlock::iterator BeforeMI,
4385 MachineBasicBlock::iterator AfterMI, bool SpillFP, bool SpillBP) const {
4386 assert(SpillFP || SpillBP);
4387
4388 Register FP, BP;
4389 const TargetRegisterClass *RC;
4390 unsigned NumRegs = 0;
4391
4392 if (SpillFP) {
4393 FP = TRI->getFrameRegister(MF);
4394 if (STI.isTarget64BitILP32())
4395 FP = Register(getX86SubSuperRegister(FP, 64));
4396 RC = TRI->getMinimalPhysRegClass(FP);
4397 ++NumRegs;
4398 }
4399 if (SpillBP) {
4400 BP = TRI->getBaseRegister();
4401 if (STI.isTarget64BitILP32())
4402 BP = Register(getX86SubSuperRegister(BP, 64));
4403 RC = TRI->getMinimalPhysRegClass(BP);
4404 ++NumRegs;
4405 }
4406 int SPAdjust = computeFPBPAlignmentGap(MF, RC, NumRegs);
4407
4408 spillFPBPUsingSP(MF, BeforeMI, FP, BP, SPAdjust);
4409 restoreFPBPUsingSP(MF, AfterMI, FP, BP, SPAdjust);
4410}
4411
4412bool X86FrameLowering::skipSpillFPBP(
4413 MachineFunction &MF, MachineBasicBlock::reverse_iterator &MI) const {
4414 if (MI->getOpcode() == X86::LCMPXCHG16B_SAVE_RBX) {
4415 // The pseudo instruction LCMPXCHG16B_SAVE_RBX is generated in the form
4416 // SaveRbx = COPY RBX
4417 // SaveRbx = LCMPXCHG16B_SAVE_RBX ..., SaveRbx, implicit-def rbx
4418 // And later LCMPXCHG16B_SAVE_RBX is expanded to restore RBX from SaveRbx.
4419 // We should skip this instruction sequence.
4420 int FI;
4421 unsigned Reg;
4422 while (!(MI->getOpcode() == TargetOpcode::COPY &&
4423 MI->getOperand(1).getReg() == X86::RBX) &&
4424 !((Reg = TII.isStoreToStackSlot(*MI, FI)) && Reg == X86::RBX))
4425 ++MI;
4426 return true;
4427 }
4428 return false;
4429}
4430
4431 static bool isFPBPAccess(const MachineInstr &MI, Register FP, Register BP,
4432 const TargetRegisterInfo *TRI, bool &AccessFP,
4433 bool &AccessBP) {
4434 AccessFP = AccessBP = false;
4435 if (FP) {
4436 if (MI.findRegisterUseOperandIdx(FP, TRI, false) != -1 ||
4437 MI.findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
4438 AccessFP = true;
4439 }
4440 if (BP) {
4441 if (MI.findRegisterUseOperandIdx(BP, TRI, false) != -1 ||
4442 MI.findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
4443 AccessBP = true;
4444 }
4445 return AccessFP || AccessBP;
4446}
4447
4448 // An invoke instruction has been lowered to a normal function call by this
4449 // point; we try to figure out whether MI originally came from an invoke.
4450 // Is there a better way to do this?
4451static bool isInvoke(const MachineInstr &MI, bool InsideEHLabels) {
4452 if (!MI.isCall())
4453 return false;
4454 if (InsideEHLabels)
4455 return true;
4456
4457 const MachineBasicBlock *MBB = MI.getParent();
4458 if (!MBB->hasEHPadSuccessor())
4459 return false;
4460
4461 // Check if there is another call instruction from MI to the end of MBB.
4462 MachineBasicBlock::const_iterator MBBI = MI, ME = MBB->end();
4463 for (++MBBI; MBBI != ME; ++MBBI)
4464 if (MBBI->isCall())
4465 return false;
4466 return true;
4467}
4468
4469/// Given the live range of FP or BP (DefMI, KillMI), check if there is any
4470/// interfered stack access in the range, usually generated by register spill.
4471void X86FrameLowering::checkInterferedAccess(
4472 MachineFunction &MF, MachineBasicBlock::reverse_iterator DefMI,
4473 MachineBasicBlock::reverse_iterator KillMI, bool SpillFP,
4474 bool SpillBP) const {
4475 if (DefMI == KillMI)
4476 return;
4477 if (TRI->hasBasePointer(MF)) {
4478 if (!SpillBP)
4479 return;
4480 } else {
4481 if (!SpillFP)
4482 return;
4483 }
4484
4485 auto MI = KillMI;
4486 while (MI != DefMI) {
4487 if (any_of(MI->operands(),
4488 [](const MachineOperand &MO) { return MO.isFI(); }))
4489 report_fatal_error(
4490 "Interference usage of base pointer/frame "
4491 "pointer.");
4492 MI++;
4493 }
4494}
4495
4496 /// If a function uses the base pointer and the base pointer is clobbered by
4497 /// inline asm, RA doesn't detect this case, and after the inline asm the base
4498 /// pointer contains a garbage value.
4499 /// For example, if a 32-bit x86 function uses the base pointer esi, and esi is
4500 /// clobbered by the following inline asm
4501 /// asm("rep movsb" : "+D"(ptr), "+S"(x), "+c"(c)::"memory");
4502 /// we need to save esi before the asm and restore it after the asm.
4503 ///
4504 /// The problem can also occur with the frame pointer, if there is a function
4505 /// call and the callee uses a different calling convention and clobbers the fp.
4506///
4507 /// Because normal frame objects (spill slots) are accessed through the fp/bp
4508 /// register, we can't spill fp/bp to normal spill slots.
4509///
4510/// FIXME: There are 2 possible enhancements:
4511/// 1. In many cases there are different physical registers not clobbered by
4512/// inline asm, we can use one of them as base pointer. Or use a virtual
4513/// register as base pointer and let RA allocate a physical register to it.
4514 /// 2. If no other instruction accesses the stack through fp/bp from the
4515 /// inline asm to the epilogue, and there is no CFI requirement for a correct
4516 /// fp, we can skip the save and restore operations.
4517 void X86FrameLowering::spillFPBP(MachineFunction &MF) const {
4518 Register FP, BP;
4519 const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
4520 if (TFI.hasFP(MF))
4521 FP = TRI->getFrameRegister(MF);
4522 if (TRI->hasBasePointer(MF))
4523 BP = TRI->getBaseRegister();
4524
4525 // Currently only inline asm and function call can clobbers fp/bp. So we can
4526 // do some quick test and return early.
4527 if (!MF.hasInlineAsm()) {
4528 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
4529 if (!X86FI->getFPClobberedByCall())
4530 FP = 0;
4531 if (!X86FI->getBPClobberedByCall())
4532 BP = 0;
4533 }
4534 if (!FP && !BP)
4535 return;
4536
4537 for (MachineBasicBlock &MBB : MF) {
4538 bool InsideEHLabels = false;
4539 auto MI = MBB.rbegin(), ME = MBB.rend();
4540 auto TermMI = MBB.getFirstTerminator();
4541 if (TermMI == MBB.begin())
4542 continue;
4543 MI = *(std::prev(TermMI));
4544
4545 while (MI != ME) {
4546 // Skip frame setup/destroy instructions.
4547 // Skip Invoke (call inside try block) instructions.
4548 // Skip instructions handled by target.
4549 if (MI->getFlag(MachineInstr::MIFlag::FrameSetup) ||
4550 MI->getFlag(MachineInstr::MIFlag::FrameDestroy) ||
4551 isInvoke(*MI, InsideEHLabels) || skipSpillFPBP(MF, MI)) {
4552 ++MI;
4553 continue;
4554 }
4555
4556 if (MI->getOpcode() == TargetOpcode::EH_LABEL) {
4557 InsideEHLabels = !InsideEHLabels;
4558 ++MI;
4559 continue;
4560 }
4561
4562 bool AccessFP, AccessBP;
4563 // Check if fp or bp is used in MI.
4564 if (!isFPBPAccess(*MI, FP, BP, TRI, AccessFP, AccessBP)) {
4565 ++MI;
4566 continue;
4567 }
4568
4569 // Look for the range [DefMI, KillMI] in which fp or bp is defined and
4570 // used.
4571 bool FPLive = false, BPLive = false;
4572 bool SpillFP = false, SpillBP = false;
4573 auto DefMI = MI, KillMI = MI;
4574 do {
4575 SpillFP |= AccessFP;
4576 SpillBP |= AccessBP;
4577
4578 // Maintain FPLive and BPLive.
4579 if (FPLive && MI->findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
4580 FPLive = false;
4581 if (FP && MI->findRegisterUseOperandIdx(FP, TRI, false) != -1)
4582 FPLive = true;
4583 if (BPLive && MI->findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
4584 BPLive = false;
4585 if (BP && MI->findRegisterUseOperandIdx(BP, TRI, false) != -1)
4586 BPLive = true;
4587
4588 DefMI = MI++;
4589 } while ((MI != ME) &&
4590 (FPLive || BPLive ||
4591 isFPBPAccess(*MI, FP, BP, TRI, AccessFP, AccessBP)));
4592
4593 // Don't need to save/restore if FP is accessed through llvm.frameaddress.
4594 if (FPLive && !SpillBP)
4595 continue;
4596
4597 // If the bp is clobbered by a call, we should save and restore outside of
4598 // the frame setup instructions.
4599 if (KillMI->isCall() && DefMI != ME) {
4600 auto FrameSetup = std::next(DefMI);
4601 // Look for a frame setup instruction toward the start of the BB.
4602 // If we reach another call instruction first, there is no frame setup
4603 // instruction for the current call.
4604 while (FrameSetup != ME && !TII.isFrameSetup(*FrameSetup) &&
4605 !FrameSetup->isCall())
4606 ++FrameSetup;
4607 // If a frame setup instruction is found, we need to find out the
4608 // corresponding frame destroy instruction.
4609 if (FrameSetup != ME && TII.isFrameSetup(*FrameSetup) &&
4610 (TII.getFrameSize(*FrameSetup) ||
4611 TII.getFrameAdjustment(*FrameSetup))) {
4612 while (!TII.isFrameInstr(*KillMI))
4613 --KillMI;
4614 DefMI = FrameSetup;
4615 MI = DefMI;
4616 ++MI;
4617 }
4618 }
4619
4620 checkInterferedAccess(MF, DefMI, KillMI, SpillFP, SpillBP);
4621
4622 // Call target function to spill and restore FP and BP registers.
4623 saveAndRestoreFPBPUsingSP(MF, &(*DefMI), &(*KillMI), SpillFP, SpillBP);
4624 }
4625 }
4626}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool isFuncletReturnInstr(const MachineInstr &MI)
static const uint64_t kSplitStackAvailable
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Given that RA is a live value
uint64_t Size
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
static cl::opt< int > PageSize("imp-null-check-page-size", cl::desc("The page size of the target in bytes"), cl::init(4096), cl::Hidden)
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static bool isTailCallOpcode(unsigned Opc)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define H(x, y, z)
Definition: MD5.cpp:57
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static constexpr Register SPReg
static constexpr Register FPReg
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
static bool is64Bit(const char *name)
static unsigned calculateSetFPREG(uint64_t SPAdjust)
static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary)
GetScratchRegister - Get a temp register for performing work in the segmented stack and the Erlang/Hi...
static unsigned getADDriOpcode(bool IsLP64)
static unsigned getPUSH2Opcode(const X86Subtarget &ST)
static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm)
static unsigned getLEArOpcode(bool IsLP64)
static unsigned getSUBriOpcode(bool IsLP64)
static bool flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB)
Check if the flags need to be preserved before the terminators.
static bool isFPBPAccess(const MachineInstr &MI, Register FP, Register BP, const TargetRegisterInfo *TRI, bool &AccessFP, bool &AccessBP)
static bool isOpcodeRep(unsigned Opcode)
Return true if an opcode is part of the REP group of instructions.
static unsigned getANDriOpcode(bool IsLP64, int64_t Imm)
static bool isEAXLiveIn(MachineBasicBlock &MBB)
static int computeFPBPAlignmentGap(MachineFunction &MF, const TargetRegisterClass *RC, unsigned NumSpilledRegs)
static unsigned getADDrrOpcode(bool IsLP64)
static bool HasNestArgument(const MachineFunction *MF)
static unsigned getPOPOpcode(const X86Subtarget &ST)
static bool isInvoke(const MachineInstr &MI, bool InsideEHLabels)
static unsigned getPOP2Opcode(const X86Subtarget &ST)
static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD, const StringRef LiteralName)
Lookup an ERTS parameter in the !hipe.literals named metadata node.
static bool blockEndIsUnreachable(const MachineBasicBlock &MBB, MachineBasicBlock::const_iterator MBBI)
static unsigned getSUBrrOpcode(bool IsLP64)
static unsigned getPUSHOpcode(const X86Subtarget &ST)
static const unsigned FramePtr
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
reverse_iterator rend() const
Definition: ArrayRef.h:160
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:163
reverse_iterator rbegin() const
Definition: ArrayRef.h:159
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
BitVector & reset()
Definition: BitVector.h:392
BitVector & set()
Definition: BitVector.h:351
iterator_range< const_set_bits_iterator > set_bits() const
Definition: BitVector.h:140
static BranchProbability getOne()
static BranchProbability getZero()
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:277
bool hasPersonalityFn() const
Check whether this function has a personality function.
Definition: Function.h:905
Constant * getPersonalityFn() const
Get the personality function associated with this function.
Definition: Function.cpp:1048
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:353
size_t arg_size() const
Definition: Function.h:901
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
Definition: Function.h:682
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:234
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:657
A set of physical registers with utility functions to track liveness when walking backward/forward th...
Definition: LivePhysRegs.h:52
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:661
static MCCFIInstruction createDefCfaRegister(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_def_cfa_register modifies a rule for computing CFA.
Definition: MCDwarf.h:582
static MCCFIInstruction createGnuArgsSize(MCSymbol *L, int64_t Size, SMLoc Loc={})
A special wrapper for .cfi_escape that indicates GNU_ARGS_SIZE.
Definition: MCDwarf.h:693
static MCCFIInstruction createRestore(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_restore says that the rule for Register is now the same as it was at the beginning of the functi...
Definition: MCDwarf.h:656
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition: MCDwarf.h:575
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition: MCDwarf.h:617
static MCCFIInstruction createRememberState(MCSymbol *L, SMLoc Loc={})
.cfi_remember_state Save all current rules for all registers.
Definition: MCDwarf.h:676
OpType getOperation() const
Definition: MCDwarf.h:710
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition: MCDwarf.h:590
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition: MCDwarf.h:687
static MCCFIInstruction createAdjustCfaOffset(MCSymbol *L, int64_t Adjustment, SMLoc Loc={})
.cfi_adjust_cfa_offset Same as .cfi_def_cfa_offset, but Offset is a relative value that is added/subt...
Definition: MCDwarf.h:598
static MCCFIInstruction createRestoreState(MCSymbol *L, SMLoc Loc={})
.cfi_restore_state Restore the previously saved state.
Definition: MCDwarf.h:681
const MCObjectFileInfo * getObjectFileInfo() const
Definition: MCContext.h:416
const MCRegisterInfo * getRegisterInfo() const
Definition: MCContext.h:414
void reportError(SMLoc L, const Twine &Msg)
Definition: MCContext.cpp:1072
MCSection * getCompactUnwindSection() const
MCRegAliasIterator enumerates all registers aliasing Reg.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
Metadata node.
Definition: Metadata.h:1073
A single uniqued string.
Definition: Metadata.h:724
StringRef getString() const
Definition: Metadata.cpp:616
Machine Value Type.
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
bool isEHPad() const
Returns true if the block is a landing pad.
reverse_iterator rend()
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< livein_iterator > liveins() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool isEHFuncletEntry() const
Returns true if this is the entry block of an EH funclet.
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
bool isReturnBlock() const
Convenience function that returns true if the block ends in a return instruction.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< iterator > terminators()
iterator_range< succ_iterator > successors()
reverse_iterator rbegin()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
@ LQR_Live
Register is known to be (at least partially) live.
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
bool isCleanupFuncletEntry() const
Returns true if this is the entry block of a cleanup funclet.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool needsSplitStackProlog() const
Return true if this function requires a split stack prolog, even if it uses no stack space.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool adjustsStack() const
Return true if this function adjusts the stack – e.g., when calling another function.
void ensureMaxAlignment(Align Alignment)
Make sure the function is at least Align bytes aligned.
bool hasCalls() const
Return true if the current function has any function calls.
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
void setObjectOffset(int ObjectIdx, int64_t SPOffset)
Set the stack frame offset of the specified object.
uint64_t getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
bool hasOpaqueSPAdjustment() const
Returns true if the function contains opaque dynamic stack adjustments.
void setCVBytesOfCalleeSavedRegisters(unsigned S)
int CreateSpillStackObject(uint64_t Size, Align Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a...
const std::vector< CalleeSavedInfo > & getCalleeSavedInfo() const
Returns a reference to call saved info vector for the current function.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
bool hasCopyImplyingStackAdjustment() const
Returns true if the function contains operations which will lower down to instructions which manipula...
bool hasStackObjects() const
Return true if there are any stack objects in this function.
int CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset, bool IsImmutable=false)
Create a spill slot at a fixed location on the stack.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
void setStackSize(uint64_t Size)
Set the size of the stack.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
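A hedged sketch of how these index-based accessors compose (assuming a MachineFrameInfo reference MFI is in scope; the loop is illustrative, not code from this file):
  // Walk every frame object; fixed objects have negative indices,
  // so iteration starts below zero.
  for (int FI = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       FI != E; ++FI) {
    int64_t Size = MFI.getObjectSize(FI);       // bytes the object occupies
    int64_t Off = MFI.getObjectOffset(FI);      // offset from the incoming SP
    Align A = MFI.getObjectAlign(FI);           // required alignment
    bool IsFixed = MFI.isFixedObjectIndex(FI);  // true for fixed-location slots
    (void)Size; (void)Off; (void)A; (void)IsFixed;
  }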
void setOffsetAdjustment(int64_t Adj)
Set the correction for frame offsets.
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const std::vector< MCCFIInstruction > & getFrameInstructions() const
Returns a reference to a list of CFI instructions in the function's prologue.
bool hasInlineAsm() const
Returns true if the function contains any inline assembly.
void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair, unsigned SubReg=0)
Create a substitution mapping one <instr,operand> value to a different, new value.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
bool needsFrameMoves() const
True if this function needs frame moves for debug or exceptions.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
bool callsUnwindInit() const
void push_front(MachineBasicBlock *MBB)
const char * createExternalSymbolName(StringRef Name)
Allocate a string and populate it with the given external symbol name.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
bool verify(Pass *p=nullptr, const char *Banner=nullptr, raw_ostream *OS=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier; useful when debugging.
Function & getFunction()
Return the LLVM function that this machine code represents.
const std::vector< LandingPadInfo > & getLandingPads() const
Return a reference to the landing pad info for the current function.
bool shouldSplitStack() const
Return true if segmented stack code should be emitted for this function.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do so.
const MachineBasicBlock & front() const
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
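A minimal sketch of the builder chain these add* methods form, using BuildMI (listed further below); MBB, MBBI, DL, and TII are assumed to be in scope, and the opcode and immediate are illustrative:
  // Build "subq $16, %rsp" at MBBI and tag it as frame setup.
  BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri32), X86::RSP)
      .addReg(X86::RSP)   // tied use of the destination register
      .addImm(16)         // immediate stack adjustment in bytes
      .setMIFlag(MachineInstr::FrameSetup);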
Representation of each machine instruction.
Definition: MachineInstr.h:71
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:580
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:501
unsigned getDebugInstrNum()
Fetch the instruction number of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:587
@ MOVolatile
The memory access is volatile.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
bool isLiveIn(Register Reg) const
NamedMDNode * getNamedMetadata(StringRef Name) const
Return the first NamedMDNode in the module with the specified name.
Definition: Module.cpp:297
unsigned getCodeViewFlag() const
Returns the CodeView Version by checking module flags.
Definition: Module.cpp:597
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memory), i.e., a start pointer and a length.
Definition: ArrayRef.h:310
iterator end() const
Definition: ArrayRef.h:360
iterator begin() const
Definition: ArrayRef.h:359
A tuple of MDNodes.
Definition: Metadata.h:1737
MDNode * getOperand(unsigned i) const
Definition: Metadata.cpp:1425
unsigned getNumOperands() const
Definition: Metadata.cpp:1421
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:115
Represents a location in source code.
Definition: SMLoc.h:23
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:65
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better as a string (e.g. operator+ etc.).
Definition: SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
Definition: SmallString.h:254
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the number of template parameters.
Definition: SmallVector.h:573
void push_back(const T &Elt)
Definition: SmallVector.h:413
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
int64_t getFixed() const
Returns the fixed component of the stack.
Definition: TypeSize.h:49
static StackOffset getFixed(int64_t Fixed)
Definition: TypeSize.h:42
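For illustration, the fixed-offset helpers round-trip like this (the -8 is a made-up value):
  StackOffset Off = StackOffset::getFixed(-8); // purely fixed, no scalable part
  int64_t Bytes = Off.getFixed();              // Bytes == -8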
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:51
static constexpr size_t npos
Definition: StringRef.h:53
Information about stack frame layout on the target.
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() should actually get saved.
int getOffsetOfLocalArea() const
getOffsetOfLocalArea - This method returns the offset of the local area from the stack pointer on entrance to a function.
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligned on entry to a function.
TargetInstrInfo - Interface to description of machine instruction set.
const Triple & getTargetTriple() const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
SwiftAsyncFramePointerMode SwiftAsyncFramePointer
Control when and how the Swift async frame pointer bit should be set.
bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disabled for the given machine function.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
virtual Register getFrameRegister(const MachineFunction &MF) const =0
Debug information queries.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetFrameLowering * getFrameLowering() const
bool isOSWindows() const
Tests whether the OS is Windows.
Definition: Triple.h:655
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
Definition: Triple.h:588
Value wrapper in the Metadata hierarchy.
Definition: Metadata.h:454
Value * getValue() const
Definition: Metadata.h:494
bool has128ByteRedZone(const MachineFunction &MF) const
Return true if the function has a redzone (accessible bytes past the frame of the top of stack function) for this function.
void spillFPBP(MachineFunction &MF) const override
If a function uses the base pointer and the base pointer is clobbered by inline asm, RA doesn't detect this case, and after the inline asm the base pointer contains garbage.
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override
canSimplifyCallFramePseudos - If there is a reserved call frame, the call frame pseudos can be simplified.
bool needsFrameIndexResolution(const MachineFunction &MF) const override
X86FrameLowering(const X86Subtarget &STI, MaybeAlign StackAlignOverride)
const X86RegisterInfo * TRI
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
bool hasFPImpl(const MachineFunction &MF) const override
hasFPImpl - Return true if the specified function should have a dedicated frame pointer register.
MachineBasicBlock::iterator restoreWin32EHStackPointers(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool RestoreSP=false) const
Sets up EBP and optionally ESI based on the incoming EBP value.
int getInitialCFAOffset(const MachineFunction &MF) const override
Return the initial CFA offset value, i.e. the one valid at the beginning of the function (before any stack operations).
bool canUseAsPrologue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as a prologue for the target.
bool hasReservedCallFrame(const MachineFunction &MF) const override
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required, we reserve argument space for call sites in the function immediately on entry to the current function.
void emitStackProbe(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog, std::optional< MachineFunction::DebugInstrOperandPair > InstrNum=std::nullopt) const
Emit target stack probe code.
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function's frame layout (MF.getFrameInfo()) is finalized.
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool IsPrologue) const
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() should actually get saved.
StackOffset getFrameIndexReferenceSP(const MachineFunction &MF, int FI, Register &SPReg, int Adjustment) const
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
bool enableShrinkWrapping(const MachineFunction &MF) const override
Returns true if the target will correctly handle shrink wrapping.
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - This method should return the base register and offset used to reference a frame index location.
void inlineStackProbe(MachineFunction &MF, MachineBasicBlock &PrologMBB) const override
Replace a StackProbe inline-stub with the actual probe code inline.
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns true if it isn't possible / profitable to do so by issuing a series of load instructions.
const X86InstrInfo & TII
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy pseudo instructions.
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &DL, int64_t NumBytes, bool InEpilogue) const
Emit a series of instructions to increment / decrement the stack pointer by a constant value.
bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as an epilogue for the target.
bool Is64Bit
Is64Bit implies that x86_64 instructions are available.
Register getInitialCFARegister(const MachineFunction &MF) const override
Return the initial CFA register value, i.e. the one valid at the beginning of the function (before any stack operations).
bool Uses64BitFramePtr
True if the 64-bit frame or stack pointer should be used.
unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const override
void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const override
Adjust the prologue to have the function use segmented stacks.
DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const override
Return the frame base information to be encoded in the DWARF subprogram debug info.
void emitCalleeSavedFrameMovesFullCFA(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const override
Emits Dwarf Info specifying offsets of callee saved registers and frame pointer.
int getWin64EHFrameIndexRef(const MachineFunction &MF, int FI, Register &SPReg) const
bool canUseLEAForSPInEpilogue(const MachineFunction &MF) const
Check that LEA can be used on SP in an epilogue sequence for MF.
bool stackProbeFunctionModifiesSP() const override
Does the stack probe function call return with a modified stack pointer?
void orderFrameObjects(const MachineFunction &MF, SmallVectorImpl< int > &ObjectsToAllocate) const override
Order the symbols in the local stack.
void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const MCCFIInstruction &CFIInst, MachineInstr::MIFlag Flag=MachineInstr::NoFlags) const
Wraps up getting a CFI index and building a MachineInstr for it.
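A hedged sketch of the usual call pattern (MBB, MBBI, and DL assumed in scope; the chosen directive and offset are illustrative, not taken from this file):
  // Record that the CFA is now at SP + 16 and tag the CFI as frame setup.
  BuildCFI(MBB, MBBI, DL,
           MCCFIInstruction::cfiDefCfaOffset(/*L=*/nullptr, /*Offset=*/16),
           MachineInstr::FrameSetup);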
int mergeSPUpdates(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, bool doMergeWithPrevious) const
Check the instruction before/after the passed instruction.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
void processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameIndicesReplaced - This method is called immediately before MO_FrameIndex operands are eliminated, but after the frame is finalized.
StackOffset getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, Register &FrameReg, bool IgnoreSPUpdates) const override
Same as getFrameIndexReference, except that the stack pointer (as opposed to the frame pointer) will be the preferred value for FrameReg.
void restoreWinEHStackPointersInParent(MachineFunction &MF) const
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns true if it isn't possible / profitable to do so by issuing a series of store instructions.
void adjustForHiPEPrologue(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const override
Erlang programs may need a special prologue to handle the stack size they might need at runtime.
const X86Subtarget & STI
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void buildClearRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, DebugLoc &DL, bool AllowSideEffects=true) const override
int64_t getFrameAdjustment(const MachineInstr &I) const
Returns the stack pointer adjustment that happens inside the frame setup..destroy sequence (e.g. a call).
Definition: X86InstrInfo.h:215
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-specific information for each MachineFunction.
bool isCandidateForPush2Pop2(Register Reg) const
void setRestoreBasePointer(const MachineFunction *MF)
DenseMap< int, unsigned > & getWinEHXMMSlotInfo()
MachineInstr * getStackPtrSaveMI() const
AMXProgModelEnum getAMXProgModel() const
void addCandidateForPush2Pop2(Register Reg)
void setStackPtrSaveMI(MachineInstr *MI)
void setCalleeSavedFrameSize(unsigned bytes)
bool hasBasePointer(const MachineFunction &MF) const
Register getFrameRegister(const MachineFunction &MF) const override
unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) const
findDeadCallerSavedReg - Return a caller-saved register that isn't live when it reaches the "return" instruction.
Register getStackRegister() const
unsigned getSlotSize() const
Register getFramePtr() const
Returns the physical register used as the frame pointer.
Register getBaseRegister() const
bool isOSWindows() const
Definition: X86Subtarget.h:327
const X86TargetLowering * getTargetLowering() const override
Definition: X86Subtarget.h:118
bool isTargetDragonFly() const
Definition: X86Subtarget.h:287
bool isTargetWindowsMSVC() const
Definition: X86Subtarget.h:305
bool isTarget64BitILP32() const
Is this x86_64 with the ILP32 programming model (x32 ABI)?
Definition: X86Subtarget.h:173
bool isTargetDarwin() const
Definition: X86Subtarget.h:285
bool isTargetWin64() const
Definition: X86Subtarget.h:329
bool isTarget64BitLP64() const
Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
Definition: X86Subtarget.h:178
bool swiftAsyncContextIsDynamicallySet() const
Return whether FrameLowering should always set the "extended frame present" bit in FP, or set it based on a symbol in the runtime.
Definition: X86Subtarget.h:391
bool isTargetWindowsCoreCLR() const
Definition: X86Subtarget.h:309
const X86InstrInfo * getInstrInfo() const override
Definition: X86Subtarget.h:122
bool isCallingConvWin64(CallingConv::ID CC) const
Definition: X86Subtarget.h:342
bool isTargetFreeBSD() const
Definition: X86Subtarget.h:286
bool isTargetNaCl64() const
Definition: X86Subtarget.h:301
bool isTargetWin32() const
Definition: X86Subtarget.h:331
bool useIndirectThunkCalls() const
Definition: X86Subtarget.h:218
bool isTargetLinux() const
Definition: X86Subtarget.h:295
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
StringRef getStackProbeSymbolName(const MachineFunction &MF) const override
Returns the name of the symbol used to emit stack probes or the empty string if not applicable.
bool hasStackProbeSymbol(const MachineFunction &MF) const override
Returns true if stack probing through a function call is requested.
unsigned getStackProbeSize(const MachineFunction &MF) const
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint16_t StackAdjustment(const RuntimeFunction &RF)
StackAdjustment - calculated stack adjustment in words.
Definition: ARMWinEH.h:199
@ HiPE
Used by the High-Performance Erlang Compiler (HiPE).
Definition: CallingConv.h:53
@ X86_INTR
x86 hardware interrupt context.
Definition: CallingConv.h:173
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always be performed.
Definition: CallingConv.h:76
@ X86_FastCall
'fast' analog of X86_StdCall.
Definition: CallingConv.h:103
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
@ MO_GOTPCREL
MO_GOTPCREL - On a symbol operand this indicates that the immediate is offset to the GOT entry for the symbol name from the base of the GOT.
Definition: X86BaseInfo.h:387
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
void stable_sort(R &&Range)
Definition: STLExtras.h:2037
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
MCRegister getX86SubSuperRegister(MCRegister Reg, unsigned Size, bool High=false)
@ DwarfCFI
DWARF-like instruction based exceptions.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the stack frame of the current function.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is Skew mod Align.
Definition: MathExtras.h:556
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
static const MachineInstrBuilder & addRegOffset(const MachineInstrBuilder &MIB, unsigned Reg, bool isKill, int Offset)
addRegOffset - This function is used to add a memory reference of the form [Reg + Offset],...
@ Always
Always set the bit.
@ DeploymentBased
Determine whether to set the bit statically or dynamically based on the deployment target.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition: MathExtras.h:194
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
IterT skipDebugInstructionsBackward(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It until it points to a non-debug instruction or to Begin and return the resulting iterator.
unsigned getUndefRegState(bool B)
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
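Worked values for isAligned, alignDown, and alignTo (plain arithmetic; nothing assumed beyond the signatures listed here):
  Align A(16);
  uint64_t Up = alignTo(40, A);      // 48: next multiple of 16
  uint64_t Down = alignDown(40, 16); // 32: previous multiple of 16
  bool Ok = isAligned(A, 48);        // true: 48 is a multiple of 16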
bool isAsynchronousEHPersonality(EHPersonality Pers)
Returns true if this personality function catches asynchronous exceptions.
unsigned encodeSLEB128(int64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a SLEB128 value to an output stream.
Definition: LEB128.h:23
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given predicate occurs in a range.
Definition: STLExtras.h:1945
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1766
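A small sketch of these range wrappers over a SmallVector (container contents and predicates are made up):
  SmallVector<int, 4> V = {3, 1, 4, 1};
  bool AllPositive = all_of(V, [](int X) { return X > 0; });   // true
  bool HasEven = any_of(V, [](int X) { return X % 2 == 0; });  // true (the 4)
  auto NumOnes = count_if(V, [](int X) { return X == 1; });    // 2
  auto It = find_if(V, [](int X) { return X > 3; });           // points at 4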
void computeAndAddLiveIns(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB)
Convenience function combining computeLiveIns() and addLiveIns().
unsigned encodeULEB128(uint64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a ULEB128 value to an output stream.
Definition: LEB128.h:80
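For example, encoding the classic value 624485 (made up for illustration) into a small in-memory buffer:
  SmallString<8> Buf;
  raw_svector_ostream OS(Buf);
  unsigned Len = encodeULEB128(624485, OS); // Buf == {0xE5, 0x8E, 0x26}, Len == 3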
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-in's for a set of MBBs until the computation converges.
Definition: LivePhysRegs.h:215
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Pair of physical register and lane mask.
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exclusive to a single function.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
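A hedged sketch tying this to getMachineMemOperand above (MF and a frame index FI are assumed in scope; the size and alignment are illustrative):
  // Describe an 8-byte store to the stack slot behind frame index FI.
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FI),
      MachineMemOperand::MOStore, /*Size=*/8, Align(8));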
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
union llvm::TargetFrameLowering::DwarfFrameBase::@248 Location
enum llvm::TargetFrameLowering::DwarfFrameBase::FrameBaseKind Kind
SmallVector< WinEHTryBlockMapEntry, 4 > TryBlockMap
Definition: WinEHFuncInfo.h:97
SmallVector< WinEHHandlerType, 1 > HandlerArray
Definition: WinEHFuncInfo.h:76