1//===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the X86 implementation of TargetFrameLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "X86FrameLowering.h"
15#include "X86InstrBuilder.h"
16#include "X86InstrInfo.h"
17#include "X86MachineFunctionInfo.h"
18#include "X86Subtarget.h"
19#include "X86TargetMachine.h"
20#include "llvm/ADT/SmallSet.h"
21#include "llvm/ADT/Statistic.h"
29#include "llvm/IR/DataLayout.h"
31#include "llvm/IR/Function.h"
32#include "llvm/MC/MCAsmInfo.h"
34#include "llvm/MC/MCSymbol.h"
35#include "llvm/Support/Debug.h"
37#include <cstdlib>
38
39#define DEBUG_TYPE "x86-fl"
40
41STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
42STATISTIC(NumFrameExtraProbe,
43 "Number of extra stack probes generated in prologue");
44
45using namespace llvm;
46
47X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
48 MaybeAlign StackAlignOverride)
49 : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(),
50 STI.is64Bit() ? -8 : -4),
51 STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {
52 // Cache a bunch of frame-related predicates for this subtarget.
53 SlotSize = TRI->getSlotSize();
54 Is64Bit = STI.is64Bit();
55 IsLP64 = STI.isTarget64BitLP64();
56 // Standard x86_64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit.
57 Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
58 StackPtr = TRI->getStackRegister();
59}
60
61bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
62 return !MF.getFrameInfo().hasVarSizedObjects() &&
63 !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() &&
64 !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall();
65}
66
67/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
68/// call frame pseudos can be simplified. Having a FP, as in the default
69/// implementation, is not sufficient here since we can't always use it.
70/// Use a more nuanced condition.
71bool
72X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
73 return hasReservedCallFrame(MF) ||
74 MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
75 (hasFP(MF) && !TRI->hasStackRealignment(MF)) ||
76 TRI->hasBasePointer(MF);
77}
78
79// needsFrameIndexResolution - Do we need to perform FI resolution for
80// this function. Normally, this is required only when the function
81// has any stack objects. However, FI resolution actually has another job,
82// not apparent from the title - it resolves callframesetup/destroy
83// that were not simplified earlier.
84// So, this is required for x86 functions that have push sequences even
85// when there are no stack objects.
86bool
87X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const {
88 return MF.getFrameInfo().hasStackObjects() ||
89 MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
90}
91
92/// hasFP - Return true if the specified function should have a dedicated frame
93/// pointer register. This is true if the function has variable sized allocas
94/// or if frame pointer elimination is disabled.
95bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
96 const MachineFrameInfo &MFI = MF.getFrameInfo();
97 return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
98 TRI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
99 MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() ||
100 MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
101 MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
102 MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
103 MFI.hasStackMap() || MFI.hasPatchPoint() ||
104 (isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment()));
105}
106
107static unsigned getSUBriOpcode(bool IsLP64, int64_t Imm) {
108 if (IsLP64) {
109 if (isInt<8>(Imm))
110 return X86::SUB64ri8;
111 return X86::SUB64ri32;
112 } else {
113 if (isInt<8>(Imm))
114 return X86::SUB32ri8;
115 return X86::SUB32ri;
116 }
117}
118
119static unsigned getADDriOpcode(bool IsLP64, int64_t Imm) {
120 if (IsLP64) {
121 if (isInt<8>(Imm))
122 return X86::ADD64ri8;
123 return X86::ADD64ri32;
124 } else {
125 if (isInt<8>(Imm))
126 return X86::ADD32ri8;
127 return X86::ADD32ri;
128 }
129}
130
131static unsigned getSUBrrOpcode(bool IsLP64) {
132 return IsLP64 ? X86::SUB64rr : X86::SUB32rr;
133}
134
135static unsigned getADDrrOpcode(bool IsLP64) {
136 return IsLP64 ? X86::ADD64rr : X86::ADD32rr;
137}
138
139static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
140 if (IsLP64) {
141 if (isInt<8>(Imm))
142 return X86::AND64ri8;
143 return X86::AND64ri32;
144 }
145 if (isInt<8>(Imm))
146 return X86::AND32ri8;
147 return X86::AND32ri;
148}
149
150static unsigned getLEArOpcode(bool IsLP64) {
151 return IsLP64 ? X86::LEA64r : X86::LEA32r;
152}
153
154static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm) {
155 if (Use64BitReg) {
156 if (isUInt<32>(Imm))
157 return X86::MOV32ri64;
158 if (isInt<32>(Imm))
159 return X86::MOV64ri32;
160 return X86::MOV64ri;
161 }
162 return X86::MOV32ri;
163}
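// Illustrative sketch (not part of the original source): these helpers pick
// the narrowest encoding that fits the immediate. For example, on LP64:
//   getSUBriOpcode(true, 40)        -> X86::SUB64ri8   (fits a signed 8-bit imm)
//   getSUBriOpcode(true, 4096)      -> X86::SUB64ri32
//   getMOVriOpcode(true, 42)        -> X86::MOV32ri64  (zero-extending 32-bit move)
//   getMOVriOpcode(true, 1LL << 32) -> X86::MOV64ri    (needs the full movabsq form)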
164
165static bool isEAXLiveIn(MachineBasicBlock &MBB) {
166 for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) {
167 unsigned Reg = RegMask.PhysReg;
168
169 if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
170 Reg == X86::AH || Reg == X86::AL)
171 return true;
172 }
173
174 return false;
175}
176
177/// Check if the flags need to be preserved before the terminators.
178/// This is the case if EFLAGS is live into the region composed of the
179/// terminators, or live out of that region without being defined by a
180/// terminator.
181static bool
182flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) {
183 for (const MachineInstr &MI : MBB.terminators()) {
184 bool BreakNext = false;
185 for (const MachineOperand &MO : MI.operands()) {
186 if (!MO.isReg())
187 continue;
188 Register Reg = MO.getReg();
189 if (Reg != X86::EFLAGS)
190 continue;
191
192 // This terminator needs an eflags that is not defined
193 // by a previous terminator:
194 // EFLAGS is live-in of the region composed by the terminators.
195 if (!MO.isDef())
196 return true;
197 // This terminator defines the eflags, i.e., we don't need to preserve it.
198 // However, we still need to check this specific terminator does not
199 // read a live-in value.
200 BreakNext = true;
201 }
202 // We found a definition of the eflags, no need to preserve them.
203 if (BreakNext)
204 return false;
205 }
206
207 // None of the terminators use or define the eflags.
208 // Check if they are live-out, that would imply we need to preserve them.
209 for (const MachineBasicBlock *Succ : MBB.successors())
210 if (Succ->isLiveIn(X86::EFLAGS))
211 return true;
212
213 return false;
214}
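// Example of when this matters (illustrative): a block whose terminators are
//   JCC_1 %bb.then, 4, implicit $eflags
//   JMP_1 %bb.else
// reads EFLAGS without defining it, so the flags are live into the terminator
// region, and any stack adjustment inserted before the terminators must not
// use ADD/SUB (which redefine EFLAGS); BuildStackAdjustment below falls back
// to LEA in that situation.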
215
216/// emitSPUpdate - Emit a series of instructions to increment / decrement the
217/// stack pointer by a constant value.
218void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
219 MachineBasicBlock::iterator &MBBI,
220 const DebugLoc &DL,
221 int64_t NumBytes, bool InEpilogue) const {
222 bool isSub = NumBytes < 0;
223 uint64_t Offset = isSub ? -NumBytes : NumBytes;
226
227 uint64_t Chunk = (1LL << 31) - 1;
228
232 const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
233
234 // It's ok to not take into account large chunks when probing, as the
235 // allocation is split in smaller chunks anyway.
236 if (EmitInlineStackProbe && !InEpilogue) {
237
238 // This pseudo-instruction is going to be expanded, potentially using a
239 // loop, by inlineStackProbe().
240 BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING)).addImm(Offset);
241 return;
242 } else if (Offset > Chunk) {
243 // Rather than emit a long series of instructions for large offsets,
244 // load the offset into a register and do one sub/add
245 unsigned Reg = 0;
246 unsigned Rax = (unsigned)(Is64Bit ? X86::RAX : X86::EAX);
247
248 if (isSub && !isEAXLiveIn(MBB))
249 Reg = Rax;
250 else
252
253 unsigned AddSubRROpc =
255 if (Reg) {
257 .addImm(Offset)
258 .setMIFlag(Flag);
259 MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr)
261 .addReg(Reg);
262 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
263 return;
264 } else if (Offset > 8 * Chunk) {
265 // If we would need more than 8 add or sub instructions (a >16GB stack
266 // frame), it's worth spilling RAX to materialize this immediate.
267 // pushq %rax
268 // movabsq +-$Offset+-SlotSize, %rax
269 // addq %rsp, %rax
270 // xchg %rax, (%rsp)
271 // movq (%rsp), %rsp
272 assert(Is64Bit && "can't have 32-bit 16GB stack frame");
273 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
275 .setMIFlag(Flag);
276 // Subtract is not commutative, so negate the offset and always use add.
277 // Subtract 8 less and add 8 more to account for the PUSH we just did.
278 if (isSub)
279 Offset = -(Offset - SlotSize);
280 else
283 .addImm(Offset)
284 .setMIFlag(Flag);
285 MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax)
286 .addReg(Rax)
288 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
289 // Exchange the new SP in RAX with the top of the stack.
291 BuildMI(MBB, MBBI, DL, TII.get(X86::XCHG64rm), Rax).addReg(Rax),
292 StackPtr, false, 0);
293 // Load new SP from the top of the stack into RSP.
294 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), StackPtr),
295 StackPtr, false, 0);
296 return;
297 }
298 }
299
300 while (Offset) {
301 uint64_t ThisVal = std::min(Offset, Chunk);
302 if (ThisVal == SlotSize) {
303 // Use push / pop for slot sized adjustments as a size optimization. We
304 // need to find a dead register when using pop.
305 unsigned Reg = isSub
306 ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
308 if (Reg) {
309 unsigned Opc = isSub
310 ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
311 : (Is64Bit ? X86::POP64r : X86::POP32r);
312 BuildMI(MBB, MBBI, DL, TII.get(Opc))
313 .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub))
314 .setMIFlag(Flag);
315 Offset -= ThisVal;
316 continue;
317 }
318 }
319
320 BuildStackAdjustment(MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue)
321 .setMIFlag(Flag);
322
323 Offset -= ThisVal;
324 }
325}
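// Rough examples of what emitSPUpdate produces (illustrative, 64-bit target):
//   emitSPUpdate(..., -40, /*InEpilogue=*/false) -> subq $40, %rsp  (FrameSetup)
//   emitSPUpdate(...,  40, /*InEpilogue=*/true)  -> addq $40, %rsp, or an
//                                                   equivalent LEA when EFLAGS
//                                                   must be preserved (FrameDestroy)
//   emitSPUpdate(...,  -8, ...)                  -> pushq %rax  (slot-sized
//                                                   adjustment, %rax pushed undef)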
326
327MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
329 const DebugLoc &DL, int64_t Offset, bool InEpilogue) const {
330 assert(Offset != 0 && "zero offset stack adjustment requested");
331
332 // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue
333 // is tricky.
334 bool UseLEA;
335 if (!InEpilogue) {
336 // Check if inserting the prologue at the beginning
337 // of MBB would require to use LEA operations.
338 // We need to use LEA operations if EFLAGS is live in, because
339 // it means an instruction will read it before it gets defined.
340 UseLEA = STI.useLeaForSP() || MBB.isLiveIn(X86::EFLAGS);
341 } else {
342 // If we can use LEA for SP but we shouldn't, check that none
343 // of the terminators uses the eflags. Otherwise we will insert
344 // a ADD that will redefine the eflags and break the condition.
345 // Alternatively, we could move the ADD, but this may not be possible
346 // and is an optimization anyway.
348 if (UseLEA && !STI.useLeaForSP())
350 // If that assert breaks, that means we do not do the right thing
351 // in canUseAsEpilogue.
353 "We shouldn't have allowed this insertion point");
354 }
355
357 if (UseLEA) {
360 StackPtr),
361 StackPtr, false, Offset);
362 } else {
363 bool IsSub = Offset < 0;
364 uint64_t AbsOffset = IsSub ? -Offset : Offset;
365 const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset)
366 : getADDriOpcode(Uses64BitFramePtr, AbsOffset);
367 MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
369 .addImm(AbsOffset);
370 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
371 }
372 return MI;
373}
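// For instance (illustrative): with Offset = -16 in a prologue block where
// EFLAGS is live-in, the adjustment is emitted as
//   leaq -16(%rsp), %rsp
// instead of "subq $16, %rsp", since LEA does not clobber the flags.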
374
377 bool doMergeWithPrevious) const {
378 if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
379 (!doMergeWithPrevious && MBBI == MBB.end()))
380 return 0;
381
382 MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;
383
385 // It is assumed that the ADD/SUB/LEA instruction is succeeded by one CFI
386 // instruction, and that there are no DBG_VALUE or other instructions between
387 // ADD/SUB/LEA and its corresponding CFI instruction.
388 /* TODO: Add support for the case where there are multiple CFI instructions
389 below the ADD/SUB/LEA, e.g.:
390 ...
391 add
392 cfi_def_cfa_offset
393 cfi_offset
394 ...
395 */
396 if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
397 PI = std::prev(PI);
398
399 unsigned Opc = PI->getOpcode();
400 int Offset = 0;
401
402 if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
403 Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
404 PI->getOperand(0).getReg() == StackPtr){
405 assert(PI->getOperand(1).getReg() == StackPtr);
406 Offset = PI->getOperand(2).getImm();
407 } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
408 PI->getOperand(0).getReg() == StackPtr &&
409 PI->getOperand(1).getReg() == StackPtr &&
410 PI->getOperand(2).getImm() == 1 &&
411 PI->getOperand(3).getReg() == X86::NoRegister &&
412 PI->getOperand(5).getReg() == X86::NoRegister) {
413 // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
414 Offset = PI->getOperand(4).getImm();
415 } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
416 Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
417 PI->getOperand(0).getReg() == StackPtr) {
418 assert(PI->getOperand(1).getReg() == StackPtr);
419 Offset = -PI->getOperand(2).getImm();
420 } else
421 return 0;
422
423 PI = MBB.erase(PI);
424 if (PI != MBB.end() && PI->isCFIInstruction()) {
425 auto CIs = MBB.getParent()->getFrameInstructions();
426 MCCFIInstruction CI = CIs[PI->getOperand(0).getCFIIndex()];
429 PI = MBB.erase(PI);
430 }
431 if (!doMergeWithPrevious)
433
434 return Offset;
435}
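// Sketch of the merge (illustrative): if the instruction just before the
// prologue's insertion point is "subq $32, %rsp", calling
// mergeSPUpdates(MBB, MBBI, /*doMergeWithPrevious=*/true) erases it and
// returns -32; emitPrologue then does NumBytes -= (-32), folding those 32
// bytes into the main stack allocation.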
436
439 const DebugLoc &DL,
440 const MCCFIInstruction &CFIInst,
441 MachineInstr::MIFlag Flag) const {
443 unsigned CFIIndex = MF.addFrameInst(CFIInst);
444 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
445 .addCFIIndex(CFIIndex)
446 .setMIFlag(Flag);
447}
448
449/// Emits Dwarf Info specifying offsets of callee saved registers and
450/// frame pointer. This is called only when basic block sections are enabled.
454 if (!hasFP(MF)) {
456 return;
457 }
458 const MachineModuleInfo &MMI = MF.getMMI();
461 const Register MachineFramePtr =
463 : FramePtr;
464 unsigned DwarfReg = MRI->getDwarfRegNum(MachineFramePtr, true);
465 // Offset = space for return address + size of the frame pointer itself.
466 unsigned Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 8 : 4);
468 MCCFIInstruction::createOffset(nullptr, DwarfReg, -Offset));
470}
471
474 const DebugLoc &DL, bool IsPrologue) const {
476 MachineFrameInfo &MFI = MF.getFrameInfo();
477 MachineModuleInfo &MMI = MF.getMMI();
479
480 // Add callee saved registers to move list.
481 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
482
483 // Calculate offsets.
484 for (const CalleeSavedInfo &I : CSI) {
485 int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
486 Register Reg = I.getReg();
487 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
488
489 if (IsPrologue) {
491 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
492 } else {
494 MCCFIInstruction::createRestore(nullptr, DwarfReg));
495 }
496 }
497}
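// Example of the emitted CFI (illustrative): for a callee-saved %rbx spilled
// at frame-object offset -24, the prologue path (IsPrologue == true) produces
//   .cfi_offset %rbx, -24
// while the epilogue path produces
//   .cfi_restore %rbx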
498
499void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
500 MachineBasicBlock &MBB) const {
501 const MachineFunction &MF = *MBB.getParent();
502
503 // Insertion point.
505
506 // Fake a debug loc.
507 DebugLoc DL;
508 if (MBBI != MBB.end())
509 DL = MBBI->getDebugLoc();
510
511 // Zero out FP stack if referenced. Do this outside of the loop below so that
512 // it's done only once.
513 const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
514 for (MCRegister Reg : RegsToZero.set_bits()) {
515 if (!X86::RFP80RegClass.contains(Reg))
516 continue;
517
518 unsigned NumFPRegs = ST.is64Bit() ? 8 : 7;
519 for (unsigned i = 0; i != NumFPRegs; ++i)
520 BuildMI(MBB, MBBI, DL, TII.get(X86::LD_F0));
521
522 for (unsigned i = 0; i != NumFPRegs; ++i)
523 BuildMI(MBB, MBBI, DL, TII.get(X86::ST_FPrr)).addReg(X86::ST0);
524 break;
525 }
526
527 // For GPRs, we only care to clear out the 32-bit register.
528 BitVector GPRsToZero(TRI->getNumRegs());
529 for (MCRegister Reg : RegsToZero.set_bits())
530 if (TRI->isGeneralPurposeRegister(MF, Reg)) {
531 GPRsToZero.set(getX86SubSuperRegister(Reg, 32));
532 RegsToZero.reset(Reg);
533 }
534
535 for (MCRegister Reg : GPRsToZero.set_bits())
536 BuildMI(MBB, MBBI, DL, TII.get(X86::XOR32rr), Reg)
538 .addReg(Reg, RegState::Undef);
539
540 // Zero out registers.
541 for (MCRegister Reg : RegsToZero.set_bits()) {
542 if (ST.hasMMX() && X86::VR64RegClass.contains(Reg))
543 // FIXME: Ignore MMX registers?
544 continue;
545
546 unsigned XorOp;
547 if (X86::VR128RegClass.contains(Reg)) {
548 // XMM#
549 if (!ST.hasSSE1())
550 continue;
551 XorOp = X86::PXORrr;
552 } else if (X86::VR256RegClass.contains(Reg)) {
553 // YMM#
554 if (!ST.hasAVX())
555 continue;
556 XorOp = X86::VPXORrr;
557 } else if (X86::VR512RegClass.contains(Reg)) {
558 // ZMM#
559 if (!ST.hasAVX512())
560 continue;
561 XorOp = X86::VPXORYrr;
562 } else if (X86::VK1RegClass.contains(Reg) ||
563 X86::VK2RegClass.contains(Reg) ||
564 X86::VK4RegClass.contains(Reg) ||
565 X86::VK8RegClass.contains(Reg) ||
566 X86::VK16RegClass.contains(Reg)) {
567 if (!ST.hasVLX())
568 continue;
569 XorOp = ST.hasBWI() ? X86::KXORQrr : X86::KXORWrr;
570 } else {
571 continue;
572 }
573
574 BuildMI(MBB, MBBI, DL, TII.get(XorOp), Reg)
576 .addReg(Reg, RegState::Undef);
577 }
578}
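// Illustrative effect of emitZeroCallUsedRegs: with RegsToZero = { $rcx, $xmm1 }
// this emits roughly
//   xorl %ecx, %ecx      (GPRs are cleared through their 32-bit alias)
//   pxor %xmm1, %xmm1    (only if SSE is available)
// MMX registers are currently skipped (see the FIXME above).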
579
582 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
583 std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
586 if (InProlog) {
587 BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING))
588 .addImm(0 /* no explicit stack size */);
589 } else {
590 emitStackProbeInline(MF, MBB, MBBI, DL, false);
591 }
592 } else {
593 emitStackProbeCall(MF, MBB, MBBI, DL, InProlog, InstrNum);
594 }
595}
596
598 return STI.isOSWindows() && !STI.isTargetWin64();
599}
600
602 MachineBasicBlock &PrologMBB) const {
603 auto Where = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
604 return MI.getOpcode() == X86::STACKALLOC_W_PROBING;
605 });
606 if (Where != PrologMBB.end()) {
607 DebugLoc DL = PrologMBB.findDebugLoc(Where);
608 emitStackProbeInline(MF, PrologMBB, Where, DL, true);
609 Where->eraseFromParent();
610 }
611}
612
613void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
616 const DebugLoc &DL,
617 bool InProlog) const {
619 if (STI.isTargetWindowsCoreCLR() && STI.is64Bit())
620 emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog);
621 else
622 emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog);
623}
624
625void X86FrameLowering::emitStackProbeInlineGeneric(
627 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
628 MachineInstr &AllocWithProbe = *MBBI;
629 uint64_t Offset = AllocWithProbe.getOperand(0).getImm();
630
633 assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) &&
634 "different expansion expected for CoreCLR 64 bit");
635
636 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
637 uint64_t ProbeChunk = StackProbeSize * 8;
638
639 uint64_t MaxAlign =
640 TRI->hasStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0;
641
642 // Synthesize a loop or unroll it, depending on the number of iterations.
643 // BuildStackAlignAND ensures that at most MaxAlign % StackProbeSize bytes
644 // are left between the unaligned rsp and the current rsp.
645 if (Offset > ProbeChunk) {
646 emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset,
647 MaxAlign % StackProbeSize);
648 } else {
649 emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset,
650 MaxAlign % StackProbeSize);
651 }
652}
653
654void X86FrameLowering::emitStackProbeInlineGenericBlock(
657 uint64_t AlignOffset) const {
658
659 const bool NeedsDwarfCFI = needsDwarfCFI(MF);
660 const bool HasFP = hasFP(MF);
663 const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
664 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
665
666 uint64_t CurrentOffset = 0;
667
668 assert(AlignOffset < StackProbeSize);
669
670 // If the offset is so small it fits within a page, there's nothing to do.
671 if (StackProbeSize < Offset + AlignOffset) {
672
673 uint64_t StackAdjustment = StackProbeSize - AlignOffset;
674 BuildStackAdjustment(MBB, MBBI, DL, -StackAdjustment, /*InEpilogue=*/false)
676 if (!HasFP && NeedsDwarfCFI) {
677 BuildCFI(
678 MBB, MBBI, DL,
679 MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
680 }
681
682 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
684 StackPtr, false, 0)
685 .addImm(0)
687 NumFrameExtraProbe++;
688 CurrentOffset = StackProbeSize - AlignOffset;
689 }
690
691 // For the next N - 1 pages, just probe. I tried to take advantage of
692 // natural probes, but it implied much more logic and there were very few
693 // interesting natural probes to interleave.
694 while (CurrentOffset + StackProbeSize < Offset) {
695 BuildStackAdjustment(MBB, MBBI, DL, -StackProbeSize, /*InEpilogue=*/false)
697
698 if (!HasFP && NeedsDwarfCFI) {
699 BuildCFI(
700 MBB, MBBI, DL,
701 MCCFIInstruction::createAdjustCfaOffset(nullptr, StackProbeSize));
702 }
703 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
705 StackPtr, false, 0)
706 .addImm(0)
708 NumFrameExtraProbe++;
709 CurrentOffset += StackProbeSize;
710 }
711
712 // No need to probe the tail, it is smaller than a Page.
713 uint64_t ChunkSize = Offset - CurrentOffset;
714 if (ChunkSize == SlotSize) {
715 // Use push for slot sized adjustments as a size optimization,
716 // like emitSPUpdate does when not probing.
717 unsigned Reg = Is64Bit ? X86::RAX : X86::EAX;
718 unsigned Opc = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
719 BuildMI(MBB, MBBI, DL, TII.get(Opc))
722 } else {
723 BuildStackAdjustment(MBB, MBBI, DL, -ChunkSize, /*InEpilogue=*/false)
725 }
726 // No need to adjust the Dwarf CFA offset here; the last position of the
727 // stack has already been defined.
728}
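// Sketch of the unrolled expansion (illustrative, assuming the default
// 4096-byte probe size, AlignOffset == 0, 64-bit target): for Offset == 10240
// this block emits roughly
//   subq $4096, %rsp ; movq $0, (%rsp)
//   subq $4096, %rsp ; movq $0, (%rsp)
//   subq $2048, %rsp              (tail smaller than a page, not probed)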
729
730void X86FrameLowering::emitStackProbeInlineGenericLoop(
733 uint64_t AlignOffset) const {
734 assert(Offset && "null offset");
735
736 assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
738 "Inline stack probe loop will clobber live EFLAGS.");
739
740 const bool NeedsDwarfCFI = needsDwarfCFI(MF);
741 const bool HasFP = hasFP(MF);
744 const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
745 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
746
747 if (AlignOffset) {
748 if (AlignOffset < StackProbeSize) {
749 // Perform a first smaller allocation followed by a probe.
750 BuildStackAdjustment(MBB, MBBI, DL, -AlignOffset, /*InEpilogue=*/false)
752
753 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
755 StackPtr, false, 0)
756 .addImm(0)
758 NumFrameExtraProbe++;
759 Offset -= AlignOffset;
760 }
761 }
762
763 // Synthesize a loop
764 NumFrameLoopProbe++;
765 const BasicBlock *LLVM_BB = MBB.getBasicBlock();
766
767 MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(LLVM_BB);
768 MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(LLVM_BB);
769
771 MF.insert(MBBIter, testMBB);
772 MF.insert(MBBIter, tailMBB);
773
774 Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
775 : Is64Bit ? X86::R11D
776 : X86::EAX;
777
778 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
781
782 // save loop bound
783 {
784 const unsigned BoundOffset = alignDown(Offset, StackProbeSize);
785 const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, BoundOffset);
786 BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
787 .addReg(FinalStackProbed)
788 .addImm(BoundOffset)
790
791 // while in the loop, use loop-invariant reg for CFI,
792 // instead of the stack pointer, which changes during the loop
793 if (!HasFP && NeedsDwarfCFI) {
794 // x32 uses the same DWARF register numbers as x86-64,
795 // so there isn't a register number for r11d, we must use r11 instead
796 const Register DwarfFinalStackProbed =
798 ? Register(getX86SubSuperRegister(FinalStackProbed, 64))
799 : FinalStackProbed;
800
803 nullptr, TRI->getDwarfRegNum(DwarfFinalStackProbed, true)));
805 MCCFIInstruction::createAdjustCfaOffset(nullptr, BoundOffset));
806 }
807 }
808
809 // allocate a page
810 BuildStackAdjustment(*testMBB, testMBB->end(), DL, -StackProbeSize,
811 /*InEpilogue=*/false)
813
814 // touch the page
815 addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc))
817 StackPtr, false, 0)
818 .addImm(0)
820
821 // cmp with stack pointer bound
822 BuildMI(testMBB, DL, TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
824 .addReg(FinalStackProbed)
826
827 // jump
828 BuildMI(testMBB, DL, TII.get(X86::JCC_1))
829 .addMBB(testMBB)
832 testMBB->addSuccessor(testMBB);
833 testMBB->addSuccessor(tailMBB);
834
835 // BB management
836 tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end());
838 MBB.addSuccessor(testMBB);
839
840 // handle tail
841 const uint64_t TailOffset = Offset % StackProbeSize;
842 MachineBasicBlock::iterator TailMBBIter = tailMBB->begin();
843 if (TailOffset) {
844 BuildStackAdjustment(*tailMBB, TailMBBIter, DL, -TailOffset,
845 /*InEpilogue=*/false)
847 }
848
849 // after the loop, switch back to stack pointer for CFI
850 if (!HasFP && NeedsDwarfCFI) {
851 // x32 uses the same DWARF register numbers as x86-64,
852 // so there isn't a register number for esp, we must use rsp instead
853 const Register DwarfStackPtr =
857
858 BuildCFI(*tailMBB, TailMBBIter, DL,
860 nullptr, TRI->getDwarfRegNum(DwarfStackPtr, true)));
861 }
862
863 // Update Live In information
864 recomputeLiveIns(*testMBB);
865 recomputeLiveIns(*tailMBB);
866}
867
868void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
870 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
872 assert(STI.is64Bit() && "different expansion needed for 32 bit");
873 assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");
875 const BasicBlock *LLVM_BB = MBB.getBasicBlock();
876
877 assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
879 "Inline stack probe loop will clobber live EFLAGS.");
880
881 // RAX contains the number of bytes of desired stack adjustment.
882 // The handling here assumes this value has already been updated so as to
883 // maintain stack alignment.
884 //
885 // We need to exit with RSP modified by this amount and execute suitable
886 // page touches to notify the OS that we're growing the stack responsibly.
887 // All stack probing must be done without modifying RSP.
888 //
889 // MBB:
890 // SizeReg = RAX;
891 // ZeroReg = 0
892 // CopyReg = RSP
893 // Flags, TestReg = CopyReg - SizeReg
894 // FinalReg = !Flags.Ovf ? TestReg : ZeroReg
895 // LimitReg = gs magic thread env access
896 // if FinalReg >= LimitReg goto ContinueMBB
897 // RoundBB:
898 // RoundReg = page address of FinalReg
899 // LoopMBB:
900 // LoopReg = PHI(LimitReg,ProbeReg)
901 // ProbeReg = LoopReg - PageSize
902 // [ProbeReg] = 0
903 // if (ProbeReg > RoundReg) goto LoopMBB
904 // ContinueMBB:
905 // RSP = RSP - RAX
906 // [rest of original MBB]
907
908 // Set up the new basic blocks
909 MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(LLVM_BB);
910 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
911 MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(LLVM_BB);
912
913 MachineFunction::iterator MBBIter = std::next(MBB.getIterator());
914 MF.insert(MBBIter, RoundMBB);
915 MF.insert(MBBIter, LoopMBB);
916 MF.insert(MBBIter, ContinueMBB);
917
918 // Split MBB and move the tail portion down to ContinueMBB.
919 MachineBasicBlock::iterator BeforeMBBI = std::prev(MBBI);
920 ContinueMBB->splice(ContinueMBB->begin(), &MBB, MBBI, MBB.end());
922
923 // Some useful constants
924 const int64_t ThreadEnvironmentStackLimit = 0x10;
925 const int64_t PageSize = 0x1000;
926 const int64_t PageMask = ~(PageSize - 1);
927
928 // Registers we need. For the normal case we use virtual
929 // registers. For the prolog expansion we use RAX, RCX and RDX.
931 const TargetRegisterClass *RegClass = &X86::GR64RegClass;
932 const Register SizeReg = InProlog ? X86::RAX
933 : MRI.createVirtualRegister(RegClass),
934 ZeroReg = InProlog ? X86::RCX
935 : MRI.createVirtualRegister(RegClass),
936 CopyReg = InProlog ? X86::RDX
937 : MRI.createVirtualRegister(RegClass),
938 TestReg = InProlog ? X86::RDX
939 : MRI.createVirtualRegister(RegClass),
940 FinalReg = InProlog ? X86::RDX
941 : MRI.createVirtualRegister(RegClass),
942 RoundedReg = InProlog ? X86::RDX
943 : MRI.createVirtualRegister(RegClass),
944 LimitReg = InProlog ? X86::RCX
945 : MRI.createVirtualRegister(RegClass),
946 JoinReg = InProlog ? X86::RCX
947 : MRI.createVirtualRegister(RegClass),
948 ProbeReg = InProlog ? X86::RCX
949 : MRI.createVirtualRegister(RegClass);
950
951 // SP-relative offsets where we can save RCX and RDX.
952 int64_t RCXShadowSlot = 0;
953 int64_t RDXShadowSlot = 0;
954
955 // If inlining in the prolog, save RCX and RDX.
956 if (InProlog) {
957 // Compute the offsets. We need to account for things already
958 // pushed onto the stack at this point: return address, frame
959 // pointer (if used), and callee saves.
961 const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();
962 const bool HasFP = hasFP(MF);
963
964 // Check if we need to spill RCX and/or RDX.
965 // Here we assume that no earlier prologue instruction changes RCX and/or
966 // RDX, so checking the block live-ins is enough.
967 const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX);
968 const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX);
969 int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
970 // Assign the initial slot to both registers, then change RDX's slot if both
971 // need to be spilled.
972 if (IsRCXLiveIn)
973 RCXShadowSlot = InitSlot;
974 if (IsRDXLiveIn)
975 RDXShadowSlot = InitSlot;
976 if (IsRDXLiveIn && IsRCXLiveIn)
977 RDXShadowSlot += 8;
978 // Emit the saves if needed.
979 if (IsRCXLiveIn)
980 addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
981 RCXShadowSlot)
982 .addReg(X86::RCX);
983 if (IsRDXLiveIn)
984 addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
985 RDXShadowSlot)
986 .addReg(X86::RDX);
987 } else {
988 // Not in the prolog. Copy RAX to a virtual reg.
989 BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX);
990 }
991
992 // Add code to MBB to check for overflow and set the new target stack pointer
993 // to zero if so.
994 BuildMI(&MBB, DL, TII.get(X86::XOR64rr), ZeroReg)
995 .addReg(ZeroReg, RegState::Undef)
996 .addReg(ZeroReg, RegState::Undef);
997 BuildMI(&MBB, DL, TII.get(X86::MOV64rr), CopyReg).addReg(X86::RSP);
998 BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg)
999 .addReg(CopyReg)
1000 .addReg(SizeReg);
1001 BuildMI(&MBB, DL, TII.get(X86::CMOV64rr), FinalReg)
1002 .addReg(TestReg)
1003 .addReg(ZeroReg)
1005
1006 // FinalReg now holds final stack pointer value, or zero if
1007 // allocation would overflow. Compare against the current stack
1008 // limit from the thread environment block. Note this limit is the
1009 // lowest touched page on the stack, not the point at which the OS
1010 // will cause an overflow exception, so this is just an optimization
1011 // to avoid unnecessarily touching pages that are below the current
1012 // SP but already committed to the stack by the OS.
1013 BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg)
1014 .addReg(0)
1015 .addImm(1)
1016 .addReg(0)
1017 .addImm(ThreadEnvironmentStackLimit)
1018 .addReg(X86::GS);
1019 BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg);
1020 // Jump if the desired stack pointer is at or above the stack limit.
1021 BuildMI(&MBB, DL, TII.get(X86::JCC_1)).addMBB(ContinueMBB).addImm(X86::COND_AE);
1022
1023 // Add code to roundMBB to round the final stack pointer to a page boundary.
1024 RoundMBB->addLiveIn(FinalReg);
1025 BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg)
1026 .addReg(FinalReg)
1027 .addImm(PageMask);
1028 BuildMI(RoundMBB, DL, TII.get(X86::JMP_1)).addMBB(LoopMBB);
1029
1030 // LimitReg now holds the current stack limit, RoundedReg page-rounded
1031 // final RSP value. Add code to loopMBB to decrement LimitReg page-by-page
1032 // and probe until we reach RoundedReg.
1033 if (!InProlog) {
1034 BuildMI(LoopMBB, DL, TII.get(X86::PHI), JoinReg)
1035 .addReg(LimitReg)
1036 .addMBB(RoundMBB)
1037 .addReg(ProbeReg)
1038 .addMBB(LoopMBB);
1039 }
1040
1041 LoopMBB->addLiveIn(JoinReg);
1042 addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg,
1043 false, -PageSize);
1044
1045 // Probe by storing a byte onto the stack.
1046 BuildMI(LoopMBB, DL, TII.get(X86::MOV8mi))
1047 .addReg(ProbeReg)
1048 .addImm(1)
1049 .addReg(0)
1050 .addImm(0)
1051 .addReg(0)
1052 .addImm(0);
1053
1054 LoopMBB->addLiveIn(RoundedReg);
1055 BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
1056 .addReg(RoundedReg)
1057 .addReg(ProbeReg);
1058 BuildMI(LoopMBB, DL, TII.get(X86::JCC_1)).addMBB(LoopMBB).addImm(X86::COND_NE);
1059
1060 MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI();
1061
1062 // If in prolog, restore RDX and RCX.
1063 if (InProlog) {
1064 if (RCXShadowSlot) // It means we spilled RCX in the prologue.
1065 addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
1066 TII.get(X86::MOV64rm), X86::RCX),
1067 X86::RSP, false, RCXShadowSlot);
1068 if (RDXShadowSlot) // It means we spilled RDX in the prologue.
1069 addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
1070 TII.get(X86::MOV64rm), X86::RDX),
1071 X86::RSP, false, RDXShadowSlot);
1072 }
1073
1074 // Now that the probing is done, add code to continueMBB to update
1075 // the stack pointer for real.
1076 ContinueMBB->addLiveIn(SizeReg);
1077 BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
1078 .addReg(X86::RSP)
1079 .addReg(SizeReg);
1080
1081 // Add the control flow edges we need.
1082 MBB.addSuccessor(ContinueMBB);
1083 MBB.addSuccessor(RoundMBB);
1084 RoundMBB->addSuccessor(LoopMBB);
1085 LoopMBB->addSuccessor(ContinueMBB);
1086 LoopMBB->addSuccessor(LoopMBB);
1087
1088 // Mark all the instructions added to the prolog as frame setup.
1089 if (InProlog) {
1090 for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) {
1091 BeforeMBBI->setFlag(MachineInstr::FrameSetup);
1092 }
1093 for (MachineInstr &MI : *RoundMBB) {
1095 }
1096 for (MachineInstr &MI : *LoopMBB) {
1098 }
1099 for (MachineInstr &MI :
1100 llvm::make_range(ContinueMBB->begin(), ContinueMBBI)) {
1102 }
1103 }
1104}
1105
1106void X86FrameLowering::emitStackProbeCall(
1108 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
1109 std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
1110 bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
1111
1112 // FIXME: Add indirect thunk support and remove this.
1113 if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls())
1114 report_fatal_error("Emitting stack probe calls on 64-bit with the large "
1115 "code model and indirect thunks not yet implemented.");
1116
1117 assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
1119 "Stack probe calls will clobber live EFLAGS.");
1120
1121 unsigned CallOp;
1122 if (Is64Bit)
1123 CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
1124 else
1125 CallOp = X86::CALLpcrel32;
1126
1128
1130 MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI);
1131
1132 // All current stack probes take AX and SP as input, clobber flags, and
1133 // preserve all registers. x86_64 probes leave RSP unmodified.
1135 // For the large code model, we have to call through a register. Use R11,
1136 // as it is scratch in all supported calling conventions.
1137 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
1139 CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
1140 } else {
1141 CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp))
1143 }
1144
1145 unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX;
1146 unsigned SP = Uses64BitFramePtr ? X86::RSP : X86::ESP;
1152
1153 MachineInstr *ModInst = CI;
1154 if (STI.isTargetWin64() || !STI.isOSWindows()) {
1155 // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
1156 // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
1157 // themselves. They also do not clobber %rax, so we can reuse it when
1158 // adjusting %rsp.
1159 // All other platforms do not specify a particular ABI for the stack probe
1160 // function, so we arbitrarily define it to not adjust %esp/%rsp itself.
1161 ModInst =
1163 .addReg(SP)
1164 .addReg(AX);
1165 }
1166
1167 // DebugInfo variable locations -- if there's an instruction number for the
1168 // allocation (i.e., DYN_ALLOC_*), substitute it for the instruction that
1169 // modifies SP.
1170 if (InstrNum) {
1171 if (STI.isTargetWin64() || !STI.isOSWindows()) {
1172 // Label destination operand of the subtract.
1173 MF.makeDebugValueSubstitution(*InstrNum,
1174 {ModInst->getDebugInstrNum(), 0});
1175 } else {
1176 // Label the call. The operand number is the penultimate operand, zero
1177 // based.
1178 unsigned SPDefOperand = ModInst->getNumOperands() - 2;
1180 *InstrNum, {ModInst->getDebugInstrNum(), SPDefOperand});
1181 }
1182 }
1183
1184 if (InProlog) {
1185 // Apply the frame setup flag to all inserted instrs.
1186 for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI)
1187 ExpansionMBBI->setFlag(MachineInstr::FrameSetup);
1188 }
1189}
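// Illustrative outcome (Win64, which reaches the "SUB %rsp after the call"
// path above): the prologue materializes the size in %rax and this helper emits
//   callq __chkstk
//   subq %rax, %rsp
// On 32-bit MSVC targets, _chkstk adjusts %esp itself, so no trailing SUB is
// added here.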
1190
1191static unsigned calculateSetFPREG(uint64_t SPAdjust) {
1192 // Win64 ABI has a less restrictive limitation of 240; 128 works equally well
1193 // and might require smaller successive adjustments.
1194 const uint64_t Win64MaxSEHOffset = 128;
1195 uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset);
1196 // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode.
1197 return SEHFrameOffset & -16;
1198}
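// A couple of worked values (illustrative): calculateSetFPREG(40) == 32 and
// calculateSetFPREG(1000) == 128 -- the adjustment is clamped to 128 and then
// rounded down to a 16-byte boundary as required by UWOP_SET_FPREG.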
1199
1200// If we're forcing a stack realignment we can't rely on just the frame
1201// info, we need to know the ABI stack alignment as well in case we
1202// have a call out. Otherwise just make sure we have some alignment - we'll
1203// go with the minimum SlotSize.
1204uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const {
1205 const MachineFrameInfo &MFI = MF.getFrameInfo();
1206 Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment.
1208 if (MF.getFunction().hasFnAttribute("stackrealign")) {
1209 if (MFI.hasCalls())
1210 MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
1211 else if (MaxAlign < SlotSize)
1212 MaxAlign = Align(SlotSize);
1213 }
1214 return MaxAlign.value();
1215}
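// Example (illustrative): with the "stackrealign" attribute, a function that
// makes calls and holds a 64-byte-aligned object gets max(ABI stack alignment,
// 64); with the attribute but no calls the result is still at least SlotSize.
// Without the attribute, the frame's own maximum alignment is returned as-is.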
1216
1217void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
1219 const DebugLoc &DL, unsigned Reg,
1220 uint64_t MaxAlign) const {
1221 uint64_t Val = -MaxAlign;
1222 unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);
1223
1224 MachineFunction &MF = *MBB.getParent();
1226 const X86TargetLowering &TLI = *STI.getTargetLowering();
1227 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
1228 const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
1229
1230 // We want to make sure that (in worst case) less than StackProbeSize bytes
1231 // are not probed after the AND. This assumption is used in
1232 // emitStackProbeInlineGeneric.
1233 if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) {
1234 {
1235 NumFrameLoopProbe++;
1236 MachineBasicBlock *entryMBB =
1238 MachineBasicBlock *headMBB =
1240 MachineBasicBlock *bodyMBB =
1242 MachineBasicBlock *footMBB =
1244
1246 MF.insert(MBBIter, entryMBB);
1247 MF.insert(MBBIter, headMBB);
1248 MF.insert(MBBIter, bodyMBB);
1249 MF.insert(MBBIter, footMBB);
1250 const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
1251 Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
1252 : Is64Bit ? X86::R11D
1253 : X86::EAX;
1254
1255 // Setup entry block
1256 {
1257
1258 entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI);
1259 BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
1262 MachineInstr *MI =
1263 BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed)
1264 .addReg(FinalStackProbed)
1265 .addImm(Val)
1267
1268 // The EFLAGS implicit def is dead.
1269 MI->getOperand(3).setIsDead();
1270
1271 BuildMI(entryMBB, DL,
1272 TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
1273 .addReg(FinalStackProbed)
1276 BuildMI(entryMBB, DL, TII.get(X86::JCC_1))
1277 .addMBB(&MBB)
1280 entryMBB->addSuccessor(headMBB);
1281 entryMBB->addSuccessor(&MBB);
1282 }
1283
1284 // Loop entry block
1285
1286 {
1287 const unsigned SUBOpc =
1288 getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
1289 BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr)
1291 .addImm(StackProbeSize)
1293
1294 BuildMI(headMBB, DL,
1295 TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
1297 .addReg(FinalStackProbed)
1299
1300 // jump to the footer if StackPtr < FinalStackProbed
1301 BuildMI(headMBB, DL, TII.get(X86::JCC_1))
1302 .addMBB(footMBB)
1305
1306 headMBB->addSuccessor(bodyMBB);
1307 headMBB->addSuccessor(footMBB);
1308 }
1309
1310 // setup loop body
1311 {
1312 addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc))
1314 StackPtr, false, 0)
1315 .addImm(0)
1317
1318 const unsigned SUBOpc =
1319 getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
1320 BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr)
1322 .addImm(StackProbeSize)
1324
1325 // cmp with stack pointer bound
1326 BuildMI(bodyMBB, DL,
1327 TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
1328 .addReg(FinalStackProbed)
1331
1332 // jump back while FinalStackProbed < StackPtr
1333 BuildMI(bodyMBB, DL, TII.get(X86::JCC_1))
1334 .addMBB(bodyMBB)
1337 bodyMBB->addSuccessor(bodyMBB);
1338 bodyMBB->addSuccessor(footMBB);
1339 }
1340
1341 // setup loop footer
1342 {
1343 BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr)
1344 .addReg(FinalStackProbed)
1346 addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc))
1348 StackPtr, false, 0)
1349 .addImm(0)
1351 footMBB->addSuccessor(&MBB);
1352 }
1353
1354 recomputeLiveIns(*headMBB);
1355 recomputeLiveIns(*bodyMBB);
1356 recomputeLiveIns(*footMBB);
1358 }
1359 } else {
1360 MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
1361 .addReg(Reg)
1362 .addImm(Val)
1364
1365 // The EFLAGS implicit def is dead.
1366 MI->getOperand(3).setIsDead();
1367 }
1368}
1369
1371 // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be
1372 // clobbered by any interrupt handler.
1373 assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
1374 "MF used frame lowering for wrong subtarget");
1375 const Function &Fn = MF.getFunction();
1376 const bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv());
1377 return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone);
1378}
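// Example (illustrative): on SysV x86-64, a leaf function needing 24 bytes of
// locals can address them at -24(%rsp) without ever moving %rsp, because the
// 128 bytes below the stack pointer are guaranteed not to be clobbered by
// signal or interrupt handlers. Win64 calling conventions and the NoRedZone
// attribute disable this.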
1379
1380/// Return true if we need to use the restricted Windows x64 prologue and
1381/// epilogue code patterns that can be described with WinCFI (.seh_*
1382/// directives).
1383bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const {
1384 return MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
1385}
1386
1387bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
1388 return !isWin64Prologue(MF) && MF.needsFrameMoves();
1389}
1390
1391/// emitPrologue - Push callee-saved registers onto the stack, which
1392/// automatically adjusts the stack pointer. Adjust the stack pointer to allocate
1393/// space for local variables. Also emit labels used by the exception handler to
1394/// generate the exception handling frames.
1395
1396/*
1397 Here's a gist of what gets emitted:
1398
1399 ; Establish frame pointer, if needed
1400 [if needs FP]
1401 push %rbp
1402 .cfi_def_cfa_offset 16
1403 .cfi_offset %rbp, -16
1404 .seh_pushreg %rbp
1405 mov %rsp, %rbp
1406 .cfi_def_cfa_register %rbp
1407
1408 ; Spill general-purpose registers
1409 [for all callee-saved GPRs]
1410 pushq %<reg>
1411 [if not needs FP]
1412 .cfi_def_cfa_offset (offset from RETADDR)
1413 .seh_pushreg %<reg>
1414
1415 ; If the required stack alignment > default stack alignment
1416 ; rsp needs to be re-aligned. This creates a "re-alignment gap"
1417 ; of unknown size in the stack frame.
1418 [if stack needs re-alignment]
1419 and $MASK, %rsp
1420
1421 ; Allocate space for locals
1422 [if target is Windows and allocated space > 4096 bytes]
1423 ; Windows needs special care for allocations larger
1424 ; than one page.
1425 mov $NNN, %rax
1426 call ___chkstk_ms/___chkstk
1427 sub %rax, %rsp
1428 [else]
1429 sub $NNN, %rsp
1430
1431 [if needs FP]
1432 .seh_stackalloc (size of XMM spill slots)
1433 .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
1434 [else]
1435 .seh_stackalloc NNN
1436
1437 ; Spill XMMs
1438 ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved,
1439 ; they may get spilled on any platform, if the current function
1440 ; calls @llvm.eh.unwind.init
1441 [if needs FP]
1442 [for all callee-saved XMM registers]
1443 movaps %<xmm reg>, -MMM(%rbp)
1444 [for all callee-saved XMM registers]
1445 .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
1446 ; i.e. the offset relative to (%rbp - SEHFrameOffset)
1447 [else]
1448 [for all callee-saved XMM registers]
1449 movaps %<xmm reg>, KKK(%rsp)
1450 [for all callee-saved XMM registers]
1451 .seh_savexmm %<xmm reg>, KKK
1452
1453 .seh_endprologue
1454
1455 [if needs base pointer]
1456 mov %rsp, %rbx
1457 [if needs to restore base pointer]
1458 mov %rsp, -MMM(%rbp)
1459
1460 ; Emit CFI info
1461 [if needs FP]
1462 [for all callee-saved registers]
1463 .cfi_offset %<reg>, (offset from %rbp)
1464 [else]
1465 .cfi_def_cfa_offset (offset from RETADDR)
1466 [for all callee-saved registers]
1467 .cfi_offset %<reg>, (offset from %rsp)
1468
1469 Notes:
1470 - .seh directives are emitted only for Windows 64 ABI
1471 - .cv_fpo directives are emitted on win32 when emitting CodeView
1472 - .cfi directives are emitted for all other ABIs
1473 - for 32-bit code, substitute %e?? registers for %r??
1474*/
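/*
 Tiny concrete instance of the gist above (illustrative: SysV x86-64, frame
 pointer required, 16 bytes of locals, no callee-saved GPRs besides %rbp):

   push %rbp
   .cfi_def_cfa_offset 16
   .cfi_offset %rbp, -16
   mov %rsp, %rbp
   .cfi_def_cfa_register %rbp
   sub $16, %rsp
*/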
1475
1477 MachineBasicBlock &MBB) const {
1478 assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
1479 "MF used frame lowering for wrong subtarget");
1481 MachineFrameInfo &MFI = MF.getFrameInfo();
1482 const Function &Fn = MF.getFunction();
1483 MachineModuleInfo &MMI = MF.getMMI();
1485 uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
1486 uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate.
1487 bool IsFunclet = MBB.isEHFuncletEntry();
1489 if (Fn.hasPersonalityFn())
1490 Personality = classifyEHPersonality(Fn.getPersonalityFn());
1491 bool FnHasClrFunclet =
1492 MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR;
1493 bool IsClrFunclet = IsFunclet && FnHasClrFunclet;
1494 bool HasFP = hasFP(MF);
1495 bool IsWin64Prologue = isWin64Prologue(MF);
1496 bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry();
1497 // FIXME: Emit FPO data for EH funclets.
1498 bool NeedsWinFPO =
1499 !IsFunclet && STI.isTargetWin32() && MMI.getModule()->getCodeViewFlag();
1500 bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO;
1501 bool NeedsDwarfCFI = needsDwarfCFI(MF);
1503 const Register MachineFramePtr =
1506 Register BasePtr = TRI->getBaseRegister();
1507 bool HasWinCFI = false;
1508
1509 // Debug location must be unknown since the first debug location is used
1510 // to determine the end of the prologue.
1511 DebugLoc DL;
1512
1513 // Space reserved for stack-based arguments when making a (ABI-guaranteed)
1514 // tail call.
1515 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
1516 if (TailCallArgReserveSize && IsWin64Prologue)
1517 report_fatal_error("Can't handle guaranteed tail call under win64 yet");
1518
1519 const bool EmitStackProbeCall =
1521 unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
1522
1523 if (HasFP && X86FI->hasSwiftAsyncContext()) {
1527 // The special symbol below is absolute and has a *value* suitable to be
1528 // combined with the frame pointer directly.
1529 BuildMI(MBB, MBBI, DL, TII.get(X86::OR64rm), MachineFramePtr)
1530 .addUse(MachineFramePtr)
1531 .addUse(X86::RIP)
1532 .addImm(1)
1533 .addUse(X86::NoRegister)
1534 .addExternalSymbol("swift_async_extendedFramePointerFlags",
1536 .addUse(X86::NoRegister);
1537 break;
1538 }
1539 [[fallthrough]];
1540
1542 BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8), MachineFramePtr)
1543 .addUse(MachineFramePtr)
1544 .addImm(60)
1546 break;
1547
1549 break;
1550 }
1551 }
1552
1553 // Re-align the stack on 64-bit if the x86-interrupt calling convention is
1554 // used and an error code was pushed, since the x86-64 ABI requires a 16-byte
1555 // stack alignment.
1557 Fn.arg_size() == 2) {
1558 StackSize += 8;
1559 MFI.setStackSize(StackSize);
1560 emitSPUpdate(MBB, MBBI, DL, -8, /*InEpilogue=*/false);
1561 }
1562
1563 // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
1564 // function, and use up to 128 bytes of stack space, don't have a frame
1565 // pointer, calls, or dynamic alloca then we do not need to adjust the
1566 // stack pointer (we fit in the Red Zone). We also check that we don't
1567 // push and pop from the stack.
1568 if (has128ByteRedZone(MF) && !TRI->hasStackRealignment(MF) &&
1569 !MFI.hasVarSizedObjects() && // No dynamic alloca.
1570 !MFI.adjustsStack() && // No calls.
1571 !EmitStackProbeCall && // No stack probes.
1572 !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
1573 !MF.shouldSplitStack()) { // Regular stack
1574 uint64_t MinSize =
1576 if (HasFP) MinSize += SlotSize;
1577 X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0);
1578 StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
1579 MFI.setStackSize(StackSize);
1580 }
1581
1582 // Insert stack pointer adjustment for later moving of return addr. Only
1583 // applies to tail call optimized functions where the callee argument stack
1584 // size is bigger than the caller's.
1585 if (TailCallArgReserveSize != 0) {
1586 BuildStackAdjustment(MBB, MBBI, DL, -(int)TailCallArgReserveSize,
1587 /*InEpilogue=*/false)
1589 }
1590
1591 // Mapping for machine moves:
1592 //
1593 // DST: VirtualFP AND
1594 // SRC: VirtualFP => DW_CFA_def_cfa_offset
1595 // ELSE => DW_CFA_def_cfa
1596 //
1597 // SRC: VirtualFP AND
1598 // DST: Register => DW_CFA_def_cfa_register
1599 //
1600 // ELSE
1601 // OFFSET < 0 => DW_CFA_offset_extended_sf
1602 // REG < 64 => DW_CFA_offset + Reg
1603 // ELSE => DW_CFA_offset_extended
1604
1605 uint64_t NumBytes = 0;
1606 int stackGrowth = -SlotSize;
1607
1608 // Find the funclet establisher parameter
1609 Register Establisher = X86::NoRegister;
1610 if (IsClrFunclet)
1611 Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX;
1612 else if (IsFunclet)
1613 Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX;
1614
1615 if (IsWin64Prologue && IsFunclet && !IsClrFunclet) {
1616 // Immediately spill establisher into the home slot.
1617 // The runtime cares about this.
1618 // MOV64mr %rdx, 16(%rsp)
1619 unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
1620 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16)
1621 .addReg(Establisher)
1623 MBB.addLiveIn(Establisher);
1624 }
1625
1626 if (HasFP) {
1627 assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved");
1628
1629 // Calculate required stack adjustment.
1630 uint64_t FrameSize = StackSize - SlotSize;
1631 // If required, include space for extra hidden slot for stashing base pointer.
1632 if (X86FI->getRestoreBasePointer())
1633 FrameSize += SlotSize;
1634
1635 NumBytes = FrameSize -
1636 (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1637
1638 // Callee-saved registers are pushed on stack before the stack is realigned.
1639 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
1640 NumBytes = alignTo(NumBytes, MaxAlign);
1641
1642 // Save EBP/RBP into the appropriate stack slot.
1643 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
1644 .addReg(MachineFramePtr, RegState::Kill)
1646
1647 if (NeedsDwarfCFI) {
1648 // Mark the place where EBP/RBP was saved.
1649 // Define the current CFA rule to use the provided offset.
1650 assert(StackSize);
1651 BuildCFI(MBB, MBBI, DL,
1652 MCCFIInstruction::cfiDefCfaOffset(nullptr, -2 * stackGrowth),
1654
1655 // Change the rule for the FramePtr to be an "offset" rule.
1656 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1657 BuildCFI(MBB, MBBI, DL,
1658 MCCFIInstruction::createOffset(nullptr, DwarfFramePtr,
1659 2 * stackGrowth),
1661 }
1662
1663 if (NeedsWinCFI) {
1664 HasWinCFI = true;
1665 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1668 }
1669
1670 if (!IsFunclet) {
1671 if (X86FI->hasSwiftAsyncContext()) {
1672 const auto &Attrs = MF.getFunction().getAttributes();
1673
1674 // Before we update the live frame pointer we have to ensure there's a
1675 // valid (or null) asynchronous context in its slot just before FP in
1676 // the frame record, so store it now.
1677 if (Attrs.hasAttrSomewhere(Attribute::SwiftAsync)) {
1678 // We have an initial context in r14, store it just before the frame
1679 // pointer.
1680 MBB.addLiveIn(X86::R14);
1681 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1682 .addReg(X86::R14)
1684 } else {
1685 // No initial context, store null so that there's no pointer that
1686 // could be misused.
1687 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i8))
1688 .addImm(0)
1690 }
1691
1692 if (NeedsWinCFI) {
1693 HasWinCFI = true;
1694 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1695 .addImm(X86::R14)
1697 }
1698
1699 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr)
1700 .addUse(X86::RSP)
1701 .addImm(1)
1702 .addUse(X86::NoRegister)
1703 .addImm(8)
1704 .addUse(X86::NoRegister)
1706 BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri8), X86::RSP)
1707 .addUse(X86::RSP)
1708 .addImm(8)
1710 }
1711
1712 if (!IsWin64Prologue && !IsFunclet) {
1713 // Update EBP with the new base value.
1714 if (!X86FI->hasSwiftAsyncContext())
1715 BuildMI(MBB, MBBI, DL,
1716 TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
1717 FramePtr)
1720
1721 if (NeedsDwarfCFI) {
1722 // Mark effective beginning of when frame pointer becomes valid.
1723 // Define the current CFA to use the EBP/RBP register.
1724 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1725 BuildCFI(
1726 MBB, MBBI, DL,
1727 MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
1729 }
1730
1731 if (NeedsWinFPO) {
1732 // .cv_fpo_setframe $FramePtr
1733 HasWinCFI = true;
1734 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
1736 .addImm(0)
1738 }
1739 }
1740 }
1741 } else {
1742 assert(!IsFunclet && "funclets without FPs not yet implemented");
1743 NumBytes = StackSize -
1744 (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1745 }
1746
1747 // Update the offset adjustment, which is mainly used by codeview to translate
1748 // from ESP to VFRAME relative local variable offsets.
1749 if (!IsFunclet) {
1750 if (HasFP && TRI->hasStackRealignment(MF))
1751 MFI.setOffsetAdjustment(-NumBytes);
1752 else
1753 MFI.setOffsetAdjustment(-StackSize);
1754 }
1755
1756 // For EH funclets, only allocate enough space for outgoing calls. Save the
1757 // NumBytes value that we would've used for the parent frame.
1758 unsigned ParentFrameNumBytes = NumBytes;
1759 if (IsFunclet)
1760 NumBytes = getWinEHFuncletFrameSize(MF);
1761
1762 // Skip the callee-saved push instructions.
1763 bool PushedRegs = false;
1764 int StackOffset = 2 * stackGrowth;
1765
1766 while (MBBI != MBB.end() &&
1767 MBBI->getFlag(MachineInstr::FrameSetup) &&
1768 (MBBI->getOpcode() == X86::PUSH32r ||
1769 MBBI->getOpcode() == X86::PUSH64r)) {
1770 PushedRegs = true;
1771 Register Reg = MBBI->getOperand(0).getReg();
1772 ++MBBI;
1773
1774 if (!HasFP && NeedsDwarfCFI) {
1775 // Mark callee-saved push instruction.
1776 // Define the current CFA rule to use the provided offset.
1777 assert(StackSize);
1778 BuildCFI(MBB, MBBI, DL,
1781 StackOffset += stackGrowth;
1782 }
1783
1784 if (NeedsWinCFI) {
1785 HasWinCFI = true;
1786 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1787 .addImm(Reg)
1789 }
1790 }
1791
1792 // Realign stack after we pushed callee-saved registers (so that we'll be
1793 // able to calculate their offsets from the frame pointer).
1794 // Don't do this for Win64, it needs to realign the stack after the prologue.
1795 if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF)) {
1796 assert(HasFP && "There should be a frame pointer if stack is realigned.");
1797 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1798
1799 if (NeedsWinCFI) {
1800 HasWinCFI = true;
1801 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlign))
1802 .addImm(MaxAlign)
1804 }
1805 }
1806
1807 // If there is a SUB32ri of ESP immediately before this instruction, merge
1808 // the two. This can be the case when tail call elimination is enabled and
1809 // the callee has more arguments than the caller.
1810 NumBytes -= mergeSPUpdates(MBB, MBBI, true);
1811
1812 // Adjust stack pointer: ESP -= numbytes.
1813
1814 // Windows and cygwin/mingw require a prologue helper routine when allocating
1815 // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
1816 // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
1817 // stack and adjust the stack pointer in one go. The 64-bit version of
1818 // __chkstk is only responsible for probing the stack. The 64-bit prologue is
1819 // responsible for adjusting the stack pointer. Touching the stack at 4K
1820 // increments is necessary to ensure that the guard pages used by the OS
1821 // virtual memory manager are allocated in the correct sequence.
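// Illustration (hypothetical size, not taken from this function's inputs): a
// Win64 frame needing 0x5000 bytes typically ends up as
//   mov  eax, 0x5000          ; requested allocation
//   call __chkstk             ; probes each 4K page, does not move RSP
//   sub  rsp, rax             ; the prologue performs the actual adjustment
// whereas the 32-bit __chkstk/__alloca helpers both probe and adjust ESP.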
1822 uint64_t AlignedNumBytes = NumBytes;
1823 if (IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF))
1824 AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign);
1825 if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) {
1826 assert(!X86FI->getUsesRedZone() &&
1827 "The Red Zone is not accounted for in stack probes");
1828
1829 // Check whether EAX is livein for this block.
1830 bool isEAXAlive = isEAXLiveIn(MBB);
1831
1832 if (isEAXAlive) {
1833 if (Is64Bit) {
1834 // Save RAX
1835 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1836 .addReg(X86::RAX, RegState::Kill)
1837 .setMIFlag(MachineInstr::FrameSetup);
1838 } else {
1839 // Save EAX
1840 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
1841 .addReg(X86::EAX, RegState::Kill)
1842 .setMIFlag(MachineInstr::FrameSetup);
1843 }
1844 }
1845
1846 if (Is64Bit) {
1847 // Handle the 64-bit Windows ABI case where we need to call __chkstk.
1848 // Function prologue is responsible for adjusting the stack pointer.
1849 int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes;
1850 BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Alloc)), X86::RAX)
1851 .addImm(Alloc)
1852 .setMIFlag(MachineInstr::FrameSetup);
1853 } else {
1854 // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
1855 // We'll also use 4 already allocated bytes for EAX.
1856 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
1857 .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
1858 .setMIFlag(MachineInstr::FrameSetup);
1859 }
1860
1861 // Call __chkstk, __chkstk_ms, or __alloca.
1862 emitStackProbe(MF, MBB, MBBI, DL, true);
1863
1864 if (isEAXAlive) {
1865 // Restore RAX/EAX
1866 MachineInstr *MI;
1867 if (Is64Bit)
1868 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV64rm), X86::RAX),
1869 StackPtr, false, NumBytes - 8);
1870 else
1871 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX),
1872 StackPtr, false, NumBytes - 4);
1873 MI->setFlag(MachineInstr::FrameSetup);
1874 MBB.insert(MBBI, MI);
1875 }
1876 } else if (NumBytes) {
1877 emitSPUpdate(MBB, MBBI, DL, -(int64_t)NumBytes, /*InEpilogue=*/false);
1878 }
1879
1880 if (NeedsWinCFI && NumBytes) {
1881 HasWinCFI = true;
1882 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
1883 .addImm(NumBytes)
1884 .setMIFlag(MachineInstr::FrameSetup);
1885 }
1886
1887 int SEHFrameOffset = 0;
1888 unsigned SPOrEstablisher;
1889 if (IsFunclet) {
1890 if (IsClrFunclet) {
1891 // The establisher parameter passed to a CLR funclet is actually a pointer
1892 // to the (mostly empty) frame of its nearest enclosing funclet; we have
1893 // to find the root function establisher frame by loading the PSPSym from
1894 // the intermediate frame.
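// In pseudo-assembly, the load/store pair below amounts to roughly
//   mov rX, [establisher + PSPSlotOffset]   ; recover the root Initial-SP
//   mov [rsp + PSPSlotOffset], rX           ; republish it in this frame
// (rX is an illustrative scratch register; the actual destination is whatever
// register Establisher names).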
1895 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
1896 MachinePointerInfo NoInfo;
1897 MBB.addLiveIn(Establisher);
1898 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher),
1899 Establisher, false, PSPSlotOffset)
1900 .addMemOperand(MF.getMachineMemOperand(
1901 NoInfo, MachineMemOperand::MOLoad, SlotSize, Align(SlotSize)))
1902 ;
1903 // Save the root establisher back into the current funclet's (mostly
1904 // empty) frame, in case a sub-funclet or the GC needs it.
1905 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr,
1906 false, PSPSlotOffset)
1907 .addReg(Establisher)
1908 .addMemOperand(MF.getMachineMemOperand(
1909 NoInfo,
1910 MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
1911 SlotSize, Align(SlotSize)));
1912 }
1913 SPOrEstablisher = Establisher;
1914 } else {
1915 SPOrEstablisher = StackPtr;
1916 }
1917
1918 if (IsWin64Prologue && HasFP) {
1919 // Set RBP to a small fixed offset from RSP. In the funclet case, we base
1920 // this calculation on the incoming establisher, which holds the value of
1921 // RSP from the parent frame at the end of the prologue.
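// Example with made-up numbers: if ParentFrameNumBytes is 0x140, the anchor
// calculated by calculateSetFPREG (which is capped to a small 16-byte-aligned
// value so it fits the SEH encoding) yields a single
//   lea rbp, [rsp + SEHFrameOffset]
// keeping later RBP-relative spill displacements small.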
1922 SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes);
1923 if (SEHFrameOffset)
1924 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
1925 SPOrEstablisher, false, SEHFrameOffset);
1926 else
1927 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr)
1928 .addReg(SPOrEstablisher);
1929
1930 // If this is not a funclet, emit the CFI describing our frame pointer.
1931 if (NeedsWinCFI && !IsFunclet) {
1932 assert(!NeedsWinFPO && "this setframe incompatible with FPO data");
1933 HasWinCFI = true;
1934 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
1935 .addImm(FramePtr)
1936 .addImm(SEHFrameOffset)
1937 .setMIFlag(MachineInstr::FrameSetup);
1938 if (isAsynchronousEHPersonality(Personality))
1939 MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset;
1940 }
1941 } else if (IsFunclet && STI.is32Bit()) {
1942 // Reset EBP / ESI to something good for funclets.
1943 MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL);
1944 // If we're a catch funclet, we can be returned to via catchret. Save ESP
1945 // into the registration node so that the runtime will restore it for us.
1946 if (!MBB.isCleanupFuncletEntry()) {
1947 assert(Personality == EHPersonality::MSVC_CXX);
1948 Register FrameReg;
1949 int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex;
1950 int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg).getFixed();
1951 // ESP is the first field, so no extra displacement is needed.
1952 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg,
1953 false, EHRegOffset)
1954 .addReg(X86::ESP);
1955 }
1956 }
1957
1958 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
1959 const MachineInstr &FrameInstr = *MBBI;
1960 ++MBBI;
1961
1962 if (NeedsWinCFI) {
1963 int FI;
1964 if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
1965 if (X86::FR64RegClass.contains(Reg)) {
1966 int Offset;
1967 Register IgnoredFrameReg;
1968 if (IsWin64Prologue && IsFunclet)
1969 Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg);
1970 else
1971 Offset =
1972 getFrameIndexReference(MF, FI, IgnoredFrameReg).getFixed() +
1973 SEHFrameOffset;
1974
1975 HasWinCFI = true;
1976 assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data");
1977 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
1978 .addImm(Reg)
1979 .addImm(Offset)
1980 .setMIFlag(MachineInstr::FrameSetup);
1981 }
1982 }
1983 }
1984 }
1985
1986 if (NeedsWinCFI && HasWinCFI)
1987 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
1988 .setMIFlag(MachineInstr::FrameSetup);
1989
1990 if (FnHasClrFunclet && !IsFunclet) {
1991 // Save the so-called Initial-SP (i.e. the value of the stack pointer
1992 // immediately after the prolog) into the PSPSlot so that funclets
1993 // and the GC can recover it.
1994 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
1995 auto PSPInfo = MachinePointerInfo::getFixedStack(
1996 MF, MF.getWinEHFuncInfo()->PSPSymFrameIdx);
1997 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, false,
1998 PSPSlotOffset)
1999 .addReg(StackPtr)
2000 .addMemOperand(MF.getMachineMemOperand(
2001 PSPInfo, MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
2002 SlotSize, Align(SlotSize)));
2003 }
2004
2005 // Realign stack after we spilled callee-saved registers (so that we'll be
2006 // able to calculate their offsets from the frame pointer).
2007 // Win64 requires aligning the stack after the prologue.
2008 if (IsWin64Prologue && TRI->hasStackRealignment(MF)) {
2009 assert(HasFP && "There should be a frame pointer if stack is realigned.");
2010 BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign);
2011 }
2012
2013 // We already dealt with stack realignment and funclets above.
2014 if (IsFunclet && STI.is32Bit())
2015 return;
2016
2017 // If we need a base pointer, set it up here. It's whatever the value
2018 // of the stack pointer is at this point. Any variable size objects
2019 // will be allocated after this, so we can still use the base pointer
2020 // to reference locals.
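// Sketch of the resulting code (64-bit case, base register per
// TRI->getBaseRegister(), typically RBX in 64-bit code and ESI in 32-bit code,
// matching the ESI/RBX note in the layout comment further down):
//   mov rbx, rsp              ; base pointer = SP before any dynamic allocas
// so later accesses such as 'mov reg, [rbx + off]' stay valid even after the
// stack pointer moves.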
2021 if (TRI->hasBasePointer(MF)) {
2022 // Update the base pointer with the current stack pointer.
2023 unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
2024 BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
2025 .addReg(SPOrEstablisher)
2026 .setMIFlag(MachineInstr::FrameSetup);
2027 if (X86FI->getRestoreBasePointer()) {
2028 // Stash value of base pointer. Saving RSP instead of EBP shortens
2029 // dependence chain. Used by SjLj EH.
2030 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2031 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)),
2032 FramePtr, true, X86FI->getRestoreBasePointerOffset())
2033 .addReg(SPOrEstablisher)
2034 .setMIFlag(MachineInstr::FrameSetup);
2035 }
2036
2037 if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) {
2038 // Stash the value of the frame pointer relative to the base pointer for
2039 // Win32 EH. This supports Win32 EH, which does the inverse of the above:
2040 // it recovers the frame pointer from the base pointer rather than the
2041 // other way around.
2042 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2043 Register UsedReg;
2044 int Offset =
2045 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
2046 .getFixed();
2047 assert(UsedReg == BasePtr);
2048 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset)
2049 .addReg(FramePtr)
2050 .setMIFlag(MachineInstr::FrameSetup);
2051 }
2052 }
2053
2054 if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
2055 // Mark end of stack pointer adjustment.
2056 if (!HasFP && NumBytes) {
2057 // Define the current CFA rule to use the provided offset.
2058 assert(StackSize);
2059 BuildCFI(
2060 MBB, MBBI, DL,
2061 MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize - stackGrowth),
2062 MachineInstr::FrameSetup);
2063 }
2064
2065 // Emit DWARF info specifying the offsets of the callee-saved registers.
2066 emitCalleeSavedFrameMoves(MBB, MBBI, DL, true);
2067 }
2068
2069 // X86 Interrupt handling function cannot assume anything about the direction
2070 // flag (DF in EFLAGS register). Clear this flag by creating "cld" instruction
2071 // in each prologue of interrupt handler function.
2072 //
2073 // FIXME: Create "cld" instruction only in these cases:
2074 // 1. The interrupt handling function uses any of the "rep" instructions.
2075 // 2. Interrupt handling function calls another function.
2076 //
2077 if (Fn.getCallingConv() == CallingConv::X86_INTR)
2078 BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
2079 .setMIFlag(MachineInstr::FrameSetup);
2080
2081 // At this point we know if the function has WinCFI or not.
2082 MF.setHasWinCFI(HasWinCFI);
2083}
2084
2085 bool X86FrameLowering::canUseLEAForSPInEpilogue(
2086 const MachineFunction &MF) const {
2087 // We can't use LEA instructions for adjusting the stack pointer if we don't
2088 // have a frame pointer in the Win64 ABI. Only ADD instructions may be used
2089 // to deallocate the stack.
2090 // This means that we can use LEA for SP in two situations:
2091 // 1. We *aren't* using the Win64 ABI which means we are free to use LEA.
2092 // 2. We *have* a frame pointer which means we are permitted to use LEA.
2093 return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF);
2094}
2095
2096 static bool isFuncletReturnInstr(MachineInstr &MI) {
2097 switch (MI.getOpcode()) {
2098 case X86::CATCHRET:
2099 case X86::CLEANUPRET:
2100 return true;
2101 default:
2102 return false;
2103 }
2104 llvm_unreachable("impossible");
2105}
2106
2107// CLR funclets use a special "Previous Stack Pointer Symbol" slot on the
2108// stack. It holds a pointer to the bottom of the root function frame. The
2109// establisher frame pointer passed to a nested funclet may point to the
2110// (mostly empty) frame of its parent funclet, but it will need to find
2111// the frame of the root function to access locals. To facilitate this,
2112// every funclet copies the pointer to the bottom of the root function
2113// frame into a PSPSym slot in its own (mostly empty) stack frame. Using the
2114// same offset for the PSPSym in the root function frame that's used in the
2115// funclets' frames allows each funclet to dynamically accept any ancestor
2116// frame as its establisher argument (the runtime doesn't guarantee the
2117// immediate parent for some reason lost to history), and also allows the GC,
2118// which uses the PSPSym for some bookkeeping, to find it in any funclet's
2119// frame with only a single offset reported for the entire method.
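// Illustrative picture (the offset is invented for the example): if the root
// frame stores Initial-SP at [SP + 0x20], then every funclet also reserves
// slot [SP + 0x20] in its own small frame and copies the same pointer there,
// so the runtime and the GC can always find it at one fixed offset.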
2120unsigned
2121X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const {
2122 const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo();
2123 Register SPReg;
2124 int Offset = getFrameIndexReferencePreferSP(MF, Info.PSPSymFrameIdx, SPReg,
2125 /*IgnoreSPUpdates*/ true)
2126 .getFixed();
2127 assert(Offset >= 0 && SPReg == TRI->getStackRegister());
2128 return static_cast<unsigned>(Offset);
2129}
2130
2131unsigned
2132X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
2133 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2134 // This is the size of the pushed CSRs.
2135 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2136 // This is the size of callee saved XMMs.
2137 const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2138 unsigned XMMSize = WinEHXMMSlotInfo.size() *
2139 TRI->getSpillSize(X86::VR128RegClass);
2140 // This is the amount of stack a funclet needs to allocate.
2141 unsigned UsedSize;
2142 EHPersonality Personality =
2143 classifyEHPersonality(MF.getFunction().getPersonalityFn());
2144 if (Personality == EHPersonality::CoreCLR) {
2145 // CLR funclets need to hold enough space to include the PSPSym, at the
2146 // same offset from the stack pointer (immediately after the prolog) as it
2147 // resides at in the main function.
2148 UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize;
2149 } else {
2150 // Other funclets just need enough stack for outgoing call arguments.
2151 UsedSize = MF.getFrameInfo().getMaxCallFrameSize();
2152 }
2153 // RBP is not included in the callee saved register block. After pushing RBP,
2154 // everything is 16 byte aligned. Everything we allocate before an outgoing
2155 // call must also be 16 byte aligned.
2156 unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlign());
2157 // Subtract out the size of the callee saved registers. This is how much stack
2158 // each funclet will allocate.
2159 return FrameSizeMinusRBP + XMMSize - CSSize;
2160}
2161
2162static bool isTailCallOpcode(unsigned Opc) {
2163 return Opc == X86::TCRETURNri || Opc == X86::TCRETURNdi ||
2164 Opc == X86::TCRETURNmi ||
2165 Opc == X86::TCRETURNri64 || Opc == X86::TCRETURNdi64 ||
2166 Opc == X86::TCRETURNmi64;
2167}
2168
2169 void X86FrameLowering::emitEpilogue(MachineFunction &MF,
2170 MachineBasicBlock &MBB) const {
2171 const MachineFrameInfo &MFI = MF.getFrameInfo();
2172 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2173 MachineBasicBlock::iterator Terminator = MBB.getFirstTerminator();
2174 MachineBasicBlock::iterator MBBI = Terminator;
2175 DebugLoc DL;
2176 if (MBBI != MBB.end())
2177 DL = MBBI->getDebugLoc();
2178 // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
2179 const bool Is64BitILP32 = STI.isTarget64BitILP32();
2180 Register FramePtr = TRI->getFrameRegister(MF);
2181 Register MachineFramePtr =
2182 Is64BitILP32 ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr;
2183
2184 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2185 bool NeedsWin64CFI =
2186 IsWin64Prologue && MF.getFunction().needsUnwindTableEntry();
2187 bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(*MBBI);
2188
2189 // Get the number of bytes to allocate from the FrameInfo.
2190 uint64_t StackSize = MFI.getStackSize();
2191 uint64_t MaxAlign = calculateMaxStackAlign(MF);
2192 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2193 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
2194 bool HasFP = hasFP(MF);
2195 uint64_t NumBytes = 0;
2196
2197 bool NeedsDwarfCFI = (!MF.getTarget().getTargetTriple().isOSDarwin() &&
2198 !MF.getTarget().getTargetTriple().isOSWindows()) &&
2199 MF.needsFrameMoves();
2200
2201 if (IsFunclet) {
2202 assert(HasFP && "EH funclets without FP not yet implemented");
2203 NumBytes = getWinEHFuncletFrameSize(MF);
2204 } else if (HasFP) {
2205 // Calculate required stack adjustment.
2206 uint64_t FrameSize = StackSize - SlotSize;
2207 NumBytes = FrameSize - CSSize - TailCallArgReserveSize;
2208
2209 // Callee-saved registers were pushed on stack before the stack was
2210 // realigned.
2211 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
2212 NumBytes = alignTo(FrameSize, MaxAlign);
2213 } else {
2214 NumBytes = StackSize - CSSize - TailCallArgReserveSize;
2215 }
2216 uint64_t SEHStackAllocAmt = NumBytes;
2217
2218 // AfterPop is the position to insert .cfi_restore.
2219 MachineBasicBlock::iterator AfterPop = MBBI;
2220 if (HasFP) {
2221 if (X86FI->hasSwiftAsyncContext()) {
2222 // Discard the context.
2223 int Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
2224 emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/true);
2225 }
2226 // Pop EBP.
2227 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
2228 MachineFramePtr)
2229 .setMIFlag(MachineInstr::FrameDestroy);
2230
2231 // We need to reset FP to its untagged state on return. Bit 60 is currently
2232 // used to show the presence of an extended frame.
2233 if (X86FI->hasSwiftAsyncContext()) {
2234 BuildMI(MBB, MBBI, DL, TII.get(X86::BTR64ri8),
2235 MachineFramePtr)
2236 .addUse(MachineFramePtr)
2237 .addImm(60)
2238 .setMIFlag(MachineInstr::FrameDestroy);
2239 }
2240
2241 if (NeedsDwarfCFI) {
2242 unsigned DwarfStackPtr =
2243 TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
2244 BuildCFI(MBB, MBBI, DL,
2245 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2246 MachineInstr::FrameDestroy);
2247 if (!MBB.succ_empty() && !MBB.isReturnBlock()) {
2248 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
2249 BuildCFI(MBB, AfterPop, DL,
2250 MCCFIInstruction::createRestore(nullptr, DwarfFramePtr),
2251 MachineInstr::FrameDestroy);
2252 --MBBI;
2253 --AfterPop;
2254 }
2255 --MBBI;
2256 }
2257 }
2258
2259 MachineBasicBlock::iterator FirstCSPop = MBBI;
2260 // Skip the callee-saved pop instructions.
2261 while (MBBI != MBB.begin()) {
2262 MachineBasicBlock::iterator PI = std::prev(MBBI);
2263 unsigned Opc = PI->getOpcode();
2264
2265 if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
2266 if ((Opc != X86::POP32r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
2267 (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
2268 (Opc != X86::BTR64ri8 || !PI->getFlag(MachineInstr::FrameDestroy)) &&
2269 (Opc != X86::ADD64ri8 || !PI->getFlag(MachineInstr::FrameDestroy)))
2270 break;
2271 FirstCSPop = PI;
2272 }
2273
2274 --MBBI;
2275 }
2276 MBBI = FirstCSPop;
2277
2278 if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET)
2279 emitCatchRetReturnValue(MBB, FirstCSPop, &*Terminator);
2280
2281 if (MBBI != MBB.end())
2282 DL = MBBI->getDebugLoc();
2283 // If there is an ADD32ri or SUB32ri of ESP immediately before this
2284 // instruction, merge the two instructions.
2285 if (NumBytes || MFI.hasVarSizedObjects())
2286 NumBytes += mergeSPUpdates(MBB, MBBI, true);
2287
2288 // If dynamic alloca is used, then reset esp to point to the last callee-saved
2289 // slot before popping them off! The same applies when the stack was
2290 // realigned. Don't do this if this was a funclet epilogue, since the funclets
2291 // will not do realignment or dynamic stack allocation.
2292 if (((TRI->hasStackRealignment(MF)) || MFI.hasVarSizedObjects()) &&
2293 !IsFunclet) {
2294 if (TRI->hasStackRealignment(MF))
2295 MBBI = FirstCSPop;
2296 unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
2297 uint64_t LEAAmount =
2298 IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
2299
2300 if (X86FI->hasSwiftAsyncContext())
2301 LEAAmount -= 16;
2302
2303 // There are only two legal forms of epilogue:
2304 // - add SEHAllocationSize, %rsp
2305 // - lea SEHAllocationSize(%FramePtr), %rsp
2306 //
2307 // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence.
2308 // However, we may use this sequence if we have a frame pointer because the
2309 // effects of the prologue can safely be undone.
2310 if (LEAAmount != 0) {
2311 unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
2312 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
2313 FramePtr, false, LEAAmount);
2314 --MBBI;
2315 } else {
2316 unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
2317 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
2318 .addReg(FramePtr);
2319 --MBBI;
2320 }
2321 } else if (NumBytes) {
2322 // Adjust stack pointer back: ESP += numbytes.
2323 emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true);
2324 if (!HasFP && NeedsDwarfCFI) {
2325 // Define the current CFA rule to use the provided offset.
2326 BuildCFI(MBB, MBBI, DL,
2327 MCCFIInstruction::cfiDefCfaOffset(
2328 nullptr, CSSize + TailCallArgReserveSize + SlotSize),
2329 MachineInstr::FrameDestroy);
2330 }
2331 --MBBI;
2332 }
2333
2334 // Windows unwinder will not invoke function's exception handler if IP is
2335 // either in prologue or in epilogue. This behavior causes a problem when a
2336 // call immediately precedes an epilogue, because the return address points
2337 // into the epilogue. To cope with that, we insert an epilogue marker here,
2338 // then replace it with a 'nop' if it ends up immediately after a CALL in the
2339 // final emitted code.
2340 if (NeedsWin64CFI && MF.hasWinCFI())
2341 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
2342
2343 if (!HasFP && NeedsDwarfCFI) {
2344 MBBI = FirstCSPop;
2345 int64_t Offset = -CSSize - SlotSize;
2346 // Mark callee-saved pop instruction.
2347 // Define the current CFA rule to use the provided offset.
2348 while (MBBI != MBB.end()) {
2349 MachineBasicBlock::iterator PI = MBBI;
2350 unsigned Opc = PI->getOpcode();
2351 ++MBBI;
2352 if (Opc == X86::POP32r || Opc == X86::POP64r) {
2353 Offset += SlotSize;
2354 BuildCFI(MBB, MBBI, DL,
2355 MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset),
2356 MachineInstr::FrameDestroy);
2357 }
2358 }
2359 }
2360
2361 // Emit DWARF info specifying the restores of the callee-saved registers.
2362 // For an epilogue that ends in a return, or for any other block without
2363 // successors, there is no need to generate .cfi_restore for callee-saved registers.
2364 if (NeedsDwarfCFI && !MBB.succ_empty())
2365 emitCalleeSavedFrameMoves(MBB, AfterPop, DL, false);
2366
2367 if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
2368 // Add the return addr area delta back since we are not tail calling.
2369 int Offset = -1 * X86FI->getTCReturnAddrDelta();
2370 assert(Offset >= 0 && "TCDelta should never be positive");
2371 if (Offset) {
2372 // Check for possible merge with preceding ADD instruction.
2373 Offset += mergeSPUpdates(MBB, Terminator, true);
2374 emitSPUpdate(MBB, Terminator, DL, Offset, /*InEpilogue=*/true);
2375 }
2376 }
2377
2378 // Emit tilerelease for AMX kernel.
2379 if (X86FI->hasVirtualTileReg())
2380 BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
2381}
2382
2383 StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,
2384 int FI,
2385 Register &FrameReg) const {
2386 const MachineFrameInfo &MFI = MF.getFrameInfo();
2387
2388 bool IsFixed = MFI.isFixedObjectIndex(FI);
2389 // We can't calculate offset from frame pointer if the stack is realigned,
2390 // so enforce usage of stack/base pointer. The base pointer is used when we
2391 // have dynamic allocas in addition to dynamic realignment.
2392 if (TRI->hasBasePointer(MF))
2393 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister();
2394 else if (TRI->hasStackRealignment(MF))
2395 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister();
2396 else
2397 FrameReg = TRI->getFrameRegister(MF);
2398
2399 // Offset will hold the offset from the stack pointer at function entry to the
2400 // object.
2401 // We need to factor in additional offsets applied during the prologue to the
2402 // frame, base, and stack pointer depending on which is used.
2403 int64_t Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
2404 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2405 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2406 uint64_t StackSize = MFI.getStackSize();
2407 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2408 int64_t FPDelta = 0;
2409
2410 // In an x86 interrupt, remove the offset we added to account for the return
2411 // address from any stack object allocated in the caller's frame. Interrupts
2412 // do not have a standard return address. Fixed objects in the current frame,
2413 // such as SSE register spills, should not get this treatment.
2414 if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR &&
2415 Offset >= 0) {
2416 Offset += getOffsetOfLocalArea();
2417 }
2418
2419 if (IsWin64Prologue) {
2420 assert(!MFI.hasCalls() || (StackSize % 16) == 8);
2421
2422 // Calculate required stack adjustment.
2423 uint64_t FrameSize = StackSize - SlotSize;
2424 // If required, include space for an extra hidden slot for stashing the base pointer.
2425 if (X86FI->getRestoreBasePointer())
2426 FrameSize += SlotSize;
2427 uint64_t NumBytes = FrameSize - CSSize;
2428
2429 uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
2430 if (FI && FI == X86FI->getFAIndex())
2431 return StackOffset::getFixed(-SEHFrameOffset);
2432
2433 // FPDelta is the offset from the "traditional" FP location of the old base
2434 // pointer followed by return address and the location required by the
2435 // restricted Win64 prologue.
2436 // Add FPDelta to all offsets below that go through the frame pointer.
2437 FPDelta = FrameSize - SEHFrameOffset;
2438 assert((!MFI.hasCalls() || (FPDelta % 16) == 0) &&
2439 "FPDelta isn't aligned per the Win64 ABI!");
2440 }
2441
2442 if (FrameReg == TRI->getFramePtr()) {
2443 // Skip saved EBP/RBP
2444 Offset += SlotSize;
2445
2446 // Account for restricted Windows prologue.
2447 Offset += FPDelta;
2448
2449 // Skip the RETADDR move area
2450 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2451 if (TailCallReturnAddrDelta < 0)
2452 Offset -= TailCallReturnAddrDelta;
2453
2454 return StackOffset::getFixed(Offset);
2455 }
2456
2457 // FrameReg is either the stack pointer or a base pointer. But the base is
2458 // located at the end of the statically known StackSize so the distinction
2459 // doesn't really matter.
2460 if (TRI->hasStackRealignment(MF) || TRI->hasBasePointer(MF))
2461 assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
2462 return StackOffset::getFixed(Offset + StackSize);
2463}
2464
2465 int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, int FI,
2466 Register &FrameReg) const {
2467 const MachineFrameInfo &MFI = MF.getFrameInfo();
2469 const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2470 const auto it = WinEHXMMSlotInfo.find(FI);
2471
2472 if (it == WinEHXMMSlotInfo.end())
2473 return getFrameIndexReference(MF, FI, FrameReg).getFixed();
2474
2475 FrameReg = TRI->getStackRegister();
2476 return alignDown(MFI.getMaxCallFrameSize(), getStackAlign().value()) +
2477 it->second;
2478}
2479
2480 StackOffset
2481 X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF, int FI,
2482 Register &FrameReg,
2483 int Adjustment) const {
2484 const MachineFrameInfo &MFI = MF.getFrameInfo();
2485 FrameReg = TRI->getStackRegister();
2486 return StackOffset::getFixed(MFI.getObjectOffset(FI) -
2487 getOffsetOfLocalArea() + Adjustment);
2488}
2489
2490 StackOffset
2491 X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
2492 int FI, Register &FrameReg,
2493 bool IgnoreSPUpdates) const {
2494
2495 const MachineFrameInfo &MFI = MF.getFrameInfo();
2496 // Does not include any dynamic realign.
2497 const uint64_t StackSize = MFI.getStackSize();
2498 // LLVM arranges the stack as follows:
2499 // ...
2500 // ARG2
2501 // ARG1
2502 // RETADDR
2503 // PUSH RBP <-- RBP points here
2504 // PUSH CSRs
2505 // ~~~~~~~ <-- possible stack realignment (non-win64)
2506 // ...
2507 // STACK OBJECTS
2508 // ... <-- RSP after prologue points here
2509 // ~~~~~~~ <-- possible stack realignment (win64)
2510 //
2511 // if (hasVarSizedObjects()):
2512 // ... <-- "base pointer" (ESI/RBX) points here
2513 // DYNAMIC ALLOCAS
2514 // ... <-- RSP points here
2515 //
2516 // Case 1: In the simple case of no stack realignment and no dynamic
2517 // allocas, both "fixed" stack objects (arguments and CSRs) are addressable
2518 // with fixed offsets from RSP.
2519 //
2520 // Case 2: In the case of stack realignment with no dynamic allocas, fixed
2521 // stack objects are addressed with RBP and regular stack objects with RSP.
2522 //
2523 // Case 3: In the case of dynamic allocas and stack realignment, RSP is used
2524 // to address stack arguments for outgoing calls and nothing else. The "base
2525 // pointer" points to local variables, and RBP points to fixed objects.
2526 //
2527 // In cases 2 and 3, we can only answer for non-fixed stack objects, and the
2528 // answer we give is relative to the SP after the prologue, and not the
2529 // SP in the middle of the function.
2530
2531 if (MFI.isFixedObjectIndex(FI) && TRI->hasStackRealignment(MF) &&
2532 !STI.isTargetWin64())
2533 return getFrameIndexReference(MF, FI, FrameReg);
2534
2535 // If !hasReservedCallFrame the function might have SP adjustment in the
2536 // body. So, even though the offset is statically known, it depends on where
2537 // we are in the function.
2538 if (!IgnoreSPUpdates && !hasReservedCallFrame(MF))
2539 return getFrameIndexReference(MF, FI, FrameReg);
2540
2541 // We don't handle tail calls, and shouldn't be seeing them either.
2543 "we don't handle this case!");
2544
2545 // This is how the math works out:
2546 //
2547 // %rsp grows (i.e. gets lower) left to right. Each box below is
2548 // one word (eight bytes). Obj0 is the stack slot we're trying to
2549 // get to.
2550 //
2551 // ----------------------------------
2552 // | BP | Obj0 | Obj1 | ... | ObjN |
2553 // ----------------------------------
2554 // ^ ^ ^ ^
2555 // A B C E
2556 //
2557 // A is the incoming stack pointer.
2558 // (B - A) is the local area offset (-8 for x86-64) [1]
2559 // (C - A) is the Offset returned by MFI.getObjectOffset for Obj0 [2]
2560 //
2561 // |(E - B)| is the StackSize (absolute value, positive). For a
2562 // stack that grows down, this works out to be (B - E). [3]
2563 //
2564 // E is also the value of %rsp after stack has been set up, and we
2565 // want (C - E) -- the value we can add to %rsp to get to Obj0. Now
2566 // (C - E) == (C - A) - (B - A) + (B - E)
2567 // { Using [1], [2] and [3] above }
2568 // == getObjectOffset - LocalAreaOffset + StackSize
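// Plugging in hypothetical numbers: getObjectOffset = -16, LocalAreaOffset = -8
// and StackSize = 40 give -16 - (-8) + 40 = 32, i.e. Obj0 is addressable at
// [rsp + 32] once the prologue has run.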
2569
2570 return getFrameIndexReferenceSP(MF, FI, FrameReg, StackSize);
2571}
2572
2573 bool X86FrameLowering::assignCalleeSavedSpillSlots(
2574 MachineFunction &MF, const TargetRegisterInfo *TRI,
2575 std::vector<CalleeSavedInfo> &CSI) const {
2576 MachineFrameInfo &MFI = MF.getFrameInfo();
2577 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2578
2579 unsigned CalleeSavedFrameSize = 0;
2580 unsigned XMMCalleeSavedFrameSize = 0;
2581 auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2582 int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
2583
2584 int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2585
2586 if (TailCallReturnAddrDelta < 0) {
2587 // create RETURNADDR area
2588 // arg
2589 // arg
2590 // RETADDR
2591 // { ...
2592 // RETADDR area
2593 // ...
2594 // }
2595 // [EBP]
2596 MFI.CreateFixedObject(-TailCallReturnAddrDelta,
2597 TailCallReturnAddrDelta - SlotSize, true);
2598 }
2599
2600 // Spill the BasePtr if it's used.
2601 if (this->TRI->hasBasePointer(MF)) {
2602 // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
2603 if (MF.hasEHFunclets()) {
2604 int FI = MFI.CreateSpillStackObject(SlotSize, Align(SlotSize));
2605 X86FI->setHasSEHFramePtrSave(true);
2606 X86FI->setSEHFramePtrSaveIndex(FI);
2607 }
2608 }
2609
2610 if (hasFP(MF)) {
2611 // emitPrologue always spills frame register the first thing.
2612 SpillSlotOffset -= SlotSize;
2613 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2614
2615 // The async context lives directly before the frame pointer, and we
2616 // allocate a second slot to preserve stack alignment.
2617 if (X86FI->hasSwiftAsyncContext()) {
2618 SpillSlotOffset -= SlotSize;
2619 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2620 SpillSlotOffset -= SlotSize;
2621 }
2622
2623 // Since emitPrologue and emitEpilogue will handle spilling and restoring of
2624 // the frame register, we can delete it from CSI list and not have to worry
2625 // about avoiding it later.
2626 Register FPReg = TRI->getFrameRegister(MF);
2627 for (unsigned i = 0; i < CSI.size(); ++i) {
2628 if (TRI->regsOverlap(CSI[i].getReg(),FPReg)) {
2629 CSI.erase(CSI.begin() + i);
2630 break;
2631 }
2632 }
2633 }
2634
2635 // Assign slots for GPRs. It increases frame size.
2636 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2637 Register Reg = I.getReg();
2638
2639 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
2640 continue;
2641
2642 SpillSlotOffset -= SlotSize;
2643 CalleeSavedFrameSize += SlotSize;
2644
2645 int SlotIndex = MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2646 I.setFrameIdx(SlotIndex);
2647 }
2648
2649 X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
2650 MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
2651
2652 // Assign slots for XMMs.
2653 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2654 Register Reg = I.getReg();
2655 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
2656 continue;
2657
2658 // If this is k-register make sure we lookup via the largest legal type.
2659 MVT VT = MVT::Other;
2660 if (X86::VK16RegClass.contains(Reg))
2661 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
2662
2663 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
2664 unsigned Size = TRI->getSpillSize(*RC);
2665 Align Alignment = TRI->getSpillAlign(*RC);
2666 // ensure alignment
2667 assert(SpillSlotOffset < 0 && "SpillSlotOffset should always < 0 on X86");
2668 SpillSlotOffset = -alignTo(-SpillSlotOffset, Alignment);
2669
2670 // spill into slot
2671 SpillSlotOffset -= Size;
2672 int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
2673 I.setFrameIdx(SlotIndex);
2674 MFI.ensureMaxAlignment(Alignment);
2675
2676 // Save the start offset and size of XMM in stack frame for funclets.
2677 if (X86::VR128RegClass.contains(Reg)) {
2678 WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize;
2679 XMMCalleeSavedFrameSize += Size;
2680 }
2681 }
2682
2683 return true;
2684}
2685
2686 bool X86FrameLowering::spillCalleeSavedRegisters(
2687 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2688 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2689 DebugLoc DL = MBB.findDebugLoc(MI);
2690
2691 // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI
2692 // for us, and there are no XMM CSRs on Win32.
2693 if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows())
2694 return true;
2695
2696 // Push GPRs. It increases frame size.
2697 const MachineFunction &MF = *MBB.getParent();
2698 unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
2699 for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
2700 Register Reg = I.getReg();
2701
2702 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
2703 continue;
2704
2705 const MachineRegisterInfo &MRI = MF.getRegInfo();
2706 bool isLiveIn = MRI.isLiveIn(Reg);
2707 if (!isLiveIn)
2708 MBB.addLiveIn(Reg);
2709
2710 // Decide whether we can add a kill flag to the use.
2711 bool CanKill = !isLiveIn;
2712 // Check if any subregister is live-in
2713 if (CanKill) {
2714 for (MCRegAliasIterator AReg(Reg, TRI, false); AReg.isValid(); ++AReg) {
2715 if (MRI.isLiveIn(*AReg)) {
2716 CanKill = false;
2717 break;
2718 }
2719 }
2720 }
2721
2722 // Do not set a kill flag on values that are also marked as live-in. This
2723 // happens with the @llvm.returnaddress intrinsic and with arguments
2724 // passed in callee saved registers.
2725 // Omitting the kill flags is conservatively correct even if the live-in
2726 // is not used after all.
2727 BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, getKillRegState(CanKill))
2728 .setMIFlag(MachineInstr::FrameSetup);
2729
2730
2731 // Make XMM regs spilled. X86 does not have ability of push/pop XMM.
2732 // It can be done by spilling XMMs to stack frame.
2733 for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
2734 Register Reg = I.getReg();
2735 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
2736 continue;
2737
2738 // If this is k-register make sure we lookup via the largest legal type.
2739 MVT VT = MVT::Other;
2740 if (X86::VK16RegClass.contains(Reg))
2741 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
2742
2743 // Add the callee-saved register as live-in. It's killed at the spill.
2744 MBB.addLiveIn(Reg);
2745 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
2746
2747 TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, TRI,
2748 Register());
2749 --MI;
2750 MI->setFlag(MachineInstr::FrameSetup);
2751 ++MI;
2752 }
2753
2754 return true;
2755}
2756
2757void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB,
2758 MachineBasicBlock::iterator MBBI,
2759 MachineInstr *CatchRet) const {
2760 // SEH shouldn't use catchret.
2763 "SEH should not use CATCHRET");
2764 const DebugLoc &DL = CatchRet->getDebugLoc();
2765 MachineBasicBlock *CatchRetTarget = CatchRet->getOperand(0).getMBB();
2766
2767 // Fill EAX/RAX with the address of the target block.
2768 if (STI.is64Bit()) {
2769 // LEA64r CatchRetTarget(%rip), %rax
2770 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), X86::RAX)
2771 .addReg(X86::RIP)
2772 .addImm(0)
2773 .addReg(0)
2774 .addMBB(CatchRetTarget)
2775 .addReg(0);
2776 } else {
2777 // MOV32ri $CatchRetTarget, %eax
2778 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
2779 .addMBB(CatchRetTarget);
2780 }
2781
2782 // Record that we've taken the address of CatchRetTarget and no longer just
2783 // reference it in a terminator.
2784 CatchRetTarget->setMachineBlockAddressTaken();
2785}
2786
2787 bool X86FrameLowering::restoreCalleeSavedRegisters(
2788 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2789 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2790 if (CSI.empty())
2791 return false;
2792
2793 if (MI != MBB.end() && isFuncletReturnInstr(*MI) && STI.isOSWindows()) {
2794 // Don't restore CSRs in 32-bit EH funclets. Matches
2795 // spillCalleeSavedRegisters.
2796 if (STI.is32Bit())
2797 return true;
2798 // Don't restore CSRs before an SEH catchret. SEH except blocks do not form
2799 // funclets. emitEpilogue transforms these to normal jumps.
2800 if (MI->getOpcode() == X86::CATCHRET) {
2801 const Function &F = MBB.getParent()->getFunction();
2802 bool IsSEH = isAsynchronousEHPersonality(
2803 classifyEHPersonality(F.getPersonalityFn()));
2804 if (IsSEH)
2805 return true;
2806 }
2807 }
2808
2808
2809 DebugLoc DL = MBB.findDebugLoc(MI);
2810
2811 // Reload XMMs from stack frame.
2812 for (const CalleeSavedInfo &I : CSI) {
2813 Register Reg = I.getReg();
2814 if (X86::GR64RegClass.contains(Reg) ||
2815 X86::GR32RegClass.contains(Reg))
2816 continue;
2817
2818 // If this is k-register make sure we lookup via the largest legal type.
2819 MVT VT = MVT::Other;
2820 if (X86::VK16RegClass.contains(Reg))
2821 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
2822
2823 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
2824 TII.loadRegFromStackSlot(MBB, MI, Reg, I.getFrameIdx(), RC, TRI,
2825 Register());
2826 }
2827
2828 // POP GPRs.
2829 unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
2830 for (const CalleeSavedInfo &I : CSI) {
2831 Register Reg = I.getReg();
2832 if (!X86::GR64RegClass.contains(Reg) &&
2833 !X86::GR32RegClass.contains(Reg))
2834 continue;
2835
2836 BuildMI(MBB, MI, DL, TII.get(Opc), Reg)
2837 .setMIFlag(MachineInstr::FrameDestroy);
2838 }
2839 return true;
2840}
2841
2842 void X86FrameLowering::determineCalleeSaves(MachineFunction &MF,
2843 BitVector &SavedRegs,
2844 RegScavenger *RS) const {
2845 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
2846
2847 // Spill the BasePtr if it's used.
2848 if (TRI->hasBasePointer(MF)){
2849 Register BasePtr = TRI->getBaseRegister();
2850 if (STI.isTarget64BitILP32())
2851 BasePtr = getX86SubSuperRegister(BasePtr, 64);
2852 SavedRegs.set(BasePtr);
2853 }
2854}
2855
2856static bool
2857 HasNestArgument(const MachineFunction *MF) {
2858 const Function &F = MF->getFunction();
2859 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
2860 I != E; I++) {
2861 if (I->hasNestAttr() && !I->use_empty())
2862 return true;
2863 }
2864 return false;
2865}
2866
2867/// GetScratchRegister - Get a temp register for performing work in the
2868/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
2869/// and the properties of the function either one or two registers will be
2870/// needed. Set primary to true for the first register, false for the second.
2871static unsigned
2872 GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary) {
2873 CallingConv::ID CallingConvention = MF.getFunction().getCallingConv();
2874
2875 // Erlang stuff.
2876 if (CallingConvention == CallingConv::HiPE) {
2877 if (Is64Bit)
2878 return Primary ? X86::R14 : X86::R13;
2879 else
2880 return Primary ? X86::EBX : X86::EDI;
2881 }
2882
2883 if (Is64Bit) {
2884 if (IsLP64)
2885 return Primary ? X86::R11 : X86::R12;
2886 else
2887 return Primary ? X86::R11D : X86::R12D;
2888 }
2889
2890 bool IsNested = HasNestArgument(&MF);
2891
2892 if (CallingConvention == CallingConv::X86_FastCall ||
2893 CallingConvention == CallingConv::Fast ||
2894 CallingConvention == CallingConv::Tail) {
2895 if (IsNested)
2896 report_fatal_error("Segmented stacks does not support fastcall with "
2897 "nested function.");
2898 return Primary ? X86::EAX : X86::ECX;
2899 }
2900 if (IsNested)
2901 return Primary ? X86::EDX : X86::EAX;
2902 return Primary ? X86::ECX : X86::EAX;
2903}
2904
2905// The stack limit in the TCB is set to this many bytes above the actual stack
2906// limit.
2907 static const uint64_t kSplitStackAvailable = 256;
2908
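// Example: if the real stack limit is L, the TCB stores L + 256, so a frame
// that needs fewer than 256 bytes can simply compare the stack pointer against
// the stored value instead of first computing SP - StackSize (see
// CompareStackPointer below).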
2910 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
2911 MachineFrameInfo &MFI = MF.getFrameInfo();
2912 uint64_t StackSize;
2913 unsigned TlsReg, TlsOffset;
2914 DebugLoc DL;
2915
2916 // To support shrink-wrapping we would need to insert the new blocks
2917 // at the right place and update the branches to PrologueMBB.
2918 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
2919
2920 unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
2921 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
2922 "Scratch register is live-in");
2923
2924 if (MF.getFunction().isVarArg())
2925 report_fatal_error("Segmented stacks do not support vararg functions.");
2926 if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
2929 report_fatal_error("Segmented stacks not supported on this platform.");
2930
2931 // Eventually StackSize will be calculated by a link-time pass, which will
2932 // also decide whether checking code needs to be injected into this particular
2933 // prologue.
2934 StackSize = MFI.getStackSize();
2935
2936 if (!MFI.needsSplitStackProlog())
2937 return;
2938
2939 MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
2940 MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
2941 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2942 bool IsNested = false;
2943
2944 // We need to know if the function has a nest argument only in 64 bit mode.
2945 if (Is64Bit)
2946 IsNested = HasNestArgument(&MF);
2947
2948 // The MOV R10, RAX needs to be in a different block, since the RET we emit in
2949 // allocMBB needs to be the last (terminating) instruction.
2950
2951 for (const auto &LI : PrologueMBB.liveins()) {
2952 allocMBB->addLiveIn(LI);
2953 checkMBB->addLiveIn(LI);
2954 }
2955
2956 if (IsNested)
2957 allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
2958
2959 MF.push_front(allocMBB);
2960 MF.push_front(checkMBB);
2961
2962 // When the frame size is less than 256 we just compare the stack
2963 // boundary directly to the value of the stack pointer, per gcc.
2964 bool CompareStackPointer = StackSize < kSplitStackAvailable;
2965
2966 // Read the limit of the current stacklet from the stack_guard location.
2967 if (Is64Bit) {
2968 if (STI.isTargetLinux()) {
2969 TlsReg = X86::FS;
2970 TlsOffset = IsLP64 ? 0x70 : 0x40;
2971 } else if (STI.isTargetDarwin()) {
2972 TlsReg = X86::GS;
2973 TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
2974 } else if (STI.isTargetWin64()) {
2975 TlsReg = X86::GS;
2976 TlsOffset = 0x28; // pvArbitrary, reserved for application use
2977 } else if (STI.isTargetFreeBSD()) {
2978 TlsReg = X86::FS;
2979 TlsOffset = 0x18;
2980 } else if (STI.isTargetDragonFly()) {
2981 TlsReg = X86::FS;
2982 TlsOffset = 0x20; // use tls_tcb.tcb_segstack
2983 } else {
2984 report_fatal_error("Segmented stacks not supported on this platform.");
2985 }
2986
2987 if (CompareStackPointer)
2988 ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
2989 else
2990 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r), ScratchReg).addReg(X86::RSP)
2991 .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
2992
2993 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm)).addReg(ScratchReg)
2994 .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
2995 } else {
2996 if (STI.isTargetLinux()) {
2997 TlsReg = X86::GS;
2998 TlsOffset = 0x30;
2999 } else if (STI.isTargetDarwin()) {
3000 TlsReg = X86::GS;
3001 TlsOffset = 0x48 + 90*4;
3002 } else if (STI.isTargetWin32()) {
3003 TlsReg = X86::FS;
3004 TlsOffset = 0x14; // pvArbitrary, reserved for application use
3005 } else if (STI.isTargetDragonFly()) {
3006 TlsReg = X86::FS;
3007 TlsOffset = 0x10; // use tls_tcb.tcb_segstack
3008 } else if (STI.isTargetFreeBSD()) {
3009 report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
3010 } else {
3011 report_fatal_error("Segmented stacks not supported on this platform.");
3012 }
3013
3014 if (CompareStackPointer)
3015 ScratchReg = X86::ESP;
3016 else
3017 BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
3018 .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
3019
3020 if (STI.isTargetLinux() || STI.isTargetWin32() ||
3021 STI.isTargetDragonFly()) {
3022 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
3023 .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
3024 } else if (STI.isTargetDarwin()) {
3025
3026 // TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
3027 unsigned ScratchReg2;
3028 bool SaveScratch2;
3029 if (CompareStackPointer) {
3030 // The primary scratch register is available for holding the TLS offset.
3031 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3032 SaveScratch2 = false;
3033 } else {
3034 // Need to use a second register to hold the TLS offset
3035 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
3036
3037 // Unfortunately, with fastcc the second scratch register may hold an
3038 // argument.
3039 SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
3040 }
3041
3042 // If Scratch2 is live-in then it needs to be saved.
3043 assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
3044 "Scratch register is live-in and not saved");
3045
3046 if (SaveScratch2)
3047 BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
3048 .addReg(ScratchReg2, RegState::Kill);
3049
3050 BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
3051 .addImm(TlsOffset);
3052 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3053 .addReg(ScratchReg)
3054 .addReg(ScratchReg2).addImm(1).addReg(0)
3055 .addImm(0)
3056 .addReg(TlsReg);
3057
3058 if (SaveScratch2)
3059 BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
3060 }
3061 }
3062
3063 // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
3064 // It jumps to normal execution of the function body.
3065 BuildMI(checkMBB, DL, TII.get(X86::JCC_1)).addMBB(&PrologueMBB).addImm(X86::COND_A);
3066
3067 // On 32 bit we first push the arguments size and then the frame size. On 64
3068 // bit, we pass the stack frame size in r10 and the argument size in r11.
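// Sketch of the 64-bit call site (the immediates are placeholders):
//   mov r10, <StackSize>      ; frame size
//   mov r11, <ArgSize>        ; incoming argument area size
//   call __morestack
// while the 32-bit path pushes <ArgSize> and <StackSize> instead.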
3069 if (Is64Bit) {
3070 // Functions with nested arguments use R10, so it needs to be saved across
3071 // the call to _morestack
3072
3073 const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
3074 const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
3075 const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
3076 const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
3077
3078 if (IsNested)
3079 BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
3080
3081 BuildMI(allocMBB, DL, TII.get(getMOVriOpcode(IsLP64, StackSize)), Reg10)
3082 .addImm(StackSize);
3083 BuildMI(allocMBB, DL,
3084 TII.get(getMOVriOpcode(IsLP64, X86FI->getArgumentStackSize())),
3085 Reg11)
3086 .addImm(X86FI->getArgumentStackSize());
3087 } else {
3088 BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
3089 .addImm(X86FI->getArgumentStackSize());
3090 BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
3091 .addImm(StackSize);
3092 }
3093
3094 // __morestack is in libgcc
3095 if (MF.getTarget().getCodeModel() == CodeModel::Large) {
3096 // Under the large code model, we cannot assume that __morestack lives
3097 // within 2^31 bytes of the call site, so we cannot use pc-relative
3098 // addressing. We cannot perform the call via a temporary register,
3099 // as the rax register may be used to store the static chain, and all
3100 // other suitable registers may be either callee-save or used for
3101 // parameter passing. We cannot use the stack at this point either
3102 // because __morestack manipulates the stack directly.
3103 //
3104 // To avoid these issues, perform an indirect call via a read-only memory
3105 // location containing the address.
3106 //
3107 // This solution is not perfect, as it assumes that the .rodata section
3108 // is laid out within 2^31 bytes of each function body, but this seems
3109 // to be sufficient for JIT.
3110 // FIXME: Add retpoline support and remove the error here.
3111 if (STI.useIndirectThunkCalls())
3112 report_fatal_error("Emitting morestack calls on 64-bit with the large "
3113 "code model and thunks not yet implemented.");
3114 BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
3115 .addReg(X86::RIP)
3116 .addImm(0)
3117 .addReg(0)
3118 .addExternalSymbol("__morestack_addr")
3119 .addReg(0);
3120 } else {
3121 if (Is64Bit)
3122 BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
3123 .addExternalSymbol("__morestack");
3124 else
3125 BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
3126 .addExternalSymbol("__morestack");
3127 }
3128
3129 if (IsNested)
3130 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
3131 else
3132 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
3133
3134 allocMBB->addSuccessor(&PrologueMBB);
3135
3136 checkMBB->addSuccessor(allocMBB, BranchProbability::getZero());
3137 checkMBB->addSuccessor(&PrologueMBB, BranchProbability::getOne());
3138
3139#ifdef EXPENSIVE_CHECKS
3140 MF.verify();
3141#endif
3142}
3143
3144/// Lookup an ERTS parameter in the !hipe.literals named metadata node.
3145/// HiPE provides Erlang Runtime System-internal parameters, such as PCB offsets
3146/// to fields it needs, through a named metadata node "hipe.literals" containing
3147/// name-value pairs.
3148static unsigned getHiPELiteral(
3149 NamedMDNode *HiPELiteralsMD, const StringRef LiteralName) {
3150 for (int i = 0, e = HiPELiteralsMD->getNumOperands(); i != e; ++i) {
3151 MDNode *Node = HiPELiteralsMD->getOperand(i);
3152 if (Node->getNumOperands() != 2) continue;
3153 MDString *NodeName = dyn_cast<MDString>(Node->getOperand(0));
3154 ValueAsMetadata *NodeVal = dyn_cast<ValueAsMetadata>(Node->getOperand(1));
3155 if (!NodeName || !NodeVal) continue;
3156 ConstantInt *ValConst = dyn_cast_or_null<ConstantInt>(NodeVal->getValue());
3157 if (ValConst && NodeName->getString() == LiteralName) {
3158 return ValConst->getZExtValue();
3159 }
3160 }
3161
3162 report_fatal_error("HiPE literal " + LiteralName
3163 + " required but not provided");
3164}
3165
3166// Return true if there are no non-ehpad successors to MBB and there are no
3167// non-meta instructions between MBBI and MBB.end().
3168 static bool blockEndIsUnreachable(const MachineBasicBlock &MBB,
3169 MachineBasicBlock::const_iterator MBBI) {
3170 return llvm::all_of(
3171 MBB.successors(),
3172 [](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) &&
3173 std::all_of(MBBI, MBB.end(), [](const MachineInstr &MI) {
3174 return MI.isMetaInstruction();
3175 });
3176}
3177
3178/// Erlang programs may need a special prologue to handle the stack size they
3179/// might need at runtime. That is because Erlang/OTP does not implement a C
3180 /// stack but uses a custom implementation of a hybrid stack/heap architecture.
3181/// (for more information see Eric Stenman's Ph.D. thesis:
3182/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
3183///
3184/// CheckStack:
3185/// temp0 = sp - MaxStack
3186/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3187/// OldStart:
3188/// ...
3189/// IncStack:
3190/// call inc_stack # doubles the stack space
3191/// temp0 = sp - MaxStack
3192/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3193 void X86FrameLowering::adjustForHiPEPrologue(
3194 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3195 MachineFrameInfo &MFI = MF.getFrameInfo();
3196 DebugLoc DL;
3197
3198 // To support shrink-wrapping we would need to insert the new blocks
3199 // at the right place and update the branches to PrologueMBB.
3200 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3201
3202 // HiPE-specific values
3203 NamedMDNode *HiPELiteralsMD = MF.getMMI().getModule()
3204 ->getNamedMetadata("hipe.literals");
3205 if (!HiPELiteralsMD)
3207 "Can't generate HiPE prologue without runtime parameters");
3208 const unsigned HipeLeafWords
3209 = getHiPELiteral(HiPELiteralsMD,
3210 Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS");
3211 const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
3212 const unsigned Guaranteed = HipeLeafWords * SlotSize;
3213 unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs ?
3214 MF.getFunction().arg_size() - CCRegisteredArgs : 0;
3215 unsigned MaxStack = MFI.getStackSize() + CallerStkArity*SlotSize + SlotSize;
3216
3218 "HiPE prologue is only supported on Linux operating systems.");
3219
3220 // Compute the largest caller's frame that is needed to fit the callees'
3221 // frames. This 'MaxStack' is computed from:
3222 //
3223 // a) the fixed frame size, which is the space needed for all spilled temps,
3224 // b) outgoing on-stack parameter areas, and
3225 // c) the minimum stack space this function needs to make available for the
3226 // functions it calls (a tunable ABI property).
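// Hypothetical example on x86-64: with SlotSize = 8, CCRegisteredArgs = 6, a
// 48-byte frame and 9 arguments, CallerStkArity = 3, so MaxStack starts at
// 48 + 3*8 + 8 = 80 before the per-callee adjustment below.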
3227 if (MFI.hasCalls()) {
3228 unsigned MoreStackForCalls = 0;
3229
3230 for (auto &MBB : MF) {
3231 for (auto &MI : MBB) {
3232 if (!MI.isCall())
3233 continue;
3234
3235 // Get callee operand.
3236 const MachineOperand &MO = MI.getOperand(0);
3237
3238 // Only take account of global function calls (no closures etc.).
3239 if (!MO.isGlobal())
3240 continue;
3241
3242 const Function *F = dyn_cast<Function>(MO.getGlobal());
3243 if (!F)
3244 continue;
3245
3246 // Do not update 'MaxStack' for primitive and built-in functions
3247 // (encoded with names either starting with "erlang."/"bif_" or not
3248 // having a ".", such as a simple <Module>.<Function>.<Arity>, or an
3249 // "_", such as the BIF "suspend_0") as they are executed on another
3250 // stack.
3251 if (F->getName().contains("erlang.") || F->getName().contains("bif_") ||
3252 F->getName().find_first_of("._") == StringRef::npos)
3253 continue;
3254
3255 unsigned CalleeStkArity =
3256 F->arg_size() > CCRegisteredArgs ? F->arg_size()-CCRegisteredArgs : 0;
3257 if (HipeLeafWords - 1 > CalleeStkArity)
3258 MoreStackForCalls = std::max(MoreStackForCalls,
3259 (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
3260 }
3261 }
3262 MaxStack += MoreStackForCalls;
3263 }
3264
3265 // If the stack frame needed is larger than the guaranteed then runtime checks
3266 // and calls to "inc_stack_0" BIF should be inserted in the assembly prologue.
3267 if (MaxStack > Guaranteed) {
3268 MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
3269 MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
3270
3271 for (const auto &LI : PrologueMBB.liveins()) {
3272 stackCheckMBB->addLiveIn(LI);
3273 incStackMBB->addLiveIn(LI);
3274 }
3275
3276 MF.push_front(incStackMBB);
3277 MF.push_front(stackCheckMBB);
3278
3279 unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
3280 unsigned LEAop, CMPop, CALLop;
3281 SPLimitOffset = getHiPELiteral(HiPELiteralsMD, "P_NSP_LIMIT");
3282 if (Is64Bit) {
3283 SPReg = X86::RSP;
3284 PReg = X86::RBP;
3285 LEAop = X86::LEA64r;
3286 CMPop = X86::CMP64rm;
3287 CALLop = X86::CALL64pcrel32;
3288 } else {
3289 SPReg = X86::ESP;
3290 PReg = X86::EBP;
3291 LEAop = X86::LEA32r;
3292 CMPop = X86::CMP32rm;
3293 CALLop = X86::CALLpcrel32;
3294 }
3295
3296 ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3297 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3298 "HiPE prologue scratch register is live-in");
3299
3300 // Create new MBB for StackCheck:
3301 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg),
3302 SPReg, false, -MaxStack);
3303 // SPLimitOffset is in a fixed heap location (pointed by BP).
3304 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop))
3305 .addReg(ScratchReg), PReg, false, SPLimitOffset);
3306 BuildMI(stackCheckMBB, DL, TII.get(X86::JCC_1)).addMBB(&PrologueMBB).addImm(X86::COND_AE);
3307
3308 // Create new MBB for IncStack:
3309 BuildMI(incStackMBB, DL, TII.get(CALLop)).
3310 addExternalSymbol("inc_stack_0");
3311 addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg),
3312 SPReg, false, -MaxStack);
3313 addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop))
3314 .addReg(ScratchReg), PReg, false, SPLimitOffset);
3315 BuildMI(incStackMBB, DL, TII.get(X86::JCC_1)).addMBB(incStackMBB).addImm(X86::COND_LE);
3316
3317 stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100});
3318 stackCheckMBB->addSuccessor(incStackMBB, {1, 100});
3319 incStackMBB->addSuccessor(&PrologueMBB, {99, 100});
3320 incStackMBB->addSuccessor(incStackMBB, {1, 100});
3321 }
3322#ifdef EXPENSIVE_CHECKS
3323 MF.verify();
3324#endif
3325}
3326
3327bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
3329 const DebugLoc &DL,
3330 int Offset) const {
3331 if (Offset <= 0)
3332 return false;
3333
3334 if (Offset % SlotSize)
3335 return false;
3336
3337 int NumPops = Offset / SlotSize;
3338 // This is only worth it if we have at most 2 pops.
3339 if (NumPops != 1 && NumPops != 2)
3340 return false;
3341
3342 // Handle only the trivial case where the adjustment directly follows
3343 // a call. This is the most common one, anyway.
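// For instance, an 'add esp, 8' emitted right after a call can instead become
// a single 'pop ecx' into a register the call is known to clobber, which is
// smaller and avoids touching EFLAGS.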
3344 if (MBBI == MBB.begin())
3345 return false;
3346 MachineBasicBlock::iterator Prev = std::prev(MBBI);
3347 if (!Prev->isCall() || !Prev->getOperand(1).isRegMask())
3348 return false;
3349
3350 unsigned Regs[2];
3351 unsigned FoundRegs = 0;
3352
3353 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3354 const MachineOperand &RegMask = Prev->getOperand(1);
3355
3356 auto &RegClass =
3357 Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass;
3358 // Try to find up to NumPops free registers.
3359 for (auto Candidate : RegClass) {
3360 // Poor man's liveness:
3361 // Since we're immediately after a call, any register that is clobbered
3362 // by the call and not defined by it can be considered dead.
3363 if (!RegMask.clobbersPhysReg(Candidate))
3364 continue;
3365
3366 // Don't clobber reserved registers
3367 if (MRI.isReserved(Candidate))
3368 continue;
3369
3370 bool IsDef = false;
3371 for (const MachineOperand &MO : Prev->implicit_operands()) {
3372 if (MO.isReg() && MO.isDef() &&
3373 TRI->isSuperOrSubRegisterEq(MO.getReg(), Candidate)) {
3374 IsDef = true;
3375 break;
3376 }
3377 }
3378
3379 if (IsDef)
3380 continue;
3381
3382 Regs[FoundRegs++] = Candidate;
3383 if (FoundRegs == (unsigned)NumPops)
3384 break;
3385 }
3386
3387 if (FoundRegs == 0)
3388 return false;
3389
3390 // If we found only one free register, but need two, reuse the same one twice.
3391 while (FoundRegs < (unsigned)NumPops)
3392 Regs[FoundRegs++] = Regs[0];
3393
3394 for (int i = 0; i < NumPops; ++i)
3395 BuildMI(MBB, MBBI, DL,
3396 TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r), Regs[i]);
3397
3398 return true;
3399}
3400
3401 MachineBasicBlock::iterator X86FrameLowering::eliminateCallFramePseudoInstr(
3402 MachineFunction &MF, MachineBasicBlock &MBB,
3403 MachineBasicBlock::iterator I) const {
3404 bool reserveCallFrame = hasReservedCallFrame(MF);
3405 unsigned Opcode = I->getOpcode();
3406 bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
3407 DebugLoc DL = I->getDebugLoc(); // copy DebugLoc as I will be erased.
3408 uint64_t Amount = TII.getFrameSize(*I);
3409 uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(*I) : 0;
3410 I = MBB.erase(I);
3411 auto InsertPos = skipDebugInstructionsForward(I, MBB.end());
3412
3413 // Try to avoid emitting dead SP adjustments if the block end is unreachable,
3414 // typically because the function is marked noreturn (abort, throw,
3415 // assert_fail, etc).
3416 if (isDestroy && blockEndIsUnreachable(MBB, I))
3417 return I;
3418
3419 if (!reserveCallFrame) {
3420 // If the stack pointer can be changed after prologue, turn the
3421 // adjcallstackup instruction into a 'sub ESP, <amt>' and the
3422 // adjcallstackdown instruction into 'add ESP, <amt>'
3423
3424 // We need to keep the stack aligned properly. To do this, we round the
3425 // amount of space needed for the outgoing arguments up to the next
3426 // alignment boundary.
3427 Amount = alignTo(Amount, getStackAlign());
3428
3429 const Function &F = MF.getFunction();
3430 bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
3431 bool DwarfCFI = !WindowsCFI && MF.needsFrameMoves();
3432
3433 // If we have any exception handlers in this function, and we adjust
3434 // the SP before calls, we may need to indicate this to the unwinder
3435 // using GNU_ARGS_SIZE. Note that this may be necessary even when
3436 // Amount == 0, because the preceding function may have set a non-0
3437 // GNU_ARGS_SIZE.
3438 // TODO: We don't need to reset this between subsequent functions,
3439 // if it didn't change.
3440 bool HasDwarfEHHandlers = !WindowsCFI && !MF.getLandingPads().empty();
3441
3442 if (HasDwarfEHHandlers && !isDestroy &&
3443 MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
3444 BuildCFI(MBB, InsertPos, DL,
3445 MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
3446
3447 if (Amount == 0)
3448 return I;
3449
3450 // Factor out the amount that gets handled inside the sequence
3451 // (Pushes of argument for frame setup, callee pops for frame destroy)
3452 Amount -= InternalAmt;
3453
3454 // TODO: This is needed only if we require precise CFA.
3455 // If this is a callee-pop calling convention, emit a CFA adjust for
3456 // the amount the callee popped.
3457 if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
3458 BuildCFI(MBB, InsertPos, DL,
3459 MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
3460
3461 // Add Amount to SP to destroy a frame, or subtract to setup.
3462 int64_t StackAdjustment = isDestroy ? Amount : -Amount;
3463
3464 if (StackAdjustment) {
3465 // Merge with any previous or following adjustment instruction. Note: the
3466 // instructions merged with here do not have CFI, so their stack
3467 // adjustments do not feed into CfaAdjustment.
3468 StackAdjustment += mergeSPUpdates(MBB, InsertPos, true);
3469 StackAdjustment += mergeSPUpdates(MBB, InsertPos, false);
3470
3471 if (StackAdjustment) {
3472 if (!(F.hasMinSize() &&
3473 adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
3474 BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
3475 /*InEpilogue=*/false);
3476 }
3477 }
3478
3479 if (DwarfCFI && !hasFP(MF)) {
3480 // If we don't have FP, but need to generate unwind information,
3481 // we need to set the correct CFA offset after the stack adjustment.
3482 // How much we adjust the CFA offset depends on whether we're emitting
3483 // CFI only for EH purposes or for debugging. EH only requires the CFA
3484 // offset to be correct at each call site, while for debugging we want
3485 // it to be more precise.
3486
3487 int64_t CfaAdjustment = -StackAdjustment;
3488 // TODO: When not using precise CFA, we also need to adjust for the
3489 // InternalAmt here.
3490 if (CfaAdjustment) {
3491 BuildCFI(MBB, InsertPos, DL,
3492 MCCFIInstruction::createAdjustCfaOffset(nullptr,
3493 CfaAdjustment));
3494 }
3495 }
3496
3497 return I;
3498 }
3499
3500 if (InternalAmt) {
3501 MachineBasicBlock::iterator CI = I;
3502 MachineBasicBlock::iterator B = MBB.begin();
3503 while (CI != B && !std::prev(CI)->isCall())
3504 --CI;
3505 BuildStackAdjustment(MBB, CI, DL, -InternalAmt, /*InEpilogue=*/false);
3506 }
3507
3508 return I;
3509}
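// Illustrative sketch (editorial addition, not part of the LLVM source): for a
// function without a reserved call frame, 16-byte stack alignment and a
// 12-byte outgoing-argument area, the pseudos are lowered roughly as
//   ADJCALLSTACKDOWN 12   ->   sub esp, 16
//   ADJCALLSTACKUP   12   ->   add esp, 16
// and, when DWARF CFI is required and the function has no frame pointer, a
// matching .cfi_adjust_cfa_offset is emitted after each adjustment.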
3510
3511bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
3512 assert(MBB.getParent() && "Block is not attached to a function!");
3513 const MachineFunction &MF = *MBB.getParent();
3514 if (!MBB.isLiveIn(X86::EFLAGS))
3515 return true;
3516
3517 // If stack probes have to loop inline or call, that will clobber EFLAGS.
3518 // FIXME: we could allow cases that will use emitStackProbeInlineGenericBlock.
3519 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
3520 const X86TargetLowering &TLI = *STI.getTargetLowering();
3521 if (TLI.hasInlineStackProbe(MF) || TLI.hasStackProbeSymbol(MF))
3522 return false;
3523
3524 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3525 return !TRI->hasStackRealignment(MF) && !X86FI->hasSwiftAsyncContext();
3526}
3527
3528bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
3529 assert(MBB.getParent() && "Block is not attached to a function!");
3530
3531 // Win64 has strict requirements in terms of epilogue and we are
3532 // not taking a chance at messing with them.
3533 // I.e., unless this block is already an exit block, we can't use
3534 // it as an epilogue.
3535 if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock())
3536 return false;
3537
3538 // Swift async context epilogue has a BTR instruction that clobbers parts of
3539 // EFLAGS.
3540 const MachineFunction &MF = *MBB.getParent();
3541 if (MF.getInfo<X86MachineFunctionInfo>()->hasSwiftAsyncContext())
3542 return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
3543
3544 if (canUseLEAForSPInEpilogue(*MBB.getParent()))
3545 return true;
3546
3547 // If we cannot use LEA to adjust SP, we may need to use ADD, which
3548 // clobbers the EFLAGS. Check that we do not need to preserve it,
3549 // otherwise, conservatively assume this is not
3550 // safe to insert the epilogue here.
3551 return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
3552}
3553
3554bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
3555 // If we may need to emit frameless compact unwind information, give
3556 // up as this is currently broken: PR25614.
3557 bool CompactUnwind =
3558 MF.getMMI().getContext().getObjectFileInfo()->getCompactUnwindSection() !=
3559 nullptr;
3560 return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF) ||
3561 !CompactUnwind) &&
3562 // The lowering of segmented stack and HiPE only support entry
3563 // blocks as prologue blocks: PR26107. This limitation may be
3564 // lifted if we fix:
3565 // - adjustForSegmentedStacks
3566 // - adjustForHiPEPrologue
3567 MF.getFunction().getCallingConv() != CallingConv::HiPE &&
3568 !MF.shouldSplitStack();
3569}
3570
3571MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
3572 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
3573 const DebugLoc &DL, bool RestoreSP) const {
3574 assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env");
3575 assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32");
3576 assert(STI.is32Bit() && !Uses64BitFramePtr &&
3577 "restoring EBP/ESI on non-32-bit target");
3578
3579 MachineFunction &MF = *MBB.getParent();
3580 Register FramePtr = TRI->getFrameRegister(MF);
3581 Register BasePtr = TRI->getBaseRegister();
3582 WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo();
3583 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3584 MachineFrameInfo &MFI = MF.getFrameInfo();
3585
3586 // FIXME: Don't set FrameSetup flag in catchret case.
3587
3588 int FI = FuncInfo.EHRegNodeFrameIndex;
3589 int EHRegSize = MFI.getObjectSize(FI);
3590
3591 if (RestoreSP) {
3592 // MOV32rm -EHRegSize(%ebp), %esp
3593 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP),
3594 X86::EBP, true, -EHRegSize)
3595 .setMIFlag(MachineInstr::FrameSetup);
3596 }
3597
3598 Register UsedReg;
3599 int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg).getFixed();
3600 int EndOffset = -EHRegOffset - EHRegSize;
3601 FuncInfo.EHRegNodeEndOffset = EndOffset;
3602
3603 if (UsedReg == FramePtr) {
3604 // ADD $offset, %ebp
3605 unsigned ADDri = getADDriOpcode(false, EndOffset);
3606 BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr)
3607 .addReg(FramePtr)
3608 .addImm(EndOffset)
3609 .setMIFlag(MachineInstr::FrameSetup)
3610 ->getOperand(3)
3611 .setIsDead();
3612 assert(EndOffset >= 0 &&
3613 "end of registration object above normal EBP position!");
3614 } else if (UsedReg == BasePtr) {
3615 // LEA offset(%ebp), %esi
3616 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr),
3617 FramePtr, false, EndOffset)
3618 .setMIFlag(MachineInstr::FrameSetup);
3619 // MOV32rm SavedEBPOffset(%esi), %ebp
3620 assert(X86FI->getHasSEHFramePtrSave());
3621 int Offset =
3622 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
3623 .getFixed();
3624 assert(UsedReg == BasePtr);
3625 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr),
3626 UsedReg, true, Offset)
3627 .setMIFlag(MachineInstr::FrameSetup);
3628 } else {
3629 llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr");
3630 }
3631 return MBBI;
3632}
3633
3634int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
3635 return TRI->getSlotSize();
3636}
3637
3638Register
3639X86FrameLowering::getInitialCFARegister(const MachineFunction &MF) const {
3640 return TRI->getDwarfRegNum(StackPtr, true);
3641}
3642
3643namespace {
3644// Struct used by orderFrameObjects to help sort the stack objects.
3645struct X86FrameSortingObject {
3646 bool IsValid = false; // true if we care about this Object.
3647 unsigned ObjectIndex = 0; // Index of Object into MFI list.
3648 unsigned ObjectSize = 0; // Size of Object in bytes.
3649 Align ObjectAlignment = Align(1); // Alignment of Object in bytes.
3650 unsigned ObjectNumUses = 0; // Object static number of uses.
3651};
3652
3653// The comparison function we use for std::sort to order our local
3654// stack symbols. The current algorithm is to use an estimated
3655// "density". This takes into consideration the size and number of
3656// uses each object has in order to roughly minimize code size.
3657// So, for example, an object of size 16B that is referenced 5 times
3658// will get higher priority than 4 4B objects referenced 1 time each.
3659// It's not perfect and we may be able to squeeze a few more bytes out of
3660// it (for example : 0(esp) requires fewer bytes, symbols allocated at the
3661// fringe end can have special consideration, given their size is less
3662// important, etc.), but the algorithmic complexity grows too much to be
3663// worth the extra gains we get. This gets us pretty close.
3664// The final order leaves us with objects with highest priority going
3665// at the end of our list.
3666struct X86FrameSortingComparator {
3667 inline bool operator()(const X86FrameSortingObject &A,
3668 const X86FrameSortingObject &B) const {
3669 uint64_t DensityAScaled, DensityBScaled;
3670
3671 // For consistency in our comparison, all invalid objects are placed
3672 // at the end. This also allows us to stop walking when we hit the
3673 // first invalid item after it's all sorted.
3674 if (!A.IsValid)
3675 return false;
3676 if (!B.IsValid)
3677 return true;
3678
3679 // The density is calculated by doing :
3680 // (double)DensityA = A.ObjectNumUses / A.ObjectSize
3681 // (double)DensityB = B.ObjectNumUses / B.ObjectSize
3682 // Since this approach may cause inconsistencies in
3683 // the floating point <, >, == comparisons, depending on the floating
3684 // point model with which the compiler was built, we're going
3685 // to scale both sides by multiplying with
3686 // A.ObjectSize * B.ObjectSize. This ends up factoring away
3687 // the division and, with it, the need for any floating point
3688 // arithmetic.
3689 DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) *
3690 static_cast<uint64_t>(B.ObjectSize);
3691 DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) *
3692 static_cast<uint64_t>(A.ObjectSize);
3693
3694 // If the two densities are equal, prioritize highest alignment
3695 // objects. This allows for similar alignment objects
3696 // to be packed together (given the same density).
3697 // There's room for improvement here, also, since we can pack
3698 // similar alignment (different density) objects next to each
3699 // other to save padding. This will also require further
3700 // complexity/iterations, and the overall gain isn't worth it,
3701 // in general. Something to keep in mind, though.
3702 if (DensityAScaled == DensityBScaled)
3703 return A.ObjectAlignment < B.ObjectAlignment;
3704
3705 return DensityAScaled < DensityBScaled;
3706 }
3707};
3708} // namespace
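// Worked example (editorial addition, not part of the LLVM source): for
// A = {ObjectSize 16, ObjectNumUses 5} and B = {ObjectSize 4, ObjectNumUses 1}
//   DensityAScaled = 5 * 4  = 20
//   DensityBScaled = 1 * 16 = 16
// so operator()(A, B) is false and operator()(B, A) is true: B sorts before A
// and the denser A ends up later in the list, exactly as if 5/16 = 0.3125 had
// been compared against 1/4 = 0.25 in floating point.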
3709
3710// Order the symbols in the local stack.
3711// We want to place the local stack objects in some sort of sensible order.
3712// The heuristic we use is to try and pack them according to static number
3713// of uses and size of object in order to minimize code size.
3714void X86FrameLowering::orderFrameObjects(
3715 const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
3716 const MachineFrameInfo &MFI = MF.getFrameInfo();
3717
3718 // Don't waste time if there's nothing to do.
3719 if (ObjectsToAllocate.empty())
3720 return;
3721
3722 // Create an array of all MFI objects. We won't need all of these
3723 // objects, but we're going to create a full array of them to make
3724 // it easier to index into when we're counting "uses" down below.
3725 // We want to be able to easily/cheaply access an object by simply
3726 // indexing into it, instead of having to search for it every time.
3727 std::vector<X86FrameSortingObject> SortingObjects(MFI.getObjectIndexEnd());
3728
3729 // Walk the objects we care about and mark them as such in our working
3730 // struct.
3731 for (auto &Obj : ObjectsToAllocate) {
3732 SortingObjects[Obj].IsValid = true;
3733 SortingObjects[Obj].ObjectIndex = Obj;
3734 SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlign(Obj);
3735 // Set the size.
3736 int ObjectSize = MFI.getObjectSize(Obj);
3737 if (ObjectSize == 0)
3738 // Variable size. Just use 4.
3739 SortingObjects[Obj].ObjectSize = 4;
3740 else
3741 SortingObjects[Obj].ObjectSize = ObjectSize;
3742 }
3743
3744 // Count the number of uses for each object.
3745 for (auto &MBB : MF) {
3746 for (auto &MI : MBB) {
3747 if (MI.isDebugInstr())
3748 continue;
3749 for (const MachineOperand &MO : MI.operands()) {
3750 // Check to see if it's a local stack symbol.
3751 if (!MO.isFI())
3752 continue;
3753 int Index = MO.getIndex();
3754 // Check to see if it falls within our range, and is tagged
3755 // to require ordering.
3756 if (Index >= 0 && Index < MFI.getObjectIndexEnd() &&
3757 SortingObjects[Index].IsValid)
3758 SortingObjects[Index].ObjectNumUses++;
3759 }
3760 }
3761 }
3762
3763 // Sort the objects using X86FrameSortingComparator (see its comment for
3764 // info).
3765 llvm::stable_sort(SortingObjects, X86FrameSortingComparator());
3766
3767 // Now modify the original list to represent the final order that
3768 // we want. The order will depend on whether we're going to access them
3769 // from the stack pointer or the frame pointer. For SP, the list should
3770 // end up with the END containing objects that we want with smaller offsets.
3771 // For FP, it should be flipped.
3772 int i = 0;
3773 for (auto &Obj : SortingObjects) {
3774 // All invalid items are sorted at the end, so it's safe to stop.
3775 if (!Obj.IsValid)
3776 break;
3777 ObjectsToAllocate[i++] = Obj.ObjectIndex;
3778 }
3779
3780 // Flip it if we're accessing off of the FP.
3781 if (!TRI->hasStackRealignment(MF) && hasFP(MF))
3782 std::reverse(ObjectsToAllocate.begin(), ObjectsToAllocate.end());
3783}
3784
3785unsigned
3786X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const {
3787 // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue.
3788 unsigned Offset = 16;
3789 // RBP is immediately pushed.
3790 Offset += SlotSize;
3791 // All callee-saved registers are then pushed.
3792 Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
3793 // Every funclet allocates enough stack space for the largest outgoing call.
3794 Offset += getWinEHFuncletFrameSize(MF);
3795 return Offset;
3796}
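// Worked example (editorial addition, not part of the LLVM source): with
// SlotSize == 8, a 16-byte callee-saved-register area and a 32-byte funclet
// frame, the offset computed above is 16 + 8 + 16 + 32 = 72 bytes.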
3797
3798void X86FrameLowering::processFunctionBeforeFrameFinalized(
3799 MachineFunction &MF, RegScavenger *RS) const {
3800 // Mark the function as not having WinCFI. We will set it back to true in
3801 // emitPrologue if it gets called and emits CFI.
3802 MF.setHasWinCFI(false);
3803
3804 // If we are using Windows x64 CFI, ensure that the stack is always 8 byte
3805 // aligned. The format doesn't support misaligned stack adjustments.
3806 if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
3807 MF.getFrameInfo().ensureMaxAlignment(Align(8));
3808
3809 // If this function isn't doing Win64-style C++ EH, we don't need to do
3810 // anything.
3811 if (STI.is64Bit() && MF.hasEHFunclets() &&
3812 classifyEHPersonality(MF.getFunction().getPersonalityFn()) ==
3813 EHPersonality::MSVC_CXX) {
3814 adjustFrameForMsvcCxxEh(MF);
3815 }
3816}
3817
3818void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const {
3819 // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset
3820 // relative to RSP after the prologue. Find the offset of the last fixed
3821 // object, so that we can allocate a slot immediately following it. If there
3822 // were no fixed objects, use offset -SlotSize, which is immediately after the
3823 // return address. Fixed objects have negative frame indices.
3824 MachineFrameInfo &MFI = MF.getFrameInfo();
3825 WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
3826 int64_t MinFixedObjOffset = -SlotSize;
3827 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I)
3828 MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I));
3829
3830 for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
3831 for (WinEHHandlerType &H : TBME.HandlerArray) {
3832 int FrameIndex = H.CatchObj.FrameIndex;
3833 if (FrameIndex != INT_MAX) {
3834 // Ensure alignment.
3835 unsigned Align = MFI.getObjectAlign(FrameIndex).value();
3836 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Align;
3837 MinFixedObjOffset -= MFI.getObjectSize(FrameIndex);
3838 MFI.setObjectOffset(FrameIndex, MinFixedObjOffset);
3839 }
3840 }
3841 }
3842
3843 // Ensure alignment.
3844 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8;
3845 int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
3846 int UnwindHelpFI =
3847 MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*IsImmutable=*/false);
3848 EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
3849
3850 // Store -2 into UnwindHelp on function entry. We have to scan forwards past
3851 // other frame setup instructions.
3852 MachineBasicBlock &MBB = MF.front();
3853 auto MBBI = MBB.begin();
3854 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
3855 ++MBBI;
3856
3857 DebugLoc DL = MBB.findDebugLoc(MBBI);
3858 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)),
3859 UnwindHelpFI)
3860 .addImm(-2);
3861}
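// Worked example (editorial addition, not part of the LLVM source): if
// MinFixedObjOffset is -20 after scanning the fixed and catch objects, the
// final 8-byte rounding above lowers it to -24, so UnwindHelp gets its own
// slot at -24 - SlotSize = -32, and -2 is stored there on function entry.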
3862
3863void X86FrameLowering::processFunctionBeforeFrameIndicesReplaced(
3864 MachineFunction &MF, RegScavenger *RS) const {
3865 if (STI.is32Bit() && MF.hasEHFunclets())
3866 restoreWinEHStackPointersInParent(MF);
3867}
3868
3869void X86FrameLowering::restoreWinEHStackPointersInParent(
3870 MachineFunction &MF) const {
3871 // 32-bit functions have to restore stack pointers when control is transferred
3872 // back to the parent function. These blocks are identified as eh pads that
3873 // are not funclet entries.
3874 bool IsSEH = isAsynchronousEHPersonality(
3875 classifyEHPersonality(MF.getFunction().getPersonalityFn()));
3876 for (MachineBasicBlock &MBB : MF) {
3877 bool NeedsRestore = MBB.isEHPad() && !MBB.isEHFuncletEntry();
3878 if (NeedsRestore)
3879 restoreWin32EHStackPointers(MBB, MBB.begin(), DebugLoc(),
3880 /*RestoreSP=*/IsSEH);
3881 }
3882}