//===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "X86FrameLowering.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Target/TargetOptions.h"
#include <cstdlib>

#define DEBUG_TYPE "x86-fl"

STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
STATISTIC(NumFrameExtraProbe,
          "Number of extra stack probes generated in prologue");
STATISTIC(NumFunctionUsingPush2Pop2, "Number of functions using push2/pop2");

using namespace llvm;

X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
                                   MaybeAlign StackAlignOverride)
    : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(),
                          STI.is64Bit() ? -8 : -4),
      STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {
  // Cache a bunch of frame-related predicates for this subtarget.
  SlotSize = TRI->getSlotSize();
  Is64Bit = STI.is64Bit();
  IsLP64 = STI.isTarget64BitLP64();
  // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
  Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
  StackPtr = TRI->getStackRegister();
}

bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo().hasVarSizedObjects() &&
         !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() &&
         !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall();
}

/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
/// call frame pseudos can be simplified. Having a FP, as in the default
/// implementation, is not sufficient here since we can't always use it.
/// Use a more nuanced condition.
bool X86FrameLowering::canSimplifyCallFramePseudos(
    const MachineFunction &MF) const {
  return hasReservedCallFrame(MF) ||
         MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
         (hasFP(MF) && !TRI->hasStackRealignment(MF)) ||
         TRI->hasBasePointer(MF);
}

// needsFrameIndexResolution - Do we need to perform FI resolution for
// this function. Normally, this is required only when the function
// has any stack objects. However, FI resolution actually has another job,
// not apparent from the title - it resolves callframesetup/destroy
// that were not simplified earlier.
// So, this is required for x86 functions that have push sequences even
// when there are no stack objects.
bool X86FrameLowering::needsFrameIndexResolution(
    const MachineFunction &MF) const {
  return MF.getFrameInfo().hasStackObjects() ||
         MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
}

/// hasFPImpl - Return true if the specified function should have a dedicated
/// frame pointer register. This is true if the function has variable sized
/// allocas or if frame pointer elimination is disabled.
bool X86FrameLowering::hasFPImpl(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
          TRI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
          MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() ||
          MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
          MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
          MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
          MFI.hasStackMap() || MFI.hasPatchPoint() ||
          (isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment()));
}

static unsigned getSUBriOpcode(bool IsLP64) {
  return IsLP64 ? X86::SUB64ri32 : X86::SUB32ri;
}

static unsigned getADDriOpcode(bool IsLP64) {
  return IsLP64 ? X86::ADD64ri32 : X86::ADD32ri;
}

static unsigned getSUBrrOpcode(bool IsLP64) {
  return IsLP64 ? X86::SUB64rr : X86::SUB32rr;
}

static unsigned getADDrrOpcode(bool IsLP64) {
  return IsLP64 ? X86::ADD64rr : X86::ADD32rr;
}

static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
  return IsLP64 ? X86::AND64ri32 : X86::AND32ri;
}

static unsigned getLEArOpcode(bool IsLP64) {
  return IsLP64 ? X86::LEA64r : X86::LEA32r;
}

static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm) {
  if (Use64BitReg) {
    if (isUInt<32>(Imm))
      return X86::MOV32ri64;
    if (isInt<32>(Imm))
      return X86::MOV64ri32;
    return X86::MOV64ri;
  }
  return X86::MOV32ri;
}

// Push-Pop Acceleration (PPX) hint is used to indicate that the POP reads the
// value written by the PUSH from the stack. The processor tracks these marked
// instructions internally and fast-forwards register data between matching
// PUSH and POP instructions, without going through memory or through the
// training loop of the Fast Store Forwarding Predictor (FSFP). Instead, a more
// efficient memory-renaming optimization can be used.
//
// The PPX hint is purely a performance hint. Instructions with this hint have
// the same functional semantics as those without. PPX hints set by the
// compiler that violate the balancing rule may turn off the PPX optimization,
// but they will not affect program semantics.
//
// Hence, PPX is used for balanced spill/reloads (Exceptions and setjmp/longjmp
// are not considered).
//
// PUSH2 and POP2 are instructions for (respectively) pushing/popping 2
// GPRs at a time to/from the stack.
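//
// As a rough illustration (a sketch, not a sequence this file emits
// verbatim), a balanced prologue/epilogue pair using these opcodes on a
// PPX-capable subtarget could look like:
//
//   push2p %r15, %r14        # spill two callee-saved GPRs, PPX-hinted
//   ...
//   pop2p  %r14, %r15        # reload them in the reverse order
//
// The hints only pay off when every PUSH(2) is balanced by a matching
// POP(2), which is why they are applied only to CSR spill/reload code.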
static unsigned getPUSHOpcode(const X86Subtarget &ST) {
  return ST.is64Bit() ? (ST.hasPPX() ? X86::PUSHP64r : X86::PUSH64r)
                      : X86::PUSH32r;
}
static unsigned getPOPOpcode(const X86Subtarget &ST) {
  return ST.is64Bit() ? (ST.hasPPX() ? X86::POPP64r : X86::POP64r)
                      : X86::POP32r;
}
static unsigned getPUSH2Opcode(const X86Subtarget &ST) {
  return ST.hasPPX() ? X86::PUSH2P : X86::PUSH2;
}
static unsigned getPOP2Opcode(const X86Subtarget &ST) {
  return ST.hasPPX() ? X86::POP2P : X86::POP2;
}

static bool isEAXLiveIn(MachineBasicBlock &MBB) {
  for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) {
    MCRegister Reg = RegMask.PhysReg;

    if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
        Reg == X86::AH || Reg == X86::AL)
      return true;
  }

  return false;
}

/// Check if the flags need to be preserved before the terminators.
/// This would be the case, if the eflags is live-in of the region
/// composed by the terminators or live-out of that region, without
/// being defined by a terminator.
static bool
flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) {
  for (const MachineInstr &MI : MBB.terminators()) {
    bool BreakNext = false;
    for (const MachineOperand &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (Reg != X86::EFLAGS)
        continue;

      // This terminator needs an eflags that is not defined
      // by another previous terminator:
      // EFLAGS is live-in of the region composed by the terminators.
      if (!MO.isDef())
        return true;
      // This terminator defines the eflags, i.e., we don't need to preserve it.
      // However, we still need to check this specific terminator does not
      // read a live-in value.
      BreakNext = true;
    }
    // We found a definition of the eflags, no need to preserve them.
    if (BreakNext)
      return false;
  }

  // None of the terminators use or define the eflags.
  // Check if they are live-out, that would imply we need to preserve them.
  for (const MachineBasicBlock *Succ : MBB.successors())
    if (Succ->isLiveIn(X86::EFLAGS))
      return true;

  return false;
}
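
// Illustrative example (a sketch, not taken from a specific test): in a block
// whose code ends with
//   CMP64rr $rax, $rbx, implicit-def $eflags   <- body instruction, not a
//                                                 terminator
//   JCC_1 %bb.1, 4, implicit $eflags           <- terminator reads EFLAGS
// the JCC terminator reads an EFLAGS value that no terminator defines, so the
// function above returns true and any SP adjustment before the terminators
// must avoid ADD/SUB (which clobber EFLAGS), e.g. by using LEA instead.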

/// emitSPUpdate - Emit a series of instructions to increment / decrement the
/// stack pointer by a constant value.
void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator &MBBI,
                                    const DebugLoc &DL, int64_t NumBytes,
                                    bool InEpilogue) const {
  bool isSub = NumBytes < 0;
  uint64_t Offset = isSub ? -NumBytes : NumBytes;
  MachineInstr::MIFlag Flag =
      isSub ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy;

  uint64_t Chunk = (1LL << 31) - 1;

  MachineFunction &MF = *MBB.getParent();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);

  // It's ok to not take into account large chunks when probing, as the
  // allocation is split in smaller chunks anyway.
  if (EmitInlineStackProbe && !InEpilogue) {

    // This pseudo-instruction is going to be expanded, potentially using a
    // loop, by inlineStackProbe().
    BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING)).addImm(Offset);
    return;
  } else if (Offset > Chunk) {
    // Rather than emit a long series of instructions for large offsets,
    // load the offset into a register and do one sub/add
    unsigned Reg = 0;
    unsigned Rax = (unsigned)(Is64Bit ? X86::RAX : X86::EAX);

    if (isSub && !isEAXLiveIn(MBB))
      Reg = Rax;
    else
      Reg = TRI->findDeadCallerSavedReg(MBB, MBBI);

    unsigned AddSubRROpc =
        isSub ? getSUBrrOpcode(Is64Bit) : getADDrrOpcode(Is64Bit);
    if (Reg) {
      BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Offset)), Reg)
          .addImm(Offset)
          .setMIFlag(Flag);
      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr)
                             .addReg(StackPtr)
                             .addReg(Reg);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
      return;
    } else if (Offset > 8 * Chunk) {
      // If we would need more than 8 add or sub instructions (a >16GB stack
      // frame), it's worth spilling RAX to materialize this immediate.
      //   pushq %rax
      //   movabsq +-$Offset+-SlotSize, %rax
      //   addq %rsp, %rax
      //   xchg %rax, (%rsp)
      //   movq (%rsp), %rsp
      assert(Is64Bit && "can't have 32-bit 16GB stack frame");
      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
          .addReg(Rax, RegState::Kill)
          .setMIFlag(Flag);
      // Subtract is not commutative, so negate the offset and always use add.
      // Subtract 8 less and add 8 more to account for the PUSH we just did.
      if (isSub)
        Offset = -(Offset - SlotSize);
      else
        Offset = Offset + SlotSize;
      BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Offset)), Rax)
          .addImm(Offset)
          .setMIFlag(Flag);
      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax)
                             .addReg(Rax)
                             .addReg(StackPtr);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
      // Exchange the new SP in RAX with the top of the stack.
      addRegOffset(
          BuildMI(MBB, MBBI, DL, TII.get(X86::XCHG64rm), Rax).addReg(Rax),
          StackPtr, false, 0);
      // Load new SP from the top of the stack into RSP.
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), StackPtr),
                   StackPtr, false, 0);
      return;
    }
  }

  while (Offset) {
    uint64_t ThisVal = std::min(Offset, Chunk);
    if (ThisVal == SlotSize) {
      // Use push / pop for slot sized adjustments as a size optimization. We
      // need to find a dead register when using pop.
      unsigned Reg = isSub ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
                           : TRI->findDeadCallerSavedReg(MBB, MBBI);
      if (Reg) {
        unsigned Opc = isSub ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
                             : (Is64Bit ? X86::POP64r : X86::POP32r);
        BuildMI(MBB, MBBI, DL, TII.get(Opc))
            .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub))
            .setMIFlag(Flag);
        Offset -= ThisVal;
        continue;
      }
    }

    BuildStackAdjustment(MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue)
        .setMIFlag(Flag);

    Offset -= ThisVal;
  }
}
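
// For intuition (a sketch, not emitted verbatim): with probing disabled, a
// slot-sized 8-byte allocation becomes a single "pushq %rax"; a 3 GiB
// allocation exceeds the 31-bit Chunk immediate, so when a scratch register
// is available it is emitted as roughly
//   movl $3221225472, %eax   # MOV32ri64: zero-extends the value into %rax
//   subq %rax, %rsp
// instead of a chain of immediate SUBs.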

MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, int64_t Offset, bool InEpilogue) const {
  assert(Offset != 0 && "zero offset stack adjustment requested");

  // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue
  // is tricky.
  bool UseLEA;
  if (!InEpilogue) {
    // Check if inserting the prologue at the beginning
    // of MBB would require to use LEA operations.
    // We need to use LEA operations if EFLAGS is live in, because
    // it means an instruction will read it before it gets defined.
    UseLEA = STI.useLeaForSP() || MBB.isLiveIn(X86::EFLAGS);
  } else {
    // If we can use LEA for SP but we shouldn't, check that none
    // of the terminators uses the eflags. Otherwise we will insert
    // an ADD that will redefine the eflags and break the condition.
    // Alternatively, we could move the ADD, but this may not be possible
    // and is an optimization anyway.
    UseLEA = canUseLEAForSPInEpilogue(*MBB.getParent());
    if (UseLEA && !STI.useLeaForSP())
      UseLEA = flagsNeedToBePreservedBeforeTheTerminators(MBB);
    // If that assert breaks, that means we do not do the right thing
    // in canUseAsEpilogue.
    assert((UseLEA || !flagsNeedToBePreservedBeforeTheTerminators(MBB)) &&
           "We shouldn't have allowed this insertion point");
  }

  MachineInstrBuilder MI;
  if (UseLEA) {
    MI = addRegOffset(BuildMI(MBB, MBBI, DL,
                              TII.get(getLEArOpcode(Uses64BitFramePtr)),
                              StackPtr),
                      StackPtr, false, Offset);
  } else {
    bool IsSub = Offset < 0;
    uint64_t AbsOffset = IsSub ? -Offset : Offset;
    const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr)
                               : getADDriOpcode(Uses64BitFramePtr);
    MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
             .addReg(StackPtr)
             .addImm(AbsOffset);
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  }
  return MI;
}

int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator &MBBI,
                                     bool doMergeWithPrevious) const {
  if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
      (!doMergeWithPrevious && MBBI == MBB.end()))
    return 0;

  MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;

  PI = skipDebugInstructionsBackward(PI, MBB.begin());
  // It is assumed that ADD/SUB/LEA instruction is succeeded by one CFI
  // instruction, and that there are no DBG_VALUE or other instructions between
  // ADD/SUB/LEA and its corresponding CFI instruction.
  /* TODO: Add support for the case where there are multiple CFI instructions
    below the ADD/SUB/LEA, e.g.:
    ...
    add
    cfi_def_cfa_offset
    cfi_offset
    ...
  */
  if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
    PI = std::prev(PI);

  unsigned Opc = PI->getOpcode();
  int Offset = 0;

  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) &&
      PI->getOperand(0).getReg() == StackPtr) {
    assert(PI->getOperand(1).getReg() == StackPtr);
    Offset = PI->getOperand(2).getImm();
  } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
             PI->getOperand(0).getReg() == StackPtr &&
             PI->getOperand(1).getReg() == StackPtr &&
             PI->getOperand(2).getImm() == 1 &&
             PI->getOperand(3).getReg() == X86::NoRegister &&
             PI->getOperand(5).getReg() == X86::NoRegister) {
    // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
    Offset = PI->getOperand(4).getImm();
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) &&
             PI->getOperand(0).getReg() == StackPtr) {
    assert(PI->getOperand(1).getReg() == StackPtr);
    Offset = -PI->getOperand(2).getImm();
  } else
    return 0;

  PI = MBB.erase(PI);
  if (PI != MBB.end() && PI->isCFIInstruction()) {
    auto CIs = MBB.getParent()->getFrameInstructions();
    MCCFIInstruction CI = CIs[PI->getOperand(0).getCFIIndex()];
    if (CI.getOperation() == MCCFIInstruction::OpDefCfaOffset ||
        CI.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
      PI = MBB.erase(PI);
  }
  if (!doMergeWithPrevious)
    MBBI = skipDebugInstructionsForward(PI, MBB.end());

  return Offset;
}

void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                const DebugLoc &DL,
                                const MCCFIInstruction &CFIInst,
                                MachineInstr::MIFlag Flag) const {
  MachineFunction &MF = *MBB.getParent();
  unsigned CFIIndex = MF.addFrameInst(CFIInst);

  if (CFIInst.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
    MF.getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true);

  BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex)
      .setMIFlag(Flag);
}

/// Emits Dwarf Info specifying offsets of callee saved registers and
/// frame pointer. This is called only when basic block sections are enabled.
void X86FrameLowering::emitCalleeSavedFrameMovesFullCFA(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  const MachineFunction &MF = *MBB.getParent();
  if (!hasFP(MF)) {
    emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
    return;
  }
  const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
  const Register FramePtr = TRI->getFrameRegister(MF);
  const Register MachineFramePtr =
      STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(FramePtr, 64))
                               : FramePtr;
  unsigned DwarfReg = MRI->getDwarfRegNum(MachineFramePtr, true);
  // Offset = space for return address + size of the frame pointer itself.
  int64_t Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 8 : 4);
  BuildCFI(MBB, MBBI, DebugLoc{},
           MCCFIInstruction::createOffset(nullptr, DwarfReg, -Offset));
  emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
}

void X86FrameLowering::emitCalleeSavedFrameMoves(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, bool IsPrologue) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

  // Calculate offsets.
  for (const CalleeSavedInfo &I : CSI) {
    int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
    Register Reg = I.getReg();
    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);

    if (IsPrologue) {
      if (X86FI->getStackPtrSaveMI()) {
        // +2*SlotSize because there is return address and ebp at the bottom
        // of the stack.
        // | retaddr |
        // | ebp     |
        // |         |<--ebp
        Offset += 2 * SlotSize;
        SmallString<64> CfaExpr;
        CfaExpr.push_back(dwarf::DW_CFA_expression);
        uint8_t buffer[16];
        CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
        CfaExpr.push_back(2);
        Register FramePtr = TRI->getFrameRegister(MF);
        const Register MachineFramePtr =
            STI.isTarget64BitILP32()
                ? Register(getX86SubSuperRegister(FramePtr, 64))
                : FramePtr;
        unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
        CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
        CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
        BuildCFI(MBB, MBBI, DL,
                 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
                 MachineInstr::FrameSetup);
      } else {
        BuildCFI(MBB, MBBI, DL,
                 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
      }
    } else {
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createRestore(nullptr, DwarfReg));
    }
  }
  if (auto *MI = X86FI->getStackPtrSaveMI()) {
    int FI = MI->getOperand(1).getIndex();
    int64_t Offset = MFI.getObjectOffset(FI) + 2 * SlotSize;
    SmallString<64> CfaExpr;
    Register FramePtr = TRI->getFrameRegister(MF);
    const Register MachineFramePtr =
        STI.isTarget64BitILP32()
            ? Register(getX86SubSuperRegister(FramePtr, 64))
            : FramePtr;
    unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
    CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
    uint8_t buffer[16];
    CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
    CfaExpr.push_back(dwarf::DW_OP_deref);

    SmallString<64> DefCfaExpr;
    DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
    DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
    DefCfaExpr.append(CfaExpr.str());
    // DW_CFA_def_cfa_expression: DW_OP_breg5 offset, DW_OP_deref
    BuildCFI(MBB, MBBI, DL,
             MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
             MachineInstr::FrameSetup);
  }
}

void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
                                            MachineBasicBlock &MBB) const {
  const MachineFunction &MF = *MBB.getParent();

  // Insertion point.
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();

  // Fake a debug loc.
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();

  // Zero out FP stack if referenced. Do this outside of the loop below so that
  // it's done only once.
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  for (MCRegister Reg : RegsToZero.set_bits()) {
    if (!X86::RFP80RegClass.contains(Reg))
      continue;

    unsigned NumFPRegs = ST.is64Bit() ? 8 : 7;
    for (unsigned i = 0; i != NumFPRegs; ++i)
      BuildMI(MBB, MBBI, DL, TII.get(X86::LD_F0));

    for (unsigned i = 0; i != NumFPRegs; ++i)
      BuildMI(MBB, MBBI, DL, TII.get(X86::ST_FPrr)).addReg(X86::ST0);
    break;
  }

  // For GPRs, we only care to clear out the 32-bit register.
  BitVector GPRsToZero(TRI->getNumRegs());
  for (MCRegister Reg : RegsToZero.set_bits())
    if (TRI->isGeneralPurposeRegister(MF, Reg)) {
      GPRsToZero.set(getX86SubSuperRegister(Reg, 32));
      RegsToZero.reset(Reg);
    }

  // Zero out the GPRs first.
  for (MCRegister Reg : GPRsToZero.set_bits())
    TII.buildClearRegister(Reg, MBB, MBBI, DL);

  // Zero out the remaining registers.
  for (MCRegister Reg : RegsToZero.set_bits())
    TII.buildClearRegister(Reg, MBB, MBBI, DL);
}

void X86FrameLowering::emitStackProbe(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
    std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  if (STI.isTargetWindowsCoreCLR()) {
    if (InProlog) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING))
          .addImm(0 /* no explicit stack size */);
    } else {
      emitStackProbeInline(MF, MBB, MBBI, DL, false);
    }
  } else {
    emitStackProbeCall(MF, MBB, MBBI, DL, InProlog, InstrNum);
  }
}

bool X86FrameLowering::stackProbeFunctionModifiesSP() const {
  return STI.isOSWindows() && !STI.isTargetWin64();
}

void X86FrameLowering::inlineStackProbe(MachineFunction &MF,
                                        MachineBasicBlock &PrologMBB) const {
  auto Where = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
    return MI.getOpcode() == X86::STACKALLOC_W_PROBING;
  });
  if (Where != PrologMBB.end()) {
    DebugLoc DL = PrologMBB.findDebugLoc(Where);
    emitStackProbeInline(MF, PrologMBB, Where, DL, true);
    Where->eraseFromParent();
  }
}

void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
                                            MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MBBI,
                                            const DebugLoc &DL,
                                            bool InProlog) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  if (STI.isTargetWindowsCoreCLR() && STI.is64Bit())
    emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog);
  else
    emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog);
}

void X86FrameLowering::emitStackProbeInlineGeneric(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
  MachineInstr &AllocWithProbe = *MBBI;
  uint64_t Offset = AllocWithProbe.getOperand(0).getImm();

  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) &&
         "different expansion expected for CoreCLR 64 bit");

  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  uint64_t ProbeChunk = StackProbeSize * 8;

  uint64_t MaxAlign =
      TRI->hasStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0;

  // Synthesize a loop or unroll it, depending on the number of iterations.
  // BuildStackAlignAND ensures that at most MaxAlign % StackProbeSize bytes
  // are left between the unaligned rsp and the current rsp.
  if (Offset > ProbeChunk) {
    emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset,
                                    MaxAlign % StackProbeSize);
  } else {
    emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset,
                                     MaxAlign % StackProbeSize);
  }
}

void X86FrameLowering::emitStackProbeInlineGenericBlock(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
    uint64_t AlignOffset) const {

  const bool NeedsDwarfCFI = needsDwarfCFI(MF);
  const bool HasFP = hasFP(MF);
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);

  uint64_t CurrentOffset = 0;

  assert(AlignOffset < StackProbeSize);

  // If the offset is so small it fits within a page, there's nothing to do.
  if (StackProbeSize < Offset + AlignOffset) {

    uint64_t StackAdjustment = StackProbeSize - AlignOffset;
    BuildStackAdjustment(MBB, MBBI, DL, -StackAdjustment, /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
    if (!HasFP && NeedsDwarfCFI) {
      BuildCFI(
          MBB, MBBI, DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
    }

    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                     .setMIFlag(MachineInstr::FrameSetup),
                 StackPtr, false, 0)
        .addImm(0)
        .setMIFlag(MachineInstr::FrameSetup);
    NumFrameExtraProbe++;
    CurrentOffset = StackProbeSize - AlignOffset;
  }

  // For the next N - 1 pages, just probe. I tried to take advantage of
  // natural probes but it implies much more logic and there were very few
  // interesting natural probes to interleave.
  while (CurrentOffset + StackProbeSize < Offset) {
    BuildStackAdjustment(MBB, MBBI, DL, -StackProbeSize, /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);

    if (!HasFP && NeedsDwarfCFI) {
      BuildCFI(
          MBB, MBBI, DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, StackProbeSize));
    }
    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                     .setMIFlag(MachineInstr::FrameSetup),
                 StackPtr, false, 0)
        .addImm(0)
        .setMIFlag(MachineInstr::FrameSetup);
    NumFrameExtraProbe++;
    CurrentOffset += StackProbeSize;
  }

  // No need to probe the tail, it is smaller than a Page.
  uint64_t ChunkSize = Offset - CurrentOffset;
  if (ChunkSize == SlotSize) {
    // Use push for slot sized adjustments as a size optimization,
    // like emitSPUpdate does when not probing.
    unsigned Reg = Is64Bit ? X86::RAX : X86::EAX;
    unsigned Opc = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
    BuildMI(MBB, MBBI, DL, TII.get(Opc))
        .addReg(Reg, RegState::Undef)
        .setMIFlag(MachineInstr::FrameSetup);
  } else {
    BuildStackAdjustment(MBB, MBBI, DL, -ChunkSize, /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
  }
  // No need to adjust Dwarf CFA offset here, the last position of the stack
  // has been defined.
}
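
// E.g. (a sketch, assuming the default 4096-byte probe size and AlignOffset
// == 0): a 9000-byte allocation unrolls to roughly
//   subq $4096, %rsp ; movl $0, (%rsp)    # first page
//   subq $4096, %rsp ; movl $0, (%rsp)    # second page
//   subq $808,  %rsp                      # tail, smaller than a page, no probe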

void X86FrameLowering::emitStackProbeInlineGenericLoop(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
    uint64_t AlignOffset) const {
  assert(Offset && "null offset");

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Inline stack probe loop will clobber live EFLAGS.");

  const bool NeedsDwarfCFI = needsDwarfCFI(MF);
  const bool HasFP = hasFP(MF);
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);

  if (AlignOffset) {
    if (AlignOffset < StackProbeSize) {
      // Perform a first smaller allocation followed by a probe.
      BuildStackAdjustment(MBB, MBBI, DL, -AlignOffset, /*InEpilogue=*/false)
          .setMIFlag(MachineInstr::FrameSetup);

      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                       .setMIFlag(MachineInstr::FrameSetup),
                   StackPtr, false, 0)
          .addImm(0)
          .setMIFlag(MachineInstr::FrameSetup);
      NumFrameExtraProbe++;
      Offset -= AlignOffset;
    }
  }

  // Synthesize a loop
  NumFrameLoopProbe++;
  const BasicBlock *LLVM_BB = MBB.getBasicBlock();

  MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator MBBIter = ++MBB.getIterator();
  MF.insert(MBBIter, testMBB);
  MF.insert(MBBIter, tailMBB);

  Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
                              : Is64Bit         ? X86::R11D
                                                : X86::EAX;

  // save loop bound
  {
    const uint64_t BoundOffset = alignDown(Offset, StackProbeSize);

    // Can we calculate the loop bound using SUB with a 32-bit immediate?
    // Note that the immediate gets sign-extended when used with a 64-bit
    // register, so in that case we only have 31 bits to work with.
    bool canUseSub =
        Uses64BitFramePtr ? isUInt<31>(BoundOffset) : isUInt<32>(BoundOffset);

    if (canUseSub) {
      const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);

      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
          .addReg(StackPtr)
          .setMIFlag(MachineInstr::FrameSetup);
      BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
          .addReg(FinalStackProbed)
          .addImm(BoundOffset)
          .setMIFlag(MachineInstr::FrameSetup);
    } else if (Uses64BitFramePtr) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), FinalStackProbed)
          .addImm(-BoundOffset)
          .setMIFlag(MachineInstr::FrameSetup);
      BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), FinalStackProbed)
          .addReg(FinalStackProbed)
          .addReg(StackPtr)
          .setMIFlag(MachineInstr::FrameSetup);
    } else {
      // We're being asked to probe a stack frame that's 4 GiB or larger,
      // but our stack pointer is only 32 bits.  This might be unreachable
      // code, so don't complain now; just trap if it's reached at runtime.
      BuildMI(MBB, MBBI, DL, TII.get(X86::TRAP));
    }

    // while in the loop, use loop-invariant reg for CFI,
    // instead of the stack pointer, which changes during the loop
    if (!HasFP && NeedsDwarfCFI) {
      // x32 uses the same DWARF register numbers as x86-64,
      // so there isn't a register number for r11d, we must use r11 instead
      const Register DwarfFinalStackProbed =
          STI.isTarget64BitILP32()
              ? Register(getX86SubSuperRegister(FinalStackProbed, 64))
              : FinalStackProbed;

      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createDefCfaRegister(
                   nullptr, TRI->getDwarfRegNum(DwarfFinalStackProbed, true)));
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createAdjustCfaOffset(nullptr, BoundOffset));
    }
  }

  // allocate a page
  BuildStackAdjustment(*testMBB, testMBB->end(), DL, -StackProbeSize,
                       /*InEpilogue=*/false)
      .setMIFlag(MachineInstr::FrameSetup);

  // touch the page
  addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc))
                   .setMIFlag(MachineInstr::FrameSetup),
               StackPtr, false, 0)
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);

  // cmp with stack pointer bound
  BuildMI(testMBB, DL, TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
      .addReg(StackPtr)
      .addReg(FinalStackProbed)
      .setMIFlag(MachineInstr::FrameSetup);

  // jump
  BuildMI(testMBB, DL, TII.get(X86::JCC_1))
      .addMBB(testMBB)
      .addImm(X86::COND_NE)
      .setMIFlag(MachineInstr::FrameSetup);
  testMBB->addSuccessor(testMBB);
  testMBB->addSuccessor(tailMBB);

  // BB management
  tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end());
  tailMBB->transferSuccessorsAndUpdatePHIs(&MBB);
  MBB.addSuccessor(testMBB);

  // handle tail
  const uint64_t TailOffset = Offset % StackProbeSize;
  MachineBasicBlock::iterator TailMBBIter = tailMBB->begin();
  if (TailOffset) {
    BuildStackAdjustment(*tailMBB, TailMBBIter, DL, -TailOffset,
                         /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // after the loop, switch back to stack pointer for CFI
  if (!HasFP && NeedsDwarfCFI) {
    // x32 uses the same DWARF register numbers as x86-64,
    // so there isn't a register number for esp, we must use rsp instead
    const Register DwarfStackPtr =
        STI.isTarget64BitILP32()
            ? Register(getX86SubSuperRegister(StackPtr, 64))
            : Register(StackPtr);

    BuildCFI(*tailMBB, TailMBBIter, DL,
             MCCFIInstruction::createDefCfaRegister(
                 nullptr, TRI->getDwarfRegNum(DwarfStackPtr, true)));
  }

  // Update Live In information
  fullyRecomputeLiveIns({tailMBB, testMBB});
}
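
// The synthesized loop corresponds roughly to (a sketch for a 64-bit target
// with the bound kept in R11 and a 4096-byte probe size):
//   movq %rsp, %r11
//   subq $BoundOffset, %r11        # loop-invariant lowest page to probe
// .LtestMBB:
//   subq $4096, %rsp               # allocate one page
//   movl $0, (%rsp)                # touch it
//   cmpq %r11, %rsp
//   jne  .LtestMBB
//   subq $TailOffset, %rsp         # remainder, smaller than one page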

void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
  assert(STI.is64Bit() && "different expansion needed for 32 bit");
  assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");
  const BasicBlock *LLVM_BB = MBB.getBasicBlock();

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Inline stack probe loop will clobber live EFLAGS.");

  // RAX contains the number of bytes of desired stack adjustment.
  // The handling here assumes this value has already been updated so as to
  // maintain stack alignment.
  //
  // We need to exit with RSP modified by this amount and execute suitable
  // page touches to notify the OS that we're growing the stack responsibly.
  // All stack probing must be done without modifying RSP.
  //
  // MBB:
  //    SizeReg = RAX;
  //    ZeroReg = 0
  //    CopyReg = RSP
  //    Flags, TestReg = CopyReg - SizeReg
  //    FinalReg = !Flags.Ovf ? TestReg : ZeroReg
  //    LimitReg = gs magic thread env access
  //    if FinalReg >= LimitReg goto ContinueMBB
  // RoundBB:
  //    RoundReg = page address of FinalReg
  // LoopMBB:
  //    LoopReg = PHI(LimitReg,ProbeReg)
  //    ProbeReg = LoopReg - PageSize
  //    [ProbeReg] = 0
  //    if (ProbeReg > RoundReg) goto LoopMBB
  // ContinueMBB:
  //    RSP = RSP - RAX
  //    [rest of original MBB]

  // Set up the new basic blocks
  MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator MBBIter = std::next(MBB.getIterator());
  MF.insert(MBBIter, RoundMBB);
  MF.insert(MBBIter, LoopMBB);
  MF.insert(MBBIter, ContinueMBB);

  // Split MBB and move the tail portion down to ContinueMBB.
  MachineBasicBlock::iterator BeforeMBBI = std::prev(MBBI);
  ContinueMBB->splice(ContinueMBB->begin(), &MBB, MBBI, MBB.end());
  ContinueMBB->transferSuccessorsAndUpdatePHIs(&MBB);

  // Some useful constants
  const int64_t ThreadEnvironmentStackLimit = 0x10;
  const int64_t PageSize = 0x1000;
  const int64_t PageMask = ~(PageSize - 1);

  // Registers we need. For the normal case we use virtual
  // registers. For the prolog expansion we use RAX, RCX and RDX.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetRegisterClass *RegClass = &X86::GR64RegClass;
  const Register
      SizeReg = InProlog ? X86::RAX : MRI.createVirtualRegister(RegClass),
      ZeroReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      CopyReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      TestReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      FinalReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      RoundedReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      LimitReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      JoinReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      ProbeReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass);

  // SP-relative offsets where we can save RCX and RDX.
  int64_t RCXShadowSlot = 0;
  int64_t RDXShadowSlot = 0;

  // If inlining in the prolog, save RCX and RDX.
  if (InProlog) {
    // Compute the offsets. We need to account for things already
    // pushed onto the stack at this point: return address, frame
    // pointer (if used), and callee saves.
    X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
    const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();
    const bool HasFP = hasFP(MF);

    // Check if we need to spill RCX and/or RDX.
    // Here we assume that no earlier prologue instruction changes RCX and/or
    // RDX, so checking the block live-ins is enough.
    const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX);
    const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX);
    int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
    // Assign the initial slot to both registers, then change RDX's slot if
    // both need to be spilled.
    if (IsRCXLiveIn)
      RCXShadowSlot = InitSlot;
    if (IsRDXLiveIn)
      RDXShadowSlot = InitSlot;
    if (IsRDXLiveIn && IsRCXLiveIn)
      RDXShadowSlot += 8;
    // Emit the saves if needed.
    if (IsRCXLiveIn)
      addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
                   RCXShadowSlot)
          .addReg(X86::RCX);
    if (IsRDXLiveIn)
      addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
                   RDXShadowSlot)
          .addReg(X86::RDX);
  } else {
    // Not in the prolog. Copy RAX to a virtual reg.
    BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX);
  }

  // Add code to MBB to check for overflow and set the new target stack pointer
  // to zero if so.
  BuildMI(&MBB, DL, TII.get(X86::XOR64rr), ZeroReg)
      .addReg(ZeroReg, RegState::Undef)
      .addReg(ZeroReg, RegState::Undef);
  BuildMI(&MBB, DL, TII.get(X86::MOV64rr), CopyReg).addReg(X86::RSP);
  BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg)
      .addReg(CopyReg)
      .addReg(SizeReg);
  BuildMI(&MBB, DL, TII.get(X86::CMOV64rr), FinalReg)
      .addReg(TestReg)
      .addReg(ZeroReg)
      .addImm(X86::COND_B);

  // FinalReg now holds final stack pointer value, or zero if
  // allocation would overflow. Compare against the current stack
  // limit from the thread environment block. Note this limit is the
  // lowest touched page on the stack, not the point at which the OS
  // will cause an overflow exception, so this is just an optimization
  // to avoid unnecessarily touching pages that are below the current
  // SP but already committed to the stack by the OS.
  BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg)
      .addReg(0)
      .addImm(1)
      .addReg(0)
      .addImm(ThreadEnvironmentStackLimit)
      .addReg(X86::GS);
  BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg);
  // Jump if the desired stack pointer is at or above the stack limit.
  BuildMI(&MBB, DL, TII.get(X86::JCC_1))
      .addMBB(ContinueMBB)
      .addImm(X86::COND_AE);

  // Add code to roundMBB to round the final stack pointer to a page boundary.
  if (InProlog)
    RoundMBB->addLiveIn(FinalReg);
  BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg)
      .addReg(FinalReg)
      .addImm(PageMask);
  BuildMI(RoundMBB, DL, TII.get(X86::JMP_1)).addMBB(LoopMBB);

  // LimitReg now holds the current stack limit, RoundedReg page-rounded
  // final RSP value. Add code to loopMBB to decrement LimitReg page-by-page
  // and probe until we reach RoundedReg.
  if (!InProlog) {
    BuildMI(LoopMBB, DL, TII.get(X86::PHI), JoinReg)
        .addReg(LimitReg)
        .addMBB(RoundMBB)
        .addReg(ProbeReg)
        .addMBB(LoopMBB);
  }

  if (InProlog)
    LoopMBB->addLiveIn(JoinReg);
  addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg,
               false, -PageSize);

  // Probe by storing a byte onto the stack.
  BuildMI(LoopMBB, DL, TII.get(X86::MOV8mi))
      .addReg(ProbeReg)
      .addImm(1)
      .addReg(0)
      .addImm(0)
      .addReg(0)
      .addImm(0);

  if (InProlog)
    LoopMBB->addLiveIn(RoundedReg);
  BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
      .addReg(RoundedReg)
      .addReg(ProbeReg);
  BuildMI(LoopMBB, DL, TII.get(X86::JCC_1))
      .addMBB(LoopMBB)
      .addImm(X86::COND_B);

  MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI();

  // If in prolog, restore RDX and RCX.
  if (InProlog) {
    if (RCXShadowSlot) // It means we spilled RCX in the prologue.
      addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
                           TII.get(X86::MOV64rm), X86::RCX),
                   X86::RSP, false, RCXShadowSlot);
    if (RDXShadowSlot) // It means we spilled RDX in the prologue.
      addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
                           TII.get(X86::MOV64rm), X86::RDX),
                   X86::RSP, false, RDXShadowSlot);
  }

  // Now that the probing is done, add code to continueMBB to update
  // the stack pointer for real.
  BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
      .addReg(X86::RSP)
      .addReg(SizeReg);

  // Add the control flow edges we need.
  MBB.addSuccessor(ContinueMBB);
  MBB.addSuccessor(RoundMBB);
  RoundMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(ContinueMBB);
  LoopMBB->addSuccessor(LoopMBB);

  if (InProlog) {
    LivePhysRegs LiveRegs;
    computeAndAddLiveIns(LiveRegs, *ContinueMBB);
  }

  // Mark all the instructions added to the prolog as frame setup.
  if (InProlog) {
    for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) {
      BeforeMBBI->setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI : *RoundMBB) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI : *LoopMBB) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI :
         llvm::make_range(ContinueMBB->begin(), ContinueMBBI)) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
  }
}

void X86FrameLowering::emitStackProbeCall(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
    std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
  bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;

  // FIXME: Add indirect thunk support and remove this.
  if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls())
    report_fatal_error("Emitting stack probe calls on 64-bit with the large "
                       "code model and indirect thunks not yet implemented.");

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Stack probe calls will clobber live EFLAGS.");

  unsigned CallOp;
  if (Is64Bit)
    CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
  else
    CallOp = X86::CALLpcrel32;

  StringRef Symbol = STI.getTargetLowering()->getStackProbeSymbolName(MF);

  MachineInstrBuilder CI;
  MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI);

  // All current stack probes take AX and SP as input, clobber flags, and
  // preserve all registers. x86_64 probes leave RSP unmodified.
  if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
    // For the large code model, we have to call through a register. Use R11,
    // as it is scratch in all supported calling conventions.
    BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
        .addExternalSymbol(MF.createExternalSymbolName(Symbol));
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
  } else {
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp))
             .addExternalSymbol(MF.createExternalSymbolName(Symbol));
  }

  unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX;
  unsigned SP = Uses64BitFramePtr ? X86::RSP : X86::ESP;
  CI.addReg(AX, RegState::Implicit)
      .addReg(SP, RegState::Implicit)
      .addReg(AX, RegState::Define | RegState::Implicit)
      .addReg(SP, RegState::Define | RegState::Implicit)
      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);

  MachineInstr *ModInst = CI;
  if (STI.isTargetWin64() || !STI.isOSWindows()) {
    // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
    // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
    // themselves. They also do not clobber %rax so we can reuse it when
    // adjusting %rsp.
    // All other platforms do not specify a particular ABI for the stack probe
    // function, so we arbitrarily define it to not adjust %esp/%rsp itself.
    ModInst =
        BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Uses64BitFramePtr)), SP)
            .addReg(SP)
            .addReg(AX);
  }

  // DebugInfo variable locations -- if there's an instruction number for the
  // allocation (i.e., DYN_ALLOC_*), substitute it for the instruction that
  // modifies SP.
  if (InstrNum) {
    if (STI.isTargetWin64() || !STI.isOSWindows()) {
      // Label destination operand of the subtract.
      MF.makeDebugValueSubstitution(*InstrNum,
                                    {ModInst->getDebugInstrNum(), 0});
    } else {
      // Label the call. The operand number is the penultimate operand, zero
      // based.
      unsigned SPDefOperand = ModInst->getNumOperands() - 2;
      MF.makeDebugValueSubstitution(
          *InstrNum, {ModInst->getDebugInstrNum(), SPDefOperand});
    }
  }

  if (InProlog) {
    // Apply the frame setup flag to all inserted instrs.
    for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI)
      ExpansionMBBI->setFlag(MachineInstr::FrameSetup);
  }
}
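
// Together with the prologue code that materializes the allocation size in
// (R|E)AX, the Win64 expansion amounts to the familiar pattern (a sketch):
//   movq $StackSize, %rax
//   callq __chkstk
//   subq %rax, %rsp          # Win64 __chkstk leaves RSP untouched
// while 32-bit MSVC's _chkstk moves ESP itself, so no trailing SUB is
// emitted there.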

static unsigned calculateSetFPREG(uint64_t SPAdjust) {
  // Win64 ABI has a less restrictive limitation of 240; 128 works equally well
  // and might require smaller successive adjustments.
  const uint64_t Win64MaxSEHOffset = 128;
  uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset);
  // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode.
  return SEHFrameOffset & -16;
}
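
// Worked examples: calculateSetFPREG(40) == 32 and calculateSetFPREG(120) ==
// 112 (rounded down to 16-byte alignment); any SPAdjust >= 128 is clamped to
// 128, which is already 16-byte aligned.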

// If we're forcing a stack realignment we can't rely on just the frame
// info, we need to know the ABI stack alignment as well in case we
// have a call out.  Otherwise just make sure we have some alignment - we'll
// go with the minimum SlotSize.
uint64_t
X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment.
  Align StackAlign = getStackAlign();
  bool HasRealign = MF.getFunction().hasFnAttribute("stackrealign");
  if (HasRealign) {
    if (MFI.hasCalls())
      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
    else if (MaxAlign < SlotSize)
      MaxAlign = Align(SlotSize);
  }

  if (!Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR) {
    if (HasRealign)
      MaxAlign = (MaxAlign > 16) ? MaxAlign : Align(16);
    else
      MaxAlign = Align(16);
  }
  return MaxAlign.value();
}
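
// For example, a 64-bit function carrying the "stackrealign" attribute that
// also makes calls gets MaxAlign raised at least to the 16-byte ABI stack
// alignment, so the prologue will realign with an AND mask of -16 (or a
// stricter mask if some local demands more).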

void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI,
                                          const DebugLoc &DL, unsigned Reg,
                                          uint64_t MaxAlign) const {
  uint64_t Val = -MaxAlign;
  unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);

  MachineFunction &MF = *MBB.getParent();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);

  // We want to make sure that (in worst case) less than StackProbeSize bytes
  // are not probed after the AND. This assumption is used in
  // emitStackProbeInlineGeneric.
  if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) {
    {
      NumFrameLoopProbe++;
      MachineBasicBlock *entryMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *headMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *bodyMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *footMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());

      MachineFunction::iterator MBBIter = MBB.getIterator();
      MF.insert(MBBIter, entryMBB);
      MF.insert(MBBIter, headMBB);
      MF.insert(MBBIter, bodyMBB);
      MF.insert(MBBIter, footMBB);
      const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
      Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
                                  : Is64Bit         ? X86::R11D
                                                    : X86::EAX;

      // Setup entry block
      {

        entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI);
        BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);
        MachineInstr *MI =
            BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed)
                .addReg(FinalStackProbed)
                .addImm(Val)
                .setMIFlag(MachineInstr::FrameSetup);

        // The EFLAGS implicit def is dead.
        MI->getOperand(3).setIsDead();

        BuildMI(entryMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);
        BuildMI(entryMBB, DL, TII.get(X86::JCC_1))
            .addMBB(&MBB)
            .addImm(X86::COND_E)
            .setMIFlag(MachineInstr::FrameSetup);
        entryMBB->addSuccessor(headMBB);
        entryMBB->addSuccessor(&MBB);
      }

      // Loop entry block

      {
        const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
        BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr)
            .addReg(StackPtr)
            .addImm(StackProbeSize)
            .setMIFlag(MachineInstr::FrameSetup);

        BuildMI(headMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(StackPtr)
            .addReg(FinalStackProbed)
            .setMIFlag(MachineInstr::FrameSetup);

        // jump to the footer if StackPtr < FinalStackProbed
        BuildMI(headMBB, DL, TII.get(X86::JCC_1))
            .addMBB(footMBB)
            .addImm(X86::COND_B)
            .setMIFlag(MachineInstr::FrameSetup);

        headMBB->addSuccessor(bodyMBB);
        headMBB->addSuccessor(footMBB);
      }

      // setup loop body
      {
        addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc))
                         .setMIFlag(MachineInstr::FrameSetup),
                     StackPtr, false, 0)
            .addImm(0)
            .setMIFlag(MachineInstr::FrameSetup);

        const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
        BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr)
            .addReg(StackPtr)
            .addImm(StackProbeSize)
            .setMIFlag(MachineInstr::FrameSetup);

        // cmp with stack pointer bound
        BuildMI(bodyMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);

        // jump back while FinalStackProbed < StackPtr
        BuildMI(bodyMBB, DL, TII.get(X86::JCC_1))
            .addMBB(bodyMBB)
            .addImm(X86::COND_B)
            .setMIFlag(MachineInstr::FrameSetup);
        bodyMBB->addSuccessor(bodyMBB);
        bodyMBB->addSuccessor(footMBB);
      }

      // setup loop footer
      {
        BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr)
            .addReg(FinalStackProbed)
            .setMIFlag(MachineInstr::FrameSetup);
        addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc))
                         .setMIFlag(MachineInstr::FrameSetup),
                     StackPtr, false, 0)
            .addImm(0)
            .setMIFlag(MachineInstr::FrameSetup);
        footMBB->addSuccessor(&MBB);
      }

      fullyRecomputeLiveIns({footMBB, bodyMBB, headMBB, &MBB});
    }
  } else {
    MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
                           .addReg(Reg)
                           .addImm(Val)
                           .setMIFlag(MachineInstr::FrameSetup);

    // The EFLAGS implicit def is dead.
    MI->getOperand(3).setIsDead();
  }
}

bool X86FrameLowering::has128ByteRedZone(const MachineFunction &MF) const {
  // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be
  // clobbered by any interrupt handler.
  assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
         "MF used frame lowering for wrong subtarget");
  const Function &Fn = MF.getFunction();
  const bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv());
  return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone);
}

/// Return true if we need to use the restricted Windows x64 prologue and
/// epilogue code patterns that can be described with WinCFI (.seh_*
/// directives).
bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const {
  return MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
}

bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
  return !isWin64Prologue(MF) && MF.needsFrameMoves();
}

/// Return true if an opcode is part of the REP group of instructions
static bool isOpcodeRep(unsigned Opcode) {
  switch (Opcode) {
  case X86::REPNE_PREFIX:
  case X86::REP_MOVSB_32:
  case X86::REP_MOVSB_64:
  case X86::REP_MOVSD_32:
  case X86::REP_MOVSD_64:
  case X86::REP_MOVSQ_32:
  case X86::REP_MOVSQ_64:
  case X86::REP_MOVSW_32:
  case X86::REP_MOVSW_64:
  case X86::REP_PREFIX:
  case X86::REP_STOSB_32:
  case X86::REP_STOSB_64:
  case X86::REP_STOSD_32:
  case X86::REP_STOSD_64:
  case X86::REP_STOSQ_32:
  case X86::REP_STOSQ_64:
  case X86::REP_STOSW_32:
  case X86::REP_STOSW_64:
    return true;
  default:
    break;
  }
  return false;
}

/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjusts the stack pointer. Adjust the stack pointer to
/// allocate space for local variables. Also emit labels used by the exception
/// handler to generate the exception handling frames.

/*
  Here's a gist of what gets emitted:

  ; Establish frame pointer, if needed
  [if needs FP]
      push  %rbp
      .cfi_def_cfa_offset 16
      .cfi_offset %rbp, -16
      .seh_pushreg %rbp
      mov  %rsp, %rbp
      .cfi_def_cfa_register %rbp

  ; Spill general-purpose registers
  [for all callee-saved GPRs]
      pushq %<reg>
      [if not needs FP]
          .cfi_def_cfa_offset (offset from RETADDR)
      .seh_pushreg %<reg>

  ; If the required stack alignment > default stack alignment
  ; rsp needs to be re-aligned.  This creates a "re-alignment gap"
  ; of unknown size in the stack frame.
  [if stack needs re-alignment]
      and  $MASK, %rsp

  ; Allocate space for locals
  [if target is Windows and allocated space > 4096 bytes]
      ; Windows needs special care for allocations larger
      ; than one page.
      mov $NNN, %rax
      call ___chkstk_ms/___chkstk
      sub  %rax, %rsp
  [else]
      sub  $NNN, %rsp

  [if needs FP]
      .seh_stackalloc (size of XMM spill slots)
      .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
  [else]
      .seh_stackalloc NNN

  ; Spill XMMs
  ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved,
  ; they may get spilled on any platform, if the current function
  ; calls @llvm.eh.unwind.init
  [if needs FP]
      [for all callee-saved XMM registers]
          movaps  %<xmm reg>, -MMM(%rbp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
          ; i.e. the offset relative to (%rbp - SEHFrameOffset)
  [else]
      [for all callee-saved XMM registers]
          movaps  %<xmm reg>, KKK(%rsp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, KKK

  .seh_endprologue

  [if needs base pointer]
      mov  %rsp, %rbx
      [if needs to restore base pointer]
          mov %rsp, -MMM(%rbp)

  ; Emit CFI info
  [if needs FP]
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rbp)
  [else]
      .cfi_def_cfa_offset (offset from RETADDR)
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rsp)

  Notes:
  - .seh directives are emitted only for Windows 64 ABI
  - .cv_fpo directives are emitted on win32 when emitting CodeView
  - .cfi directives are emitted for all other ABIs
  - for 32-bit code, substitute %e?? registers for %r??
*/
1553
1555 MachineBasicBlock &MBB) const {
1556 assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
1557 "MF used frame lowering for wrong subtarget");
1559 MachineFrameInfo &MFI = MF.getFrameInfo();
1560 const Function &Fn = MF.getFunction();
1562 uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
1563 uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate.
1564 bool IsFunclet = MBB.isEHFuncletEntry();
1566 if (Fn.hasPersonalityFn())
1567 Personality = classifyEHPersonality(Fn.getPersonalityFn());
1568 bool FnHasClrFunclet =
1569 MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR;
1570 bool IsClrFunclet = IsFunclet && FnHasClrFunclet;
1571 bool HasFP = hasFP(MF);
1572 bool IsWin64Prologue = isWin64Prologue(MF);
1573 bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry();
1574 // FIXME: Emit FPO data for EH funclets.
1575 bool NeedsWinFPO = !IsFunclet && STI.isTargetWin32() &&
1577 bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO;
1578 bool NeedsDwarfCFI = needsDwarfCFI(MF);
1580 const Register MachineFramePtr =
1582 : FramePtr;
1583 Register BasePtr = TRI->getBaseRegister();
1584 bool HasWinCFI = false;
1585
1586 // Debug location must be unknown since the first debug location is used
1587 // to determine the end of the prologue.
1588 DebugLoc DL;
1589 Register ArgBaseReg;
1590
1591 // Emit extra prolog for argument stack slot reference.
1592 if (auto *MI = X86FI->getStackPtrSaveMI()) {
1593 // MI is lea instruction that created in X86ArgumentStackSlotPass.
1594 // Creat extra prolog for stack realignment.
1595 ArgBaseReg = MI->getOperand(0).getReg();
1596 // leal 4(%esp), %basereg
1597 // .cfi_def_cfa %basereg, 0
1598 // andl $-128, %esp
1599 // pushl -4(%basereg)
1600 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::LEA64r : X86::LEA32r),
1601 ArgBaseReg)
1603 .addImm(1)
1604 .addUse(X86::NoRegister)
1606 .addUse(X86::NoRegister)
1608 if (NeedsDwarfCFI) {
1609 // .cfi_def_cfa %basereg, 0
1610 unsigned DwarfStackPtr = TRI->getDwarfRegNum(ArgBaseReg, true);
1611 BuildCFI(MBB, MBBI, DL,
1612 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, 0),
1614 }
1615 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1616 int64_t Offset = -(int64_t)SlotSize;
1617 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm))
1618 .addReg(ArgBaseReg)
1619 .addImm(1)
1620 .addReg(X86::NoRegister)
1621 .addImm(Offset)
1622 .addReg(X86::NoRegister)
1624 }
1625
1626 // Space reserved for stack-based arguments when making a (ABI-guaranteed)
1627 // tail call.
1628 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
1629 if (TailCallArgReserveSize && IsWin64Prologue)
1630 report_fatal_error("Can't handle guaranteed tail call under win64 yet");
1631
1632 const bool EmitStackProbeCall =
1634 unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
1635
1636 if (HasFP && X86FI->hasSwiftAsyncContext()) {
1640 // The special symbol below is absolute and has a *value* suitable to be
1641 // combined with the frame pointer directly.
1642 BuildMI(MBB, MBBI, DL, TII.get(X86::OR64rm), MachineFramePtr)
1643 .addUse(MachineFramePtr)
1644 .addUse(X86::RIP)
1645 .addImm(1)
1646 .addUse(X86::NoRegister)
1647 .addExternalSymbol("swift_async_extendedFramePointerFlags",
1649 .addUse(X86::NoRegister);
1650 break;
1651 }
1652 [[fallthrough]];
1653
1655 assert(
1656 !IsWin64Prologue &&
1657 "win64 prologue does not set the bit 60 in the saved frame pointer");
1658 BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8), MachineFramePtr)
1659 .addUse(MachineFramePtr)
1660 .addImm(60)
1662 break;
1663
1665 break;
1666 }
1667 }
1668
1669 // Re-align the stack on 64-bit if the x86-interrupt calling convention is
1670 // used and an error code was pushed, since the x86-64 ABI requires a 16-byte
1671 // stack alignment.
1673 Fn.arg_size() == 2) {
1674 StackSize += 8;
1675 MFI.setStackSize(StackSize);
1676
1677 // Update the stack pointer by pushing a register. This is the instruction
1678 // emitted that would be end up being emitted by a call to `emitSPUpdate`.
1679 // Hard-coding the update to a push avoids emitting a second
1680 // `STACKALLOC_W_PROBING` instruction in the save block: We know that stack
1681 // probing isn't needed anyways for an 8-byte update.
1682 // Pushing a register leaves us in a similar situation to a regular
1683 // function call where we know that the address at (rsp-8) is writeable.
1684 // That way we avoid any off-by-ones with stack probing for additional
1685 // stack pointer updates later on.
1686 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1687 .addReg(X86::RAX, RegState::Undef)
1689 }
1690
1691 // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
1692 // function, and use up to 128 bytes of stack space, don't have a frame
1693 // pointer, calls, or dynamic alloca then we do not need to adjust the
1694 // stack pointer (we fit in the Red Zone). We also check that we don't
1695 // push and pop from the stack.
1696 if (has128ByteRedZone(MF) && !TRI->hasStackRealignment(MF) &&
1697 !MFI.hasVarSizedObjects() && // No dynamic alloca.
1698 !MFI.adjustsStack() && // No calls.
1699 !EmitStackProbeCall && // No stack probes.
1700 !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
1701 !MF.shouldSplitStack()) { // Regular stack
1702 uint64_t MinSize =
1704 if (HasFP)
1705 MinSize += SlotSize;
1706 X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0);
1707 StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
1708 MFI.setStackSize(StackSize);
1709 }
1710
1711 // Insert stack pointer adjustment for later moving of return addr. Only
1712 // applies to tail call optimized functions where the callee argument stack
1713 // size is bigger than the callers.
1714 if (TailCallArgReserveSize != 0) {
1715 BuildStackAdjustment(MBB, MBBI, DL, -(int)TailCallArgReserveSize,
1716 /*InEpilogue=*/false)
1718 }
1719
1720 // Mapping for machine moves:
1721 //
1722 // DST: VirtualFP AND
1723 // SRC: VirtualFP => DW_CFA_def_cfa_offset
1724 // ELSE => DW_CFA_def_cfa
1725 //
1726 // SRC: VirtualFP AND
1727 // DST: Register => DW_CFA_def_cfa_register
1728 //
1729 // ELSE
1730 // OFFSET < 0 => DW_CFA_offset_extended_sf
1731 // REG < 64 => DW_CFA_offset + Reg
1732 // ELSE => DW_CFA_offset_extended
1733
1734 uint64_t NumBytes = 0;
1735 int stackGrowth = -SlotSize;
1736
1737 // Find the funclet establisher parameter
1738 Register Establisher = X86::NoRegister;
1739 if (IsClrFunclet)
1740 Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX;
1741 else if (IsFunclet)
1742 Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX;
1743
1744 if (IsWin64Prologue && IsFunclet && !IsClrFunclet) {
1745 // Immediately spill establisher into the home slot.
1746 // The runtime cares about this.
1747 // MOV64mr %rdx, 16(%rsp)
1748 unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
1749 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16)
1750 .addReg(Establisher)
1752 MBB.addLiveIn(Establisher);
1753 }
1754
1755 if (HasFP) {
1756 assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved");
1757
1758 // Calculate required stack adjustment.
1759 uint64_t FrameSize = StackSize - SlotSize;
1760 NumBytes =
1761 FrameSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1762
1763 // Callee-saved registers are pushed on stack before the stack is realigned.
1764 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
1765 NumBytes = alignTo(NumBytes, MaxAlign);
1766
1767 // Save EBP/RBP into the appropriate stack slot.
1768 BuildMI(MBB, MBBI, DL,
1770 .addReg(MachineFramePtr, RegState::Kill)
1772
1773 if (NeedsDwarfCFI && !ArgBaseReg.isValid()) {
1774 // Mark the place where EBP/RBP was saved.
1775 // Define the current CFA rule to use the provided offset.
1776 assert(StackSize);
1777 BuildCFI(MBB, MBBI, DL,
1778 MCCFIInstruction::cfiDefCfaOffset(
1779 nullptr, -2 * stackGrowth + (int)TailCallArgReserveSize),
1780 MachineInstr::FrameSetup);
1781
1782 // Change the rule for the FramePtr to be an "offset" rule.
1783 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1784 BuildCFI(MBB, MBBI, DL,
1785 MCCFIInstruction::createOffset(nullptr, DwarfFramePtr,
1786 2 * stackGrowth -
1787 (int)TailCallArgReserveSize),
1788 MachineInstr::FrameSetup);
1789 }
1790
1791 if (NeedsWinCFI) {
1792 HasWinCFI = true;
1793 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1794 .addImm(FramePtr)
1795 .setMIFlag(MachineInstr::FrameSetup);
1796 }
1797
1798 if (!IsFunclet) {
1799 if (X86FI->hasSwiftAsyncContext()) {
1800 assert(!IsWin64Prologue &&
1801 "win64 prologue does not store async context right below rbp");
1802 const auto &Attrs = MF.getFunction().getAttributes();
1803
1804 // Before we update the live frame pointer we have to ensure there's a
1805 // valid (or null) asynchronous context in its slot just before FP in
1806 // the frame record, so store it now.
1807 if (Attrs.hasAttrSomewhere(Attribute::SwiftAsync)) {
1808 // We have an initial context in r14, store it just before the frame
1809 // pointer.
1810 MBB.addLiveIn(X86::R14);
1811 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1812 .addReg(X86::R14)
1813 .setMIFlag(MachineInstr::FrameSetup);
1814 } else {
1815 // No initial context, store null so that there's no pointer that
1816 // could be misused.
1817 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i32))
1818 .addImm(0)
1819 .setMIFlag(MachineInstr::FrameSetup);
1820 }
1821
1822 if (NeedsWinCFI) {
1823 HasWinCFI = true;
1824 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1825 .addImm(X86::R14)
1826 .setMIFlag(MachineInstr::FrameSetup);
1827 }
1828
1829 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr)
1830 .addUse(X86::RSP)
1831 .addImm(1)
1832 .addUse(X86::NoRegister)
1833 .addImm(8)
1834 .addUse(X86::NoRegister)
1835 .setMIFlag(MachineInstr::FrameSetup);
1836 BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri32), X86::RSP)
1837 .addUse(X86::RSP)
1838 .addImm(8)
1839 .setMIFlag(MachineInstr::FrameSetup);
1840 }
1841
1842 if (!IsWin64Prologue && !IsFunclet) {
1843 // Update EBP with the new base value.
1844 if (!X86FI->hasSwiftAsyncContext())
1845 BuildMI(MBB, MBBI, DL,
1846 TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
1847 FramePtr)
1848 .addReg(StackPtr)
1849 .setMIFlag(MachineInstr::FrameSetup);
1850
1851 if (NeedsDwarfCFI) {
1852 if (ArgBaseReg.isValid()) {
1853 SmallString<64> CfaExpr;
1854 CfaExpr.push_back(dwarf::DW_CFA_expression);
1855 uint8_t buffer[16];
1856 unsigned DwarfReg = TRI->getDwarfRegNum(MachineFramePtr, true);
1857 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
1858 CfaExpr.push_back(2);
1859 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
1860 CfaExpr.push_back(0);
1861 // DW_CFA_expression: reg5 DW_OP_breg5 +0
1862 BuildCFI(MBB, MBBI, DL,
1863 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
1864 MachineInstr::FrameSetup);
1865 } else {
1866 // Mark effective beginning of when frame pointer becomes valid.
1867 // Define the current CFA to use the EBP/RBP register.
1868 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1869 BuildCFI(
1870 MBB, MBBI, DL,
1871 MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
1872 MachineInstr::FrameSetup);
1873 }
1874 }
1875
1876 if (NeedsWinFPO) {
1877 // .cv_fpo_setframe $FramePtr
1878 HasWinCFI = true;
1879 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
1880 .addImm(FramePtr)
1881 .addImm(0)
1882 .setMIFlag(MachineInstr::FrameSetup);
1883 }
1884 }
1885 }
1886 } else {
1887 assert(!IsFunclet && "funclets without FPs not yet implemented");
1888 NumBytes =
1889 StackSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1890 }
1891
1892 // Update the offset adjustment, which is mainly used by codeview to translate
1893 // from ESP to VFRAME relative local variable offsets.
1894 if (!IsFunclet) {
1895 if (HasFP && TRI->hasStackRealignment(MF))
1896 MFI.setOffsetAdjustment(-NumBytes);
1897 else
1898 MFI.setOffsetAdjustment(-StackSize);
1899 }
1900
1901 // For EH funclets, only allocate enough space for outgoing calls. Save the
1902 // NumBytes value that we would've used for the parent frame.
1903 unsigned ParentFrameNumBytes = NumBytes;
1904 if (IsFunclet)
1905 NumBytes = getWinEHFuncletFrameSize(MF);
1906
1907 // Skip the callee-saved push instructions.
1908 bool PushedRegs = false;
1909 int StackOffset = 2 * stackGrowth;
1910 MachineBasicBlock::const_iterator LastCSPush = MBBI;
1911 auto IsCSPush = [&](const MachineBasicBlock::iterator &MBBI) {
1912 if (MBBI == MBB.end() || !MBBI->getFlag(MachineInstr::FrameSetup))
1913 return false;
1914 unsigned Opc = MBBI->getOpcode();
1915 return Opc == X86::PUSH32r || Opc == X86::PUSH64r || Opc == X86::PUSHP64r ||
1916 Opc == X86::PUSH2 || Opc == X86::PUSH2P;
1917 };
1918
1919 while (IsCSPush(MBBI)) {
1920 PushedRegs = true;
1921 Register Reg = MBBI->getOperand(0).getReg();
1922 LastCSPush = MBBI;
1923 ++MBBI;
1924 unsigned Opc = LastCSPush->getOpcode();
1925
1926 if (!HasFP && NeedsDwarfCFI) {
1927 // Mark callee-saved push instruction.
1928 // Define the current CFA rule to use the provided offset.
1929 assert(StackSize);
1930 // Compared to push, push2 introduces more stack offset (one more
1931 // register).
1932 if (Opc == X86::PUSH2 || Opc == X86::PUSH2P)
1933 StackOffset += stackGrowth;
1934 BuildCFI(MBB, MBBI, DL,
1935 MCCFIInstruction::cfiDefCfaOffset(nullptr, -StackOffset),
1936 MachineInstr::FrameSetup);
1937 StackOffset += stackGrowth;
1938 }
1939
1940 if (NeedsWinCFI) {
1941 HasWinCFI = true;
1942 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1943 .addImm(Reg)
1944 .setMIFlag(MachineInstr::FrameSetup);
1945 if (Opc == X86::PUSH2 || Opc == X86::PUSH2P)
1946 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1947 .addImm(LastCSPush->getOperand(1).getReg())
1948 .setMIFlag(MachineInstr::FrameSetup);
1949 }
1950 }
1951
1952 // Realign stack after we pushed callee-saved registers (so that we'll be
1953 // able to calculate their offsets from the frame pointer).
1954 // Don't do this for Win64; it needs to realign the stack after the prologue.
1955 if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF) &&
1956 !ArgBaseReg.isValid()) {
1957 assert(HasFP && "There should be a frame pointer if stack is realigned.");
1958 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1959
1960 if (NeedsWinCFI) {
1961 HasWinCFI = true;
1962 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlign))
1963 .addImm(MaxAlign)
1964 .setMIFlag(MachineInstr::FrameSetup);
1965 }
1966 }
1967
1968 // If there is a SUB32ri of ESP immediately before this instruction, merge
1969 // the two. This can be the case when tail call elimination is enabled and
1970 // the callee has more arguments than the caller.
1971 NumBytes -= mergeSPUpdates(MBB, MBBI, true);
1972
1973 // Adjust stack pointer: ESP -= numbytes.
1974
1975 // Windows and cygwin/mingw require a prologue helper routine when allocating
1976 // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
1977 // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
1978 // stack and adjust the stack pointer in one go. The 64-bit version of
1979 // __chkstk is only responsible for probing the stack. The 64-bit prologue is
1980 // responsible for adjusting the stack pointer. Touching the stack at 4K
1981 // increments is necessary to ensure that the guard pages used by the OS
1982 // virtual memory manager are allocated in correct sequence.
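// A Win64 prologue allocating 0x5000 bytes therefore looks roughly like:
//   movl $0x5000, %eax
//   callq __chkstk       ; probes each 4K page, leaves RSP unchanged
//   subq %rax, %rsp      ; the prologue itself adjusts the stack pointer
// (illustrative sketch; the exact registers and form vary).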
1983 uint64_t AlignedNumBytes = NumBytes;
1984 if (IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF))
1985 AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign);
1986 if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) {
1987 assert(!X86FI->getUsesRedZone() &&
1988 "The Red Zone is not accounted for in stack probes");
1989
1990 // Check whether EAX is livein for this block.
1991 bool isEAXAlive = isEAXLiveIn(MBB);
1992
1993 if (isEAXAlive) {
1994 if (Is64Bit) {
1995 // Save RAX
1996 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1997 .addReg(X86::RAX, RegState::Kill)
1998 .setMIFlag(MachineInstr::FrameSetup);
1999 } else {
2000 // Save EAX
2001 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
2002 .addReg(X86::EAX, RegState::Kill)
2003 .setMIFlag(MachineInstr::FrameSetup);
2004 }
2005 }
2006
2007 if (Is64Bit) {
2008 // Handle the 64-bit Windows ABI case where we need to call __chkstk.
2009 // Function prologue is responsible for adjusting the stack pointer.
2010 int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes;
2011 BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Alloc)), X86::RAX)
2012 .addImm(Alloc)
2013 .setMIFlag(MachineInstr::FrameSetup);
2014 } else {
2015 // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
2016 // We'll also use 4 already allocated bytes for EAX.
2017 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
2018 .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
2019 .setMIFlag(MachineInstr::FrameSetup);
2020 }
2021
2022 // Call __chkstk, __chkstk_ms, or __alloca.
2023 emitStackProbe(MF, MBB, MBBI, DL, true);
2024
2025 if (isEAXAlive) {
2026 // Restore RAX/EAX
2027 MachineInstr *MI;
2028 if (Is64Bit)
2029 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV64rm), X86::RAX),
2030 StackPtr, false, NumBytes - 8);
2031 else
2032 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX),
2033 StackPtr, false, NumBytes - 4);
2034 MI->setFlag(MachineInstr::FrameSetup);
2035 MBB.insert(MBBI, MI);
2036 }
2037 } else if (NumBytes) {
2038 emitSPUpdate(MBB, MBBI, DL, -(int64_t)NumBytes, /*InEpilogue=*/false);
2039 }
2040
2041 if (NeedsWinCFI && NumBytes) {
2042 HasWinCFI = true;
2043 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
2044 .addImm(NumBytes)
2045 .setMIFlag(MachineInstr::FrameSetup);
2046 }
2047
2048 int SEHFrameOffset = 0;
2049 unsigned SPOrEstablisher;
2050 if (IsFunclet) {
2051 if (IsClrFunclet) {
2052 // The establisher parameter passed to a CLR funclet is actually a pointer
2053 // to the (mostly empty) frame of its nearest enclosing funclet; we have
2054 // to find the root function establisher frame by loading the PSPSym from
2055 // the intermediate frame.
2056 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2057 MachinePointerInfo NoInfo;
2058 MBB.addLiveIn(Establisher);
2059 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher),
2060 Establisher, false, PSPSlotOffset)
2061 .addMemOperand(MF.getMachineMemOperand(
2062 NoInfo, MachineMemOperand::MOLoad, SlotSize, Align(SlotSize)));
2063 ;
2064 // Save the root establisher back into the current funclet's (mostly
2065 // empty) frame, in case a sub-funclet or the GC needs it.
2066 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr,
2067 false, PSPSlotOffset)
2068 .addReg(Establisher)
2069 .addMemOperand(MF.getMachineMemOperand(
2070 NoInfo,
2071 MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
2072 SlotSize, Align(SlotSize)));
2073 }
2074 SPOrEstablisher = Establisher;
2075 } else {
2076 SPOrEstablisher = StackPtr;
2077 }
2078
2079 if (IsWin64Prologue && HasFP) {
2080 // Set RBP to a small fixed offset from RSP. In the funclet case, we base
2081 // this calculation on the incoming establisher, which holds the value of
2082 // RSP from the parent frame at the end of the prologue.
2083 SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes);
2084 if (SEHFrameOffset)
2085 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
2086 SPOrEstablisher, false, SEHFrameOffset);
2087 else
2088 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr)
2089 .addReg(SPOrEstablisher);
2090
2091 // If this is not a funclet, emit the CFI describing our frame pointer.
2092 if (NeedsWinCFI && !IsFunclet) {
2093 assert(!NeedsWinFPO && "this setframe incompatible with FPO data");
2094 HasWinCFI = true;
2095 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
2096 .addImm(FramePtr)
2097 .addImm(SEHFrameOffset)
2098 .setMIFlag(MachineInstr::FrameSetup);
2099 if (isAsynchronousEHPersonality(Personality))
2100 MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset;
2101 }
2102 } else if (IsFunclet && STI.is32Bit()) {
2103 // Reset EBP / ESI to something good for funclets.
2104 MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL);
2105 // If we're a catch funclet, we can be returned to via catchret. Save ESP
2106 // into the registration node so that the runtime will restore it for us.
2107 if (!MBB.isCleanupFuncletEntry()) {
2108 assert(Personality == EHPersonality::MSVC_CXX);
2109 Register FrameReg;
2110 int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex;
2111 int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg).getFixed();
2112 // ESP is the first field, so no extra displacement is needed.
2113 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg,
2114 false, EHRegOffset)
2115 .addReg(X86::ESP);
2116 }
2117 }
2118
2119 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
2120 const MachineInstr &FrameInstr = *MBBI;
2121 ++MBBI;
2122
2123 if (NeedsWinCFI) {
2124 int FI;
2125 if (Register Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
2126 if (X86::FR64RegClass.contains(Reg)) {
2127 int Offset;
2128 Register IgnoredFrameReg;
2129 if (IsWin64Prologue && IsFunclet)
2130 Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg);
2131 else
2132 Offset =
2133 getFrameIndexReference(MF, FI, IgnoredFrameReg).getFixed() +
2134 SEHFrameOffset;
2135
2136 HasWinCFI = true;
2137 assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data");
2138 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
2139 .addImm(Reg)
2140 .addImm(Offset)
2141 .setMIFlag(MachineInstr::FrameSetup);
2142 }
2143 }
2144 }
2145 }
2146
2147 if (NeedsWinCFI && HasWinCFI)
2148 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
2149 .setMIFlag(MachineInstr::FrameSetup);
2150
2151 if (FnHasClrFunclet && !IsFunclet) {
2152 // Save the so-called Initial-SP (i.e. the value of the stack pointer
2153 // immediately after the prolog) into the PSPSlot so that funclets
2154 // and the GC can recover it.
2155 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2156 auto PSPInfo = MachinePointerInfo::getFixedStack(
2157 MF, MF.getWinEHFuncInfo()->PSPSymFrameIdx);
2158 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, false,
2159 PSPSlotOffset)
2160 .addReg(StackPtr)
2161 .addMemOperand(MF.getMachineMemOperand(
2162 PSPInfo, MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
2163 SlotSize, Align(SlotSize)));
2164 }
2165
2166 // Realign stack after we spilled callee-saved registers (so that we'll be
2167 // able to calculate their offsets from the frame pointer).
2168 // Win64 requires aligning the stack after the prologue.
2169 if (IsWin64Prologue && TRI->hasStackRealignment(MF)) {
2170 assert(HasFP && "There should be a frame pointer if stack is realigned.");
2171 BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign);
2172 }
2173
2174 // We already dealt with stack realignment and funclets above.
2175 if (IsFunclet && STI.is32Bit())
2176 return;
2177
2178 // If we need a base pointer, set it up here. It's whatever the value
2179 // of the stack pointer is at this point. Any variable size objects
2180 // will be allocated after this, so we can still use the base pointer
2181 // to reference locals.
2182 if (TRI->hasBasePointer(MF)) {
2183 // Update the base pointer with the current stack pointer.
2184 unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
2185 BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
2186 .addReg(SPOrEstablisher)
2187 .setMIFlag(MachineInstr::FrameSetup);
2188 if (X86FI->getRestoreBasePointer()) {
2189 // Stash value of base pointer. Saving RSP instead of EBP shortens
2190 // dependence chain. Used by SjLj EH.
2191 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2192 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), FramePtr, true,
2193 X86FI->getRestoreBasePointerOffset())
2194 .addReg(SPOrEstablisher)
2195 .setMIFlag(MachineInstr::FrameSetup);
2196 }
2197
2198 if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) {
2199 // Stash the value of the frame pointer relative to the base pointer for
2200 // Win32 EH. This supports Win32 EH, which does the inverse of the above:
2201 // it recovers the frame pointer from the base pointer rather than the
2202 // other way around.
2203 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2204 Register UsedReg;
2205 int Offset =
2206 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
2207 .getFixed();
2208 assert(UsedReg == BasePtr);
2209 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset)
2210 .addReg(FramePtr)
2211 .setMIFlag(MachineInstr::FrameSetup);
2212 }
2213 }
2214 if (ArgBaseReg.isValid()) {
2215 // Save argument base pointer.
2216 auto *MI = X86FI->getStackPtrSaveMI();
2217 int FI = MI->getOperand(1).getIndex();
2218 unsigned MOVmr = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
2219 // movl %basereg, offset(%ebp)
2220 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), FI)
2221 .addReg(ArgBaseReg)
2222 .setMIFlag(MachineInstr::FrameSetup);
2223 }
2224
2225 if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
2226 // Mark end of stack pointer adjustment.
2227 if (!HasFP && NumBytes) {
2228 // Define the current CFA rule to use the provided offset.
2229 assert(StackSize);
2230 BuildCFI(
2231 MBB, MBBI, DL,
2232 MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize - stackGrowth),
2233 MachineInstr::FrameSetup);
2234 }
2235
2236 // Emit DWARF info specifying the offsets of the callee-saved registers.
2237 emitCalleeSavedFrameMoves(MBB, MBBI, DL, true);
2238 }
2239
2240 // X86 Interrupt handling function cannot assume anything about the direction
2241 // flag (DF in EFLAGS register). Clear this flag by creating "cld" instruction
2242 // in each prologue of interrupt handler function.
2243 //
2244 // Create "cld" instruction only in these cases:
2245 // 1. The interrupt handling function uses any of the "rep" instructions.
2246 // 2. Interrupt handling function calls another function.
2247 // 3. If there are any inline asm blocks, as we do not know what they do
2248 //
2249 // TODO: We should also emit cld if we detect the use of std, but as of now,
2250 // the compiler does not currently emit or even define that instruction, so in
2251 // practice, this would only happen with inline asm, which we cover anyway.
2252 if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR) {
2253 bool NeedsCLD = false;
2254
2255 for (const MachineBasicBlock &B : MF) {
2256 for (const MachineInstr &MI : B) {
2257 if (MI.isCall()) {
2258 NeedsCLD = true;
2259 break;
2260 }
2261
2262 if (isOpcodeRep(MI.getOpcode())) {
2263 NeedsCLD = true;
2264 break;
2265 }
2266
2267 if (MI.isInlineAsm()) {
2268 // TODO: Parse asm for rep instructions or call sites?
2269 // For now, let's play it safe and emit a cld instruction
2270 // just in case.
2271 NeedsCLD = true;
2272 break;
2273 }
2274 }
2275 }
2276
2277 if (NeedsCLD) {
2278 BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
2279 .setMIFlag(MachineInstr::FrameSetup);
2280 }
2281 }
2282
2283 // At this point we know if the function has WinCFI or not.
2284 MF.setHasWinCFI(HasWinCFI);
2285}
2286
2287 bool X86FrameLowering::canUseLEAForSPInEpilogue(
2288 const MachineFunction &MF) const {
2289 // We can't use LEA instructions for adjusting the stack pointer if we don't
2290 // have a frame pointer in the Win64 ABI. Only ADD instructions may be used
2291 // to deallocate the stack.
2292 // This means that we can use LEA for SP in two situations:
2293 // 1. We *aren't* using the Win64 ABI which means we are free to use LEA.
2294 // 2. We *have* a frame pointer which means we are permitted to use LEA.
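// Illustrative: "addq $32, %rsp" is always a recognized Win64 epilogue
// operation, whereas "leaq 32(%rbp), %rsp" is only valid once a frame
// pointer has been established.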
2295 return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF);
2296}
2297
2298 static bool isFuncletReturnInstr(MachineInstr &MI) {
2299 switch (MI.getOpcode()) {
2300 case X86::CATCHRET:
2301 case X86::CLEANUPRET:
2302 return true;
2303 default:
2304 return false;
2305 }
2306 llvm_unreachable("impossible");
2307}
2308
2309// CLR funclets use a special "Previous Stack Pointer Symbol" slot on the
2310// stack. It holds a pointer to the bottom of the root function frame. The
2311// establisher frame pointer passed to a nested funclet may point to the
2312// (mostly empty) frame of its parent funclet, but it will need to find
2313// the frame of the root function to access locals. To facilitate this,
2314// every funclet copies the pointer to the bottom of the root function
2315// frame into a PSPSym slot in its own (mostly empty) stack frame. Using the
2316// same offset for the PSPSym in the root function frame that's used in the
2317// funclets' frames allows each funclet to dynamically accept any ancestor
2318// frame as its establisher argument (the runtime doesn't guarantee the
2319// immediate parent for some reason lost to history), and also allows the GC,
2320// which uses the PSPSym for some bookkeeping, to find it in any funclet's
2321// frame with only a single offset reported for the entire method.
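// Sketch, assuming the PSPSym sits at offset N from the post-prologue SP:
//   root frame:    [SP+N] = root's Initial-SP
//   every funclet: [SP+N] = copy of root's Initial-SP (loaded via whichever
//   ancestor establisher the runtime happened to pass in).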
2322unsigned
2323X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const {
2324 const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo();
2325 Register SPReg;
2326 int Offset = getFrameIndexReferencePreferSP(MF, Info.PSPSymFrameIdx, SPReg,
2327 /*IgnoreSPUpdates*/ true)
2328 .getFixed();
2329 assert(Offset >= 0 && SPReg == TRI->getStackRegister());
2330 return static_cast<unsigned>(Offset);
2331}
2332
2333unsigned
2334 X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
2335 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2336 // This is the size of the pushed CSRs.
2337 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2338 // This is the size of callee saved XMMs.
2339 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2340 unsigned XMMSize =
2341 WinEHXMMSlotInfo.size() * TRI->getSpillSize(X86::VR128RegClass);
2342 // This is the amount of stack a funclet needs to allocate.
2343 unsigned UsedSize;
2344 EHPersonality Personality =
2345 classifyEHPersonality(MF.getFunction().getPersonalityFn());
2346 if (Personality == EHPersonality::CoreCLR) {
2347 // CLR funclets need to hold enough space to include the PSPSym, at the
2348 // same offset from the stack pointer (immediately after the prolog) as it
2349 // resides at in the main function.
2350 UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize;
2351 } else {
2352 // Other funclets just need enough stack for outgoing call arguments.
2353 UsedSize = MF.getFrameInfo().getMaxCallFrameSize();
2354 }
2355 // RBP is not included in the callee saved register block. After pushing RBP,
2356 // everything is 16 byte aligned. Everything we allocate before an outgoing
2357 // call must also be 16 byte aligned.
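// Worked example (illustrative): with CSSize = 24, UsedSize = 40 and 16-byte
// stack alignment, alignTo(64, 16) = 64, so the funclet allocates
// 64 - 24 = 40 bytes plus any XMM save area.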
2358 unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlign());
2359 // Subtract out the size of the callee saved registers. This is how much stack
2360 // each funclet will allocate.
2361 return FrameSizeMinusRBP + XMMSize - CSSize;
2362}
2363
2364static bool isTailCallOpcode(unsigned Opc) {
2365 return Opc == X86::TCRETURNri || Opc == X86::TCRETURNdi ||
2366 Opc == X86::TCRETURNmi || Opc == X86::TCRETURNri64 ||
2367 Opc == X86::TCRETURNdi64 || Opc == X86::TCRETURNmi64;
2368}
2369
2370 void X86FrameLowering::emitEpilogue(MachineFunction &MF,
2371 MachineBasicBlock &MBB) const {
2372 const MachineFrameInfo &MFI = MF.getFrameInfo();
2373 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2374 MachineBasicBlock::iterator Terminator = MBB.getFirstTerminator();
2375 MachineBasicBlock::iterator MBBI = Terminator;
2376 DebugLoc DL;
2377 if (MBBI != MBB.end())
2378 DL = MBBI->getDebugLoc();
2379 // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
2380 const bool Is64BitILP32 = STI.isTarget64BitILP32();
2381 Register FramePtr = TRI->getFrameRegister(MF);
2382 Register MachineFramePtr =
2383 Is64BitILP32 ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr;
2384
2385 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2386 bool NeedsWin64CFI =
2387 IsWin64Prologue && MF.getFunction().needsUnwindTableEntry();
2388 bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(*MBBI);
2389
2390 // Get the number of bytes to allocate from the FrameInfo.
2391 uint64_t StackSize = MFI.getStackSize();
2392 uint64_t MaxAlign = calculateMaxStackAlign(MF);
2393 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2394 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
2395 bool HasFP = hasFP(MF);
2396 uint64_t NumBytes = 0;
2397
2398 bool NeedsDwarfCFI = (!MF.getTarget().getTargetTriple().isOSDarwin() &&
2399 !MF.getTarget().getTargetTriple().isOSWindows()) &&
2400 MF.needsFrameMoves();
2401
2402 Register ArgBaseReg;
2403 if (auto *MI = X86FI->getStackPtrSaveMI()) {
2404 unsigned Opc = X86::LEA32r;
2405 Register StackReg = X86::ESP;
2406 ArgBaseReg = MI->getOperand(0).getReg();
2407 if (STI.is64Bit()) {
2408 Opc = X86::LEA64r;
2409 StackReg = X86::RSP;
2410 }
2411 // leal -4(%basereg), %esp
2412 // .cfi_def_cfa %esp, 4
2413 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackReg)
2414 .addUse(ArgBaseReg)
2415 .addImm(1)
2416 .addUse(X86::NoRegister)
2417 .addImm(-(int64_t)SlotSize)
2418 .addUse(X86::NoRegister)
2419 .setMIFlag(MachineInstr::FrameDestroy);
2420 if (NeedsDwarfCFI) {
2421 unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackReg, true);
2422 BuildCFI(MBB, MBBI, DL,
2423 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2424 MachineInstr::FrameDestroy);
2425 --MBBI;
2426 }
2427 --MBBI;
2428 }
2429
2430 if (IsFunclet) {
2431 assert(HasFP && "EH funclets without FP not yet implemented");
2432 NumBytes = getWinEHFuncletFrameSize(MF);
2433 } else if (HasFP) {
2434 // Calculate required stack adjustment.
2435 uint64_t FrameSize = StackSize - SlotSize;
2436 NumBytes = FrameSize - CSSize - TailCallArgReserveSize;
2437
2438 // Callee-saved registers were pushed on stack before the stack was
2439 // realigned.
2440 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
2441 NumBytes = alignTo(FrameSize, MaxAlign);
2442 } else {
2443 NumBytes = StackSize - CSSize - TailCallArgReserveSize;
2444 }
2445 uint64_t SEHStackAllocAmt = NumBytes;
2446
2447 // AfterPop is the position to insert .cfi_restore.
2448 MachineBasicBlock::iterator AfterPop = MBBI;
2449 if (HasFP) {
2450 if (X86FI->hasSwiftAsyncContext()) {
2451 // Discard the context.
2452 int Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
2453 emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/ true);
2454 }
2455 // Pop EBP.
2456 BuildMI(MBB, MBBI, DL,
2457 TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())),
2458 MachineFramePtr)
2459 .setMIFlag(MachineInstr::FrameDestroy);
2460
2461 // We need to reset FP to its untagged state on return. Bit 60 is currently
2462 // used to show the presence of an extended frame.
2463 if (X86FI->hasSwiftAsyncContext()) {
2464 BuildMI(MBB, MBBI, DL, TII.get(X86::BTR64ri8), MachineFramePtr)
2465 .addUse(MachineFramePtr)
2466 .addImm(60)
2467 .setMIFlag(MachineInstr::FrameDestroy);
2468 }
2469
2470 if (NeedsDwarfCFI) {
2471 if (!ArgBaseReg.isValid()) {
2472 unsigned DwarfStackPtr =
2473 TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
2474 BuildCFI(MBB, MBBI, DL,
2475 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2476 MachineInstr::FrameDestroy);
2477 }
2478 if (!MBB.succ_empty() && !MBB.isReturnBlock()) {
2479 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
2480 BuildCFI(MBB, AfterPop, DL,
2481 MCCFIInstruction::createRestore(nullptr, DwarfFramePtr),
2482 MachineInstr::FrameDestroy);
2483 --MBBI;
2484 --AfterPop;
2485 }
2486 --MBBI;
2487 }
2488 }
2489
2490 MachineBasicBlock::iterator FirstCSPop = MBBI;
2491 // Skip the callee-saved pop instructions.
2492 while (MBBI != MBB.begin()) {
2493 MachineBasicBlock::iterator PI = std::prev(MBBI);
2494 unsigned Opc = PI->getOpcode();
2495
2496 if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
2497 if (!PI->getFlag(MachineInstr::FrameDestroy) ||
2498 (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::BTR64ri8 &&
2499 Opc != X86::ADD64ri32 && Opc != X86::POPP64r && Opc != X86::POP2 &&
2500 Opc != X86::POP2P && Opc != X86::LEA64r))
2501 break;
2502 FirstCSPop = PI;
2503 }
2504
2505 --MBBI;
2506 }
2507 if (ArgBaseReg.isValid()) {
2508 // Restore argument base pointer.
2509 auto *MI = X86FI->getStackPtrSaveMI();
2510 int FI = MI->getOperand(1).getIndex();
2511 unsigned MOVrm = Is64Bit ? X86::MOV64rm : X86::MOV32rm;
2512 // movl offset(%ebp), %basereg
2513 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVrm), ArgBaseReg), FI)
2514 .setMIFlag(MachineInstr::FrameDestroy);
2515 }
2516 MBBI = FirstCSPop;
2517
2518 if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET)
2519 emitCatchRetReturnValue(MBB, FirstCSPop, &*Terminator);
2520
2521 if (MBBI != MBB.end())
2522 DL = MBBI->getDebugLoc();
2523 // If there is an ADD32ri or SUB32ri of ESP immediately before this
2524 // instruction, merge the two instructions.
2525 if (NumBytes || MFI.hasVarSizedObjects())
2526 NumBytes += mergeSPUpdates(MBB, MBBI, true);
2527
2528 // If dynamic alloca is used, then reset esp to point to the last callee-saved
2529 // slot before popping them off! The same applies when the stack was
2530 // realigned. Don't do this if this was a funclet epilogue, since the funclets
2531 // will not do realignment or dynamic stack allocation.
2532 if (((TRI->hasStackRealignment(MF)) || MFI.hasVarSizedObjects()) &&
2533 !IsFunclet) {
2534 if (TRI->hasStackRealignment(MF))
2535 MBBI = FirstCSPop;
2536 unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
2537 uint64_t LEAAmount =
2538 IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
2539
2540 if (X86FI->hasSwiftAsyncContext())
2541 LEAAmount -= 16;
2542
2543 // There are only two legal forms of epilogue:
2544 // - add SEHAllocationSize, %rsp
2545 // - lea SEHAllocationSize(%FramePtr), %rsp
2546 //
2547 // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence.
2548 // However, we may use this sequence if we have a frame pointer because the
2549 // effects of the prologue can safely be undone.
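// On non-Win64 targets, for instance, this emits "lea -CSSize(%rbp), %rsp"
// (illustrative), pointing RSP at the lowest callee-saved slot just before
// the pops.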
2550 if (LEAAmount != 0) {
2551 unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
2552 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), FramePtr,
2553 false, LEAAmount);
2554 --MBBI;
2555 } else {
2556 unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
2557 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr).addReg(FramePtr);
2558 --MBBI;
2559 }
2560 } else if (NumBytes) {
2561 // Adjust stack pointer back: ESP += numbytes.
2562 emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true);
2563 if (!HasFP && NeedsDwarfCFI) {
2564 // Define the current CFA rule to use the provided offset.
2565 BuildCFI(MBB, MBBI, DL,
2566 MCCFIInstruction::cfiDefCfaOffset(
2567 nullptr, CSSize + TailCallArgReserveSize + SlotSize),
2568 MachineInstr::FrameDestroy);
2569 }
2570 --MBBI;
2571 }
2572
2573 // Windows unwinder will not invoke function's exception handler if IP is
2574 // either in prologue or in epilogue. This behavior causes a problem when a
2575 // call immediately precedes an epilogue, because the return address points
2576 // into the epilogue. To cope with that, we insert an epilogue marker here,
2577 // then replace it with a 'nop' if it ends up immediately after a CALL in the
2578 // final emitted code.
2579 if (NeedsWin64CFI && MF.hasWinCFI())
2580 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
2581
2582 if (!HasFP && NeedsDwarfCFI) {
2583 MBBI = FirstCSPop;
2584 int64_t Offset = -(int64_t)CSSize - SlotSize;
2585 // Mark callee-saved pop instruction.
2586 // Define the current CFA rule to use the provided offset.
2587 while (MBBI != MBB.end()) {
2588 MachineBasicBlock::iterator PI = MBBI;
2589 unsigned Opc = PI->getOpcode();
2590 ++MBBI;
2591 if (Opc == X86::POP32r || Opc == X86::POP64r || Opc == X86::POPP64r ||
2592 Opc == X86::POP2 || Opc == X86::POP2P) {
2593 Offset += SlotSize;
2594 // Compared to pop, pop2 introduces more stack offset (one more
2595 // register).
2596 if (Opc == X86::POP2 || Opc == X86::POP2P)
2597 Offset += SlotSize;
2598 BuildCFI(MBB, MBBI, DL,
2599 MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset),
2600 MachineInstr::FrameDestroy);
2601 }
2602 }
2603 }
2604
2605 // Emit DWARF info specifying the restores of the callee-saved registers.
2606 // For epilogue with return inside or being other block without successor,
2607 // no need to generate .cfi_restore for callee-saved registers.
2608 if (NeedsDwarfCFI && !MBB.succ_empty())
2609 emitCalleeSavedFrameMoves(MBB, AfterPop, DL, false);
2610
2611 if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
2612 // Add the return addr area delta back since we are not tail calling.
2613 int Offset = -1 * X86FI->getTCReturnAddrDelta();
2614 assert(Offset >= 0 && "TCDelta should never be positive");
2615 if (Offset) {
2616 // Check for possible merge with preceding ADD instruction.
2617 Offset += mergeSPUpdates(MBB, Terminator, true);
2618 emitSPUpdate(MBB, Terminator, DL, Offset, /*InEpilogue=*/true);
2619 }
2620 }
2621
2622 // Emit tilerelease for AMX kernel.
2623 if (X86FI->getAMXProgModel() == AMXProgModelEnum::ManagedRA)
2624 BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
2625}
2626
2627 StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,
2628 int FI,
2629 Register &FrameReg) const {
2630 const MachineFrameInfo &MFI = MF.getFrameInfo();
2631
2632 bool IsFixed = MFI.isFixedObjectIndex(FI);
2633 // We can't calculate offset from frame pointer if the stack is realigned,
2634 // so enforce usage of stack/base pointer. The base pointer is used when we
2635 // have dynamic allocas in addition to dynamic realignment.
2636 if (TRI->hasBasePointer(MF))
2637 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister();
2638 else if (TRI->hasStackRealignment(MF))
2639 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister();
2640 else
2641 FrameReg = TRI->getFrameRegister(MF);
2642
2643 // Offset will hold the offset from the stack pointer at function entry to the
2644 // object.
2645 // We need to factor in additional offsets applied during the prologue to the
2646 // frame, base, and stack pointer depending on which is used.
2647 int64_t Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
2648 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2649 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2650 uint64_t StackSize = MFI.getStackSize();
2651 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2652 int64_t FPDelta = 0;
2653
2654 // In an x86 interrupt, remove the offset we added to account for the return
2655 // address from any stack object allocated in the caller's frame. Interrupts
2656 // do not have a standard return address. Fixed objects in the current frame,
2657 // such as SSE register spills, should not get this treatment.
2658 if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR && IsFixed &&
2659 Offset >= 0) {
2660 Offset += getOffsetOfLocalArea();
2661 }
2662
2663 if (IsWin64Prologue) {
2664 assert(!MFI.hasCalls() || (StackSize % 16) == 8);
2665
2666 // Calculate required stack adjustment.
2667 uint64_t FrameSize = StackSize - SlotSize;
2668 // If required, include space for extra hidden slot for stashing base
2669 // pointer.
2670 if (X86FI->getRestoreBasePointer())
2671 FrameSize += SlotSize;
2672 uint64_t NumBytes = FrameSize - CSSize;
2673
2674 uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
2675 if (FI && FI == X86FI->getFAIndex())
2676 return StackOffset::getFixed(-SEHFrameOffset);
2677
2678 // FPDelta is the offset from the "traditional" FP location of the old base
2679 // pointer followed by return address and the location required by the
2680 // restricted Win64 prologue.
2681 // Add FPDelta to all offsets below that go through the frame pointer.
2682 FPDelta = FrameSize - SEHFrameOffset;
2683 assert((!MFI.hasCalls() || (FPDelta % 16) == 0) &&
2684 "FPDelta isn't aligned per the Win64 ABI!");
2685 }
2686
2687 if (FrameReg == TRI->getFramePtr()) {
2688 // Skip saved EBP/RBP
2689 Offset += SlotSize;
2690
2691 // Account for restricted Windows prologue.
2692 Offset += FPDelta;
2693
2694 // Skip the RETADDR move area
2695 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2696 if (TailCallReturnAddrDelta < 0)
2697 Offset -= TailCallReturnAddrDelta;
2698
2699 return StackOffset::getFixed(Offset);
2700 }
2701
2702 // FrameReg is either the stack pointer or a base pointer. But the base is
2703 // located at the end of the statically known StackSize so the distinction
2704 // doesn't really matter.
2705 if (TRI->hasStackRealignment(MF) || TRI->hasBasePointer(MF))
2706 assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
2707 return StackOffset::getFixed(Offset + StackSize);
2708}
2709
2710 int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, int FI,
2711 Register &FrameReg) const {
2712 const MachineFrameInfo &MFI = MF.getFrameInfo();
2713 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2714 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2715 const auto it = WinEHXMMSlotInfo.find(FI);
2716
2717 if (it == WinEHXMMSlotInfo.end())
2718 return getFrameIndexReference(MF, FI, FrameReg).getFixed();
2719
2720 FrameReg = TRI->getStackRegister();
2721 return alignDown(MFI.getMaxCallFrameSize(), getStackAlign().value()) +
2722 it->second;
2723}
2724
2725 StackOffset
2726 X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF, int FI,
2727 Register &FrameReg,
2728 int Adjustment) const {
2729 const MachineFrameInfo &MFI = MF.getFrameInfo();
2730 FrameReg = TRI->getStackRegister();
2731 return StackOffset::getFixed(MFI.getObjectOffset(FI) -
2732 getOffsetOfLocalArea() + Adjustment);
2733}
2734
2735 StackOffset
2736 X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
2737 int FI, Register &FrameReg,
2738 bool IgnoreSPUpdates) const {
2739
2740 const MachineFrameInfo &MFI = MF.getFrameInfo();
2741 // Does not include any dynamic realign.
2742 const uint64_t StackSize = MFI.getStackSize();
2743 // LLVM arranges the stack as follows:
2744 // ...
2745 // ARG2
2746 // ARG1
2747 // RETADDR
2748 // PUSH RBP <-- RBP points here
2749 // PUSH CSRs
2750 // ~~~~~~~ <-- possible stack realignment (non-win64)
2751 // ...
2752 // STACK OBJECTS
2753 // ... <-- RSP after prologue points here
2754 // ~~~~~~~ <-- possible stack realignment (win64)
2755 //
2756 // if (hasVarSizedObjects()):
2757 // ... <-- "base pointer" (ESI/RBX) points here
2758 // DYNAMIC ALLOCAS
2759 // ... <-- RSP points here
2760 //
2761 // Case 1: In the simple case of no stack realignment and no dynamic
2762 // allocas, both "fixed" stack objects (arguments and CSRs) are addressable
2763 // with fixed offsets from RSP.
2764 //
2765 // Case 2: In the case of stack realignment with no dynamic allocas, fixed
2766 // stack objects are addressed with RBP and regular stack objects with RSP.
2767 //
2768 // Case 3: In the case of dynamic allocas and stack realignment, RSP is used
2769 // to address stack arguments for outgoing calls and nothing else. The "base
2770 // pointer" points to local variables, and RBP points to fixed objects.
2771 //
2772 // In cases 2 and 3, we can only answer for non-fixed stack objects, and the
2773 // answer we give is relative to the SP after the prologue, and not the
2774 // SP in the middle of the function.
2775
2776 if (MFI.isFixedObjectIndex(FI) && TRI->hasStackRealignment(MF) &&
2777 !STI.isTargetWin64())
2778 return getFrameIndexReference(MF, FI, FrameReg);
2779
2780 // If !hasReservedCallFrame the function might have SP adjustment in the
2781 // body. So, even though the offset is statically known, it depends on where
2782 // we are in the function.
2783 if (!IgnoreSPUpdates && !hasReservedCallFrame(MF))
2784 return getFrameIndexReference(MF, FI, FrameReg);
2785
2786 // We don't handle tail calls, and shouldn't be seeing them either.
2788 "we don't handle this case!");
2789
2790 // This is how the math works out:
2791 //
2792 // %rsp grows (i.e. gets lower) left to right. Each box below is
2793 // one word (eight bytes). Obj0 is the stack slot we're trying to
2794 // get to.
2795 //
2796 // ----------------------------------
2797 // | BP | Obj0 | Obj1 | ... | ObjN |
2798 // ----------------------------------
2799 // ^ ^ ^ ^
2800 // A B C E
2801 //
2802 // A is the incoming stack pointer.
2803 // (B - A) is the local area offset (-8 for x86-64) [1]
2804 // (C - A) is the Offset returned by MFI.getObjectOffset for Obj0 [2]
2805 //
2806 // |(E - B)| is the StackSize (absolute value, positive). For a
2807 // stack that grows down, this works out to be (B - E). [3]
2808 //
2809 // E is also the value of %rsp after stack has been set up, and we
2810 // want (C - E) -- the value we can add to %rsp to get to Obj0. Now
2811 // (C - E) == (C - A) - (B - A) + (B - E)
2812 // { Using [1], [2] and [3] above }
2813 // == getObjectOffset - LocalAreaOffset + StackSize
2814
2815 return getFrameIndexReferenceSP(MF, FI, FrameReg, StackSize);
2816}
2817
2818 bool X86FrameLowering::assignCalleeSavedSpillSlots(
2819 MachineFunction &MF, const TargetRegisterInfo *TRI,
2820 std::vector<CalleeSavedInfo> &CSI) const {
2821 MachineFrameInfo &MFI = MF.getFrameInfo();
2822 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2823
2824 unsigned CalleeSavedFrameSize = 0;
2825 unsigned XMMCalleeSavedFrameSize = 0;
2826 auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2827 int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
2828
2829 int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2830
2831 if (TailCallReturnAddrDelta < 0) {
2832 // create RETURNADDR area
2833 // arg
2834 // arg
2835 // RETADDR
2836 // { ...
2837 // RETADDR area
2838 // ...
2839 // }
2840 // [EBP]
2841 MFI.CreateFixedObject(-TailCallReturnAddrDelta,
2842 TailCallReturnAddrDelta - SlotSize, true);
2843 }
2844
2845 // Spill the BasePtr if it's used.
2846 if (this->TRI->hasBasePointer(MF)) {
2847 // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
2848 if (MF.hasEHFunclets()) {
2849 int FI = MFI.CreateSpillStackObject(SlotSize, Align(SlotSize));
2850 X86FI->setHasSEHFramePtrSave(true);
2851 X86FI->setSEHFramePtrSaveIndex(FI);
2852 }
2853 }
2854
2855 if (hasFP(MF)) {
2856 // emitPrologue always spills frame register the first thing.
2857 SpillSlotOffset -= SlotSize;
2858 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2859
2860 // The async context lives directly before the frame pointer, and we
2861 // allocate a second slot to preserve stack alignment.
2862 if (X86FI->hasSwiftAsyncContext()) {
2863 SpillSlotOffset -= SlotSize;
2864 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2865 SpillSlotOffset -= SlotSize;
2866 }
2867
2868 // Since emitPrologue and emitEpilogue will handle spilling and restoring of
2869 // the frame register, we can delete it from CSI list and not have to worry
2870 // about avoiding it later.
2871 Register FPReg = TRI->getFrameRegister(MF);
2872 for (unsigned i = 0; i < CSI.size(); ++i) {
2873 if (TRI->regsOverlap(CSI[i].getReg(), FPReg)) {
2874 CSI.erase(CSI.begin() + i);
2875 break;
2876 }
2877 }
2878 }
2879
2880 // Strategy:
2881 // 1. Use push2 when
2882 // a) number of CSR > 1 if no need padding
2883 // b) number of CSR > 2 if need padding
2884 // 2. When the number of CSR push is odd
2885 // a. Start to use push2 from the 1st push if stack is 16B aligned.
2886 // b. Start to use push2 from the 2nd push if stack is not 16B aligned.
2887 // 3. When the number of CSR push is even, start to use push2 from the 1st
2888 // push and make the stack 16B aligned before the push
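// Illustrative: five GPR CSRs starting 16B-aligned are saved as
// push2; push2; push (rule 2a), while five CSRs starting misaligned are
// saved as push; push2; push2 (rule 2b), keeping every push2 16B-aligned.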
2889 unsigned NumRegsForPush2 = 0;
2890 if (STI.hasPush2Pop2()) {
2891 unsigned NumCSGPR = llvm::count_if(CSI, [](const CalleeSavedInfo &I) {
2892 return X86::GR64RegClass.contains(I.getReg());
2893 });
2894 bool NeedPadding = (SpillSlotOffset % 16 != 0) && (NumCSGPR % 2 == 0);
2895 bool UsePush2Pop2 = NeedPadding ? NumCSGPR > 2 : NumCSGPR > 1;
2896 X86FI->setPadForPush2Pop2(NeedPadding && UsePush2Pop2);
2897 NumRegsForPush2 = UsePush2Pop2 ? alignDown(NumCSGPR, 2) : 0;
2898 if (X86FI->padForPush2Pop2()) {
2899 SpillSlotOffset -= SlotSize;
2900 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2901 }
2902 }
2903
2904 // Assign slots for GPRs. It increases frame size.
2905 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2906 Register Reg = I.getReg();
2907
2908 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
2909 continue;
2910
2911 // A CSR is a candidate for push2/pop2 when its slot offset is 16B aligned
2912 // or an odd number of candidates has been collected so far.
2913 if (X86FI->getNumCandidatesForPush2Pop2() < NumRegsForPush2 &&
2914 (SpillSlotOffset % 16 == 0 ||
2915 X86FI->getNumCandidatesForPush2Pop2() % 2))
2916 X86FI->addCandidateForPush2Pop2(Reg);
2917
2918 SpillSlotOffset -= SlotSize;
2919 CalleeSavedFrameSize += SlotSize;
2920
2921 int SlotIndex = MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2922 I.setFrameIdx(SlotIndex);
2923 }
2924
2925 // Adjust the offset of spill slot as we know the accurate callee saved frame
2926 // size.
2927 if (X86FI->getRestoreBasePointer()) {
2928 SpillSlotOffset -= SlotSize;
2929 CalleeSavedFrameSize += SlotSize;
2930
2931 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2932 // TODO: would saving the slot index be better?
2933 X86FI->setRestoreBasePointer(CalleeSavedFrameSize);
2934 }
2935 assert(X86FI->getNumCandidatesForPush2Pop2() % 2 == 0 &&
2936 "Expect even candidates for push2/pop2");
2937 if (X86FI->getNumCandidatesForPush2Pop2())
2938 ++NumFunctionUsingPush2Pop2;
2939 X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
2940 MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
2941
2942 // Assign slots for XMMs.
2943 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2944 Register Reg = I.getReg();
2945 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
2946 continue;
2947
2948 // If this is k-register make sure we lookup via the largest legal type.
2949 MVT VT = MVT::Other;
2950 if (X86::VK16RegClass.contains(Reg))
2951 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
2952
2953 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
2954 unsigned Size = TRI->getSpillSize(*RC);
2955 Align Alignment = TRI->getSpillAlign(*RC);
2956 // ensure alignment
2957 assert(SpillSlotOffset < 0 && "SpillSlotOffset should always < 0 on X86");
2958 SpillSlotOffset = -alignTo(-SpillSlotOffset, Alignment);
2959
2960 // spill into slot
2961 SpillSlotOffset -= Size;
2962 int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
2963 I.setFrameIdx(SlotIndex);
2964 MFI.ensureMaxAlignment(Alignment);
2965
2966 // Save the start offset and size of XMM in stack frame for funclets.
2967 if (X86::VR128RegClass.contains(Reg)) {
2968 WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize;
2969 XMMCalleeSavedFrameSize += Size;
2970 }
2971 }
2972
2973 return true;
2974}
2975
2976 bool X86FrameLowering::spillCalleeSavedRegisters(
2977 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2978 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2979 DebugLoc DL = MBB.findDebugLoc(MI);
2980
2981 // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI
2982 // for us, and there are no XMM CSRs on Win32.
2983 if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows())
2984 return true;
2985
2986 // Push GPRs. It increases frame size.
2987 const MachineFunction &MF = *MBB.getParent();
2988 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2989 if (X86FI->padForPush2Pop2())
2990 emitSPUpdate(MBB, MI, DL, -(int64_t)SlotSize, /*InEpilogue=*/false);
2991
2992 // Update LiveIn of the basic block and decide whether we can add a kill flag
2993 // to the use.
2994 auto UpdateLiveInCheckCanKill = [&](Register Reg) {
2995 const MachineRegisterInfo &MRI = MF.getRegInfo();
2996 // Do not set a kill flag on values that are also marked as live-in. This
2997 // happens with the @llvm.returnaddress intrinsic and with arguments
2998 // passed in callee saved registers.
2999 // Omitting the kill flags is conservatively correct even if the live-in
3000 // is not used after all.
3001 if (MRI.isLiveIn(Reg))
3002 return false;
3003 MBB.addLiveIn(Reg);
3004 // Check if any subregister is live-in
3005 for (MCRegAliasIterator AReg(Reg, TRI, false); AReg.isValid(); ++AReg)
3006 if (MRI.isLiveIn(*AReg))
3007 return false;
3008 return true;
3009 };
3010 auto UpdateLiveInGetKillRegState = [&](Register Reg) {
3011 return getKillRegState(UpdateLiveInCheckCanKill(Reg));
3012 };
3013
3014 for (auto RI = CSI.rbegin(), RE = CSI.rend(); RI != RE; ++RI) {
3015 Register Reg = RI->getReg();
3016 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
3017 continue;
3018
3019 if (X86FI->isCandidateForPush2Pop2(Reg)) {
3020 Register Reg2 = (++RI)->getReg();
3021 BuildMI(MBB, MI, DL, TII.get(getPUSH2Opcode(STI)))
3022 .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
3023 .addReg(Reg2, UpdateLiveInGetKillRegState(Reg2))
3024 .setMIFlag(MachineInstr::FrameSetup);
3025 } else {
3026 BuildMI(MBB, MI, DL, TII.get(getPUSHOpcode(STI)))
3027 .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
3028 .setMIFlag(MachineInstr::FrameSetup);
3029 }
3030 }
3031
3032 if (X86FI->getRestoreBasePointer()) {
3033 unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
3034 Register BaseReg = this->TRI->getBaseRegister();
3035 BuildMI(MBB, MI, DL, TII.get(Opc))
3036 .addReg(BaseReg, getKillRegState(true))
3037 .setMIFlag(MachineInstr::FrameSetup);
3038 }
3039
3040 // Make XMM regs spilled. X86 does not have ability of push/pop XMM.
3041 // It can be done by spilling XMMs to stack frame.
3042 for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
3043 Register Reg = I.getReg();
3044 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3045 continue;
3046
3047 // If this is k-register make sure we lookup via the largest legal type.
3048 MVT VT = MVT::Other;
3049 if (X86::VK16RegClass.contains(Reg))
3050 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
3051
3052 // Add the callee-saved register as live-in. It's killed at the spill.
3053 MBB.addLiveIn(Reg);
3054 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
3055
3056 TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, TRI,
3057 Register());
3058 --MI;
3059 MI->setFlag(MachineInstr::FrameSetup);
3060 ++MI;
3061 }
3062
3063 return true;
3064}
3065
3066 void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB,
3067 MachineBasicBlock::iterator MBBI,
3068 MachineInstr *CatchRet) const {
3069 // SEH shouldn't use catchret.
3070 assert(!isAsynchronousEHPersonality(classifyEHPersonality(
3071 MBB.getParent()->getFunction().getPersonalityFn())) &&
3072 "SEH should not use CATCHRET");
3073 const DebugLoc &DL = CatchRet->getDebugLoc();
3074 MachineBasicBlock *CatchRetTarget = CatchRet->getOperand(0).getMBB();
3075
3076 // Fill EAX/RAX with the address of the target block.
3077 if (STI.is64Bit()) {
3078 // LEA64r CatchRetTarget(%rip), %rax
3079 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), X86::RAX)
3080 .addReg(X86::RIP)
3081 .addImm(0)
3082 .addReg(0)
3083 .addMBB(CatchRetTarget)
3084 .addReg(0);
3085 } else {
3086 // MOV32ri $CatchRetTarget, %eax
3087 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
3088 .addMBB(CatchRetTarget);
3089 }
3090
3091 // Record that we've taken the address of CatchRetTarget and no longer just
3092 // reference it in a terminator.
3093 CatchRetTarget->setMachineBlockAddressTaken();
3094}
3095
3096 bool X86FrameLowering::restoreCalleeSavedRegisters(
3097 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
3098 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
3099 if (CSI.empty())
3100 return false;
3101
3102 if (MI != MBB.end() && isFuncletReturnInstr(*MI) && STI.isOSWindows()) {
3103 // Don't restore CSRs in 32-bit EH funclets. Matches
3104 // spillCalleeSavedRegisters.
3105 if (STI.is32Bit())
3106 return true;
3107 // Don't restore CSRs before an SEH catchret. SEH except blocks do not form
3108 // funclets. emitEpilogue transforms these to normal jumps.
3109 if (MI->getOpcode() == X86::CATCHRET) {
3110 const Function &F = MBB.getParent()->getFunction();
3111 bool IsSEH = isAsynchronousEHPersonality(
3112 classifyEHPersonality(F.getPersonalityFn()));
3113 if (IsSEH)
3114 return true;
3115 }
3116 }
3117
3118 DebugLoc DL = MBB.findDebugLoc(MI);
3119
3120 // Reload XMMs from stack frame.
3121 for (const CalleeSavedInfo &I : CSI) {
3122 Register Reg = I.getReg();
3123 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3124 continue;
3125
3126 // If this is k-register make sure we lookup via the largest legal type.
3127 MVT VT = MVT::Other;
3128 if (X86::VK16RegClass.contains(Reg))
3129 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
3130
3131 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
3132 TII.loadRegFromStackSlot(MBB, MI, Reg, I.getFrameIdx(), RC, TRI,
3133 Register());
3134 }
3135
3136 // Clear the stack slot for spill base pointer register.
3137 MachineFunction &MF = *MBB.getParent();
3138 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3139 if (X86FI->getRestoreBasePointer()) {
3140 unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
3141 Register BaseReg = this->TRI->getBaseRegister();
3142 BuildMI(MBB, MI, DL, TII.get(Opc), BaseReg)
3143 .setMIFlag(MachineInstr::FrameDestroy);
3144 }
3145
3146 // POP GPRs.
3147 for (auto I = CSI.begin(), E = CSI.end(); I != E; ++I) {
3148 Register Reg = I->getReg();
3149 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
3150 continue;
3151
3152 if (X86FI->isCandidateForPush2Pop2(Reg))
3153 BuildMI(MBB, MI, DL, TII.get(getPOP2Opcode(STI)), Reg)
3154 .addReg((++I)->getReg(), RegState::Define)
3155 .setMIFlag(MachineInstr::FrameDestroy);
3156 else
3157 BuildMI(MBB, MI, DL, TII.get(getPOPOpcode(STI)), Reg)
3158 .setMIFlag(MachineInstr::FrameDestroy);
3159 }
3160 if (X86FI->padForPush2Pop2())
3161 emitSPUpdate(MBB, MI, DL, SlotSize, /*InEpilogue=*/true);
3162
3163 return true;
3164}
3165
3166 void X86FrameLowering::determineCalleeSaves(MachineFunction &MF,
3167 BitVector &SavedRegs,
3168 RegScavenger *RS) const {
3169 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
3170
3171 // Spill the BasePtr if it's used.
3172 if (TRI->hasBasePointer(MF)) {
3173 Register BasePtr = TRI->getBaseRegister();
3174 if (STI.isTarget64BitILP32())
3175 BasePtr = getX86SubSuperRegister(BasePtr, 64);
3176 SavedRegs.set(BasePtr);
3177 }
3178}
3179
3180static bool HasNestArgument(const MachineFunction *MF) {
3181 const Function &F = MF->getFunction();
3182 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
3183 I++) {
3184 if (I->hasNestAttr() && !I->use_empty())
3185 return true;
3186 }
3187 return false;
3188}
3189
3190/// GetScratchRegister - Get a temp register for performing work in the
3191/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
3192/// and the properties of the function either one or two registers will be
3193/// needed. Set primary to true for the first register, false for the second.
3194static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64,
3195 const MachineFunction &MF, bool Primary) {
3196 CallingConv::ID CallingConvention = MF.getFunction().getCallingConv();
3197
3198 // Erlang stuff.
3199 if (CallingConvention == CallingConv::HiPE) {
3200 if (Is64Bit)
3201 return Primary ? X86::R14 : X86::R13;
3202 else
3203 return Primary ? X86::EBX : X86::EDI;
3204 }
3205
3206 if (Is64Bit) {
3207 if (IsLP64)
3208 return Primary ? X86::R11 : X86::R12;
3209 else
3210 return Primary ? X86::R11D : X86::R12D;
3211 }
3212
3213 bool IsNested = HasNestArgument(&MF);
3214
3215 if (CallingConvention == CallingConv::X86_FastCall ||
3216 CallingConvention == CallingConv::Fast ||
3217 CallingConvention == CallingConv::Tail) {
3218 if (IsNested)
3219 report_fatal_error("Segmented stacks do not support fastcall with "
3220 "nested functions.");
3221 return Primary ? X86::EAX : X86::ECX;
3222 }
3223 if (IsNested)
3224 return Primary ? X86::EDX : X86::EAX;
3225 return Primary ? X86::ECX : X86::EAX;
3226}
3227
3228// The stack limit in the TCB is set to this many bytes above the actual stack
3229// limit.
3230 static const uint64_t kSplitStackAvailable = 256;
3231
3232 void X86FrameLowering::adjustForSegmentedStacks(
3233 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3234 MachineFrameInfo &MFI = MF.getFrameInfo();
3235 uint64_t StackSize;
3236 unsigned TlsReg, TlsOffset;
3237 DebugLoc DL;
3238
3239 // To support shrink-wrapping we would need to insert the new blocks
3240 // at the right place and update the branches to PrologueMBB.
3241 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3242
3243 unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3244 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3245 "Scratch register is live-in");
3246
3247 if (MF.getFunction().isVarArg())
3248 report_fatal_error("Segmented stacks do not support vararg functions.");
3249 if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
3251 !STI.isTargetWin64() && !STI.isTargetFreeBSD() && !STI.isTargetDragonFly())
3252 report_fatal_error("Segmented stacks not supported on this platform.");
3253
3254 // Eventually StackSize will be calculated by a link-time pass, which will
3255 // also decide whether checking code needs to be injected into this particular
3256 // prologue.
3257 StackSize = MFI.getStackSize();
3258
3259 if (!MFI.needsSplitStackProlog())
3260 return;
3261
3262 MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
3263 MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
3264 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3265 bool IsNested = false;
3266
3267 // We need to know if the function has a nest argument only in 64 bit mode.
3268 if (Is64Bit)
3269 IsNested = HasNestArgument(&MF);
3270
3271 // The MOV R10, RAX needs to be in a different block, since the RET we emit in
3272 // allocMBB needs to be the last (terminating) instruction.
3273
3274 for (const auto &LI : PrologueMBB.liveins()) {
3275 allocMBB->addLiveIn(LI);
3276 checkMBB->addLiveIn(LI);
3277 }
3278
3279 if (IsNested)
3280 allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
3281
3282 MF.push_front(allocMBB);
3283 MF.push_front(checkMBB);
3284
3285 // When the frame size is less than 256 we just compare the stack
3286 // boundary directly to the value of the stack pointer, per gcc.
3287 bool CompareStackPointer = StackSize < kSplitStackAvailable;
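// In the common small-frame case the check folds to a single comparison of
// %rsp against the TLS stack limit, e.g. "cmpq %fs:0x70, %rsp" on x86-64
// Linux (illustrative).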
3288
3289 // Read the limit off the current stacklet off the stack_guard location.
3290 if (Is64Bit) {
3291 if (STI.isTargetLinux()) {
3292 TlsReg = X86::FS;
3293 TlsOffset = IsLP64 ? 0x70 : 0x40;
3294 } else if (STI.isTargetDarwin()) {
3295 TlsReg = X86::GS;
3296 TlsOffset = 0x60 + 90 * 8; // See pthread_machdep.h. Steal TLS slot 90.
3297 } else if (STI.isTargetWin64()) {
3298 TlsReg = X86::GS;
3299 TlsOffset = 0x28; // pvArbitrary, reserved for application use
3300 } else if (STI.isTargetFreeBSD()) {
3301 TlsReg = X86::FS;
3302 TlsOffset = 0x18;
3303 } else if (STI.isTargetDragonFly()) {
3304 TlsReg = X86::FS;
3305 TlsOffset = 0x20; // use tls_tcb.tcb_segstack
3306 } else {
3307 report_fatal_error("Segmented stacks not supported on this platform.");
3308 }
3309
3310 if (CompareStackPointer)
3311 ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
3312 else
3313 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r),
3314 ScratchReg)
3315 .addReg(X86::RSP)
3316 .addImm(1)
3317 .addReg(0)
3318 .addImm(-StackSize)
3319 .addReg(0);
3320
3321 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm))
3322 .addReg(ScratchReg)
3323 .addReg(0)
3324 .addImm(1)
3325 .addReg(0)
3326 .addImm(TlsOffset)
3327 .addReg(TlsReg);
3328 } else {
3329 if (STI.isTargetLinux()) {
3330 TlsReg = X86::GS;
3331 TlsOffset = 0x30;
3332 } else if (STI.isTargetDarwin()) {
3333 TlsReg = X86::GS;
3334 TlsOffset = 0x48 + 90 * 4;
3335 } else if (STI.isTargetWin32()) {
3336 TlsReg = X86::FS;
3337 TlsOffset = 0x14; // pvArbitrary, reserved for application use
3338 } else if (STI.isTargetDragonFly()) {
3339 TlsReg = X86::FS;
3340 TlsOffset = 0x10; // use tls_tcb.tcb_segstack
3341 } else if (STI.isTargetFreeBSD()) {
3342 report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
3343 } else {
3344 report_fatal_error("Segmented stacks not supported on this platform.");
3345 }
3346
3347 if (CompareStackPointer)
3348 ScratchReg = X86::ESP;
3349 else
3350 BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg)
3351 .addReg(X86::ESP)
3352 .addImm(1)
3353 .addReg(0)
3354 .addImm(-StackSize)
3355 .addReg(0);
3356
3356
3357 if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() ||
3358 STI.isTargetDragonFly()) {
3359 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3360 .addReg(ScratchReg)
3361 .addReg(0)
3362 .addImm(0)
3363 .addReg(0)
3364 .addImm(TlsOffset)
3365 .addReg(TlsReg);
3366 } else if (STI.isTargetDarwin()) {
3367
3368 // TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
3369 unsigned ScratchReg2;
3370 bool SaveScratch2;
3371 if (CompareStackPointer) {
3372 // The primary scratch register is available for holding the TLS offset.
3373 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3374 SaveScratch2 = false;
3375 } else {
3376 // Need to use a second register to hold the TLS offset
3377 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
3378
3379 // Unfortunately, with fastcc the second scratch register may hold an
3380 // argument.
3381 SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
3382 }
3383
3384 // If Scratch2 is live-in then it needs to be saved.
3385 assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
3386 "Scratch register is live-in and not saved");
3387
3388 if (SaveScratch2)
3389 BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
3390 .addReg(ScratchReg2, RegState::Kill);
3391
3392 BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
3393 .addImm(TlsOffset);
3394 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3395 .addReg(ScratchReg)
3396 .addReg(ScratchReg2)
3397 .addImm(1)
3398 .addReg(0)
3399 .addImm(0)
3400 .addReg(TlsReg);
3401
3402 if (SaveScratch2)
3403 BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
3404 }
3405 }
3406
3407 // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
3408 // It jumps to normal execution of the function body.
3409 BuildMI(checkMBB, DL, TII.get(X86::JCC_1))
3410 .addMBB(&PrologueMBB)
3411 .addImm(X86::COND_AE);
3412
3413 // On 32 bit we first push the arguments size and then the frame size. On 64
3414 // bit, we pass the stack frame size in r10 and the argument size in r11.
3415 if (Is64Bit) {
3416 // Functions with nested arguments use R10, so it needs to be saved across
3417 // the call to __morestack.
3418
3419 const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
3420 const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
3421 const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
3422 const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
3423
3424 if (IsNested)
3425 BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
3426
3427 BuildMI(allocMBB, DL, TII.get(getMOVriOpcode(IsLP64, StackSize)), Reg10)
3428 .addImm(StackSize);
3429 BuildMI(allocMBB, DL,
3430 TII.get(getMOVriOpcode(IsLP64, X86FI->getArgumentStackSize())),
3431 Reg11)
3432 .addImm(X86FI->getArgumentStackSize());
3433 } else {
3434 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i))
3435 .addImm(X86FI->getArgumentStackSize());
3436 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i)).addImm(StackSize);
3437 }
3438
3439 // __morestack is in libgcc
3440 if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
3441 // Under the large code model, we cannot assume that __morestack lives
3442 // within 2^31 bytes of the call site, so we cannot use pc-relative
3443 // addressing. We cannot perform the call via a temporary register,
3444 // as the rax register may be used to store the static chain, and all
3445 // other suitable registers may be either callee-save or used for
3446 // parameter passing. We cannot use the stack at this point either
3447 // because __morestack manipulates the stack directly.
3448 //
3449 // To avoid these issues, perform an indirect call via a read-only memory
3450 // location containing the address.
3451 //
3452 // This solution is not perfect, as it assumes that the .rodata section
3453 // is laid out within 2^31 bytes of each function body, but this seems
3454 // to be sufficient for JIT.
3455 // FIXME: Add retpoline support and remove the error here.
3456 if (STI.useIndirectThunkCalls())
3457 report_fatal_error("Emitting morestack calls on 64-bit with the large "
3458 "code model and thunks not yet implemented.");
3459 BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
3460 .addReg(X86::RIP)
3461 .addImm(0)
3462 .addReg(0)
3463 .addExternalSymbol("__morestack_addr")
3464 .addReg(0);
3465 } else {
3466 if (Is64Bit)
3467 BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
3468 .addExternalSymbol("__morestack");
3469 else
3470 BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
3471 .addExternalSymbol("__morestack");
3472 }
3473
3474 if (IsNested)
3475 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
3476 else
3477 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
3478
3479 allocMBB->addSuccessor(&PrologueMBB);
3480
3481 checkMBB->addSuccessor(allocMBB, BranchProbability::getZero());
3482 checkMBB->addSuccessor(&PrologueMBB, BranchProbability::getOne());
3483
3484#ifdef EXPENSIVE_CHECKS
3485 MF.verify();
3486#endif
3487}
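// A rough sketch of what the blocks built above correspond to (illustrative
// only; it assumes a 64-bit Linux target, the scratch register selected by
// GetScratchRegister, and CompareStackPointer == false):
//
//   checkMBB:
//     lea  -StackSize(%rsp), %r11
//     cmp  %fs:<TlsOffset>, %r11
//     jae  <function body>        # SP - StackSize >= stacklet limit
//   allocMBB:
//     mov  $StackSize, %r10       # frame size for __morestack
//     mov  $ArgSize, %r11         # argument size for __morestack
//     call __morestack
//     ret                         # MORESTACK_RET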
3488
3489/// Lookup an ERTS parameter in the !hipe.literals named metadata node.
3490/// HiPE provides Erlang Runtime System-internal parameters, such as PCB offsets
3491/// to fields it needs, through a named metadata node "hipe.literals" containing
3492/// name-value pairs.
3493static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD,
3494 const StringRef LiteralName) {
3495 for (int i = 0, e = HiPELiteralsMD->getNumOperands(); i != e; ++i) {
3496 MDNode *Node = HiPELiteralsMD->getOperand(i);
3497 if (Node->getNumOperands() != 2)
3498 continue;
3499 MDString *NodeName = dyn_cast<MDString>(Node->getOperand(0));
3500 ValueAsMetadata *NodeVal = dyn_cast<ValueAsMetadata>(Node->getOperand(1));
3501 if (!NodeName || !NodeVal)
3502 continue;
3503 ConstantInt *ValConst = dyn_cast_or_null<ConstantInt>(NodeVal->getValue());
3504 if (ValConst && NodeName->getString() == LiteralName) {
3505 return ValConst->getZExtValue();
3506 }
3507 }
3508
3509 report_fatal_error("HiPE literal " + LiteralName +
3510 " required but not provided");
3511}
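// For reference, the module-level metadata consumed here is a list of
// name-value pairs; the literal values below are illustrative only, not the
// values any particular ERTS build provides:
//
//   !hipe.literals = !{!0, !1, !2}
//   !0 = !{!"P_NSP_LIMIT", i32 152}
//   !1 = !{!"X86_LEAF_WORDS", i32 24}
//   !2 = !{!"AMD64_LEAF_WORDS", i32 24}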
3512
3513// Return true if there are no non-ehpad successors to MBB and there are no
3514// non-meta instructions between MBBI and MBB.end().
3515static bool blockEndIsUnreachable(const MachineBasicBlock &MBB,
3516 MachineBasicBlock::const_iterator MBBI) {
3517 return llvm::all_of(
3518 MBB.successors(),
3519 [](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) &&
3520 std::all_of(MBBI, MBB.end(), [](const MachineInstr &MI) {
3521 return MI.isMetaInstruction();
3522 });
3523}
3524
3525/// Erlang programs may need a special prologue to handle the stack size they
3526/// might need at runtime. That is because Erlang/OTP does not implement a C
3527/// stack but uses a custom implementation of a hybrid stack/heap architecture.
3528/// (for more information see Eric Stenman's Ph.D. thesis:
3529/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
3530///
3531/// CheckStack:
3532/// temp0 = sp - MaxStack
3533/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3534/// OldStart:
3535/// ...
3536/// IncStack:
3537/// call inc_stack # doubles the stack space
3538/// temp0 = sp - MaxStack
3539/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3540void X86FrameLowering::adjustForHiPEPrologue(
3541 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3542 MachineFrameInfo &MFI = MF.getFrameInfo();
3543 DebugLoc DL;
3544
3545 // To support shrink-wrapping we would need to insert the new blocks
3546 // at the right place and update the branches to PrologueMBB.
3547 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3548
3549 // HiPE-specific values
3550 NamedMDNode *HiPELiteralsMD =
3551 MF.getFunction().getParent()->getNamedMetadata("hipe.literals");
3552 if (!HiPELiteralsMD)
3553 report_fatal_error(
3554 "Can't generate HiPE prologue without runtime parameters");
3555 const unsigned HipeLeafWords = getHiPELiteral(
3556 HiPELiteralsMD, Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS");
3557 const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
3558 const unsigned Guaranteed = HipeLeafWords * SlotSize;
3559 unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs
3560 ? MF.getFunction().arg_size() - CCRegisteredArgs
3561 : 0;
3562 unsigned MaxStack = MFI.getStackSize() + CallerStkArity * SlotSize + SlotSize;
3563
3564 assert(STI.isTargetLinux() &&
3565 "HiPE prologue is only supported on Linux operating systems.");
3566
3567 // Compute the largest caller's frame that is needed to fit the callees'
3568 // frames. This 'MaxStack' is computed from:
3569 //
3570 // a) the fixed frame size, which is the space needed for all spilled temps,
3571 // b) outgoing on-stack parameter areas, and
3572 // c) the minimum stack space this function needs to make available for the
3573 // functions it calls (a tunable ABI property).
3574 if (MFI.hasCalls()) {
3575 unsigned MoreStackForCalls = 0;
3576
3577 for (auto &MBB : MF) {
3578 for (auto &MI : MBB) {
3579 if (!MI.isCall())
3580 continue;
3581
3582 // Get callee operand.
3583 const MachineOperand &MO = MI.getOperand(0);
3584
3585 // Only take account of global function calls (no closures etc.).
3586 if (!MO.isGlobal())
3587 continue;
3588
3589 const Function *F = dyn_cast<Function>(MO.getGlobal());
3590 if (!F)
3591 continue;
3592
3593 // Do not update 'MaxStack' for primitive and built-in functions
3594 // (encoded with names either starting with "erlang."/"bif_" or not
3595 // having a ".", such as a simple <Module>.<Function>.<Arity>, or an
3596 // "_", such as the BIF "suspend_0") as they are executed on another
3597 // stack.
3598 if (F->getName().contains("erlang.") || F->getName().contains("bif_") ||
3599 F->getName().find_first_of("._") == StringRef::npos)
3600 continue;
3601
3602 unsigned CalleeStkArity = F->arg_size() > CCRegisteredArgs
3603 ? F->arg_size() - CCRegisteredArgs
3604 : 0;
3605 if (HipeLeafWords - 1 > CalleeStkArity)
3606 MoreStackForCalls =
3607 std::max(MoreStackForCalls,
3608 (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
3609 }
3610 }
3611 MaxStack += MoreStackForCalls;
3612 }
3613
3614 // If the needed stack frame is larger than the guaranteed size, runtime checks
3615 // and calls to the "inc_stack_0" BIF should be inserted in the assembly prologue.
3616 if (MaxStack > Guaranteed) {
3617 MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
3618 MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
3619
3620 for (const auto &LI : PrologueMBB.liveins()) {
3621 stackCheckMBB->addLiveIn(LI);
3622 incStackMBB->addLiveIn(LI);
3623 }
3624
3625 MF.push_front(incStackMBB);
3626 MF.push_front(stackCheckMBB);
3627
3628 unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
3629 unsigned LEAop, CMPop, CALLop;
3630 SPLimitOffset = getHiPELiteral(HiPELiteralsMD, "P_NSP_LIMIT");
3631 if (Is64Bit) {
3632 SPReg = X86::RSP;
3633 PReg = X86::RBP;
3634 LEAop = X86::LEA64r;
3635 CMPop = X86::CMP64rm;
3636 CALLop = X86::CALL64pcrel32;
3637 } else {
3638 SPReg = X86::ESP;
3639 PReg = X86::EBP;
3640 LEAop = X86::LEA32r;
3641 CMPop = X86::CMP32rm;
3642 CALLop = X86::CALLpcrel32;
3643 }
3644
3645 ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3646 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3647 "HiPE prologue scratch register is live-in");
3648
3649 // Create new MBB for StackCheck:
3650 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
3651 false, -MaxStack);
3652 // SPLimitOffset is in a fixed heap location (pointed by BP).
3653 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
3654 PReg, false, SPLimitOffset);
3655 BuildMI(stackCheckMBB, DL, TII.get(X86::JCC_1))
3656 .addMBB(&PrologueMBB)
3657 .addImm(X86::COND_AE);
3658
3659 // Create new MBB for IncStack:
3660 BuildMI(incStackMBB, DL, TII.get(CALLop)).addExternalSymbol("inc_stack_0");
3661 addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
3662 false, -MaxStack);
3663 addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
3664 PReg, false, SPLimitOffset);
3665 BuildMI(incStackMBB, DL, TII.get(X86::JCC_1))
3666 .addMBB(incStackMBB)
3667 .addImm(X86::COND_LE);
3668
3669 stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100});
3670 stackCheckMBB->addSuccessor(incStackMBB, {1, 100});
3671 incStackMBB->addSuccessor(&PrologueMBB, {99, 100});
3672 incStackMBB->addSuccessor(incStackMBB, {1, 100});
3673 }
3674#ifdef EXPENSIVE_CHECKS
3675 MF.verify();
3676#endif
3677}
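// Worked example with illustrative numbers: assuming AMD64_LEAF_WORDS = 24
// and SlotSize = 8, Guaranteed = 192 bytes. A leaf function with a 160-byte
// frame and no stack arguments has MaxStack = 160 + 8 = 168 <= 192, so no
// check is emitted; with a 256-byte frame, MaxStack = 264 > 192 and the
// stackCheckMBB/incStackMBB blocks above are inserted.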
3678
3679bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
3680 MachineBasicBlock::iterator MBBI,
3681 const DebugLoc &DL,
3682 int Offset) const {
3683 if (Offset <= 0)
3684 return false;
3685
3686 if (Offset % SlotSize)
3687 return false;
3688
3689 int NumPops = Offset / SlotSize;
3690 // This is only worth it if we have at most 2 pops.
3691 if (NumPops != 1 && NumPops != 2)
3692 return false;
3693
3694 // Handle only the trivial case where the adjustment directly follows
3695 // a call. This is the most common one, anyway.
3696 if (MBBI == MBB.begin())
3697 return false;
3698 MachineBasicBlock::iterator Prev = std::prev(MBBI);
3699 if (!Prev->isCall() || !Prev->getOperand(1).isRegMask())
3700 return false;
3701
3702 unsigned Regs[2];
3703 unsigned FoundRegs = 0;
3704
3705 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3706 const MachineOperand &RegMask = Prev->getOperand(1);
3707
3708 auto &RegClass =
3709 Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass;
3710 // Try to find up to NumPops free registers.
3711 for (auto Candidate : RegClass) {
3712 // Poor man's liveness:
3713 // Since we're immediately after a call, any register that is clobbered
3714 // by the call and not defined by it can be considered dead.
3715 if (!RegMask.clobbersPhysReg(Candidate))
3716 continue;
3717
3718 // Don't clobber reserved registers
3719 if (MRI.isReserved(Candidate))
3720 continue;
3721
3722 bool IsDef = false;
3723 for (const MachineOperand &MO : Prev->implicit_operands()) {
3724 if (MO.isReg() && MO.isDef() &&
3725 TRI->isSuperOrSubRegisterEq(MO.getReg(), Candidate)) {
3726 IsDef = true;
3727 break;
3728 }
3729 }
3730
3731 if (IsDef)
3732 continue;
3733
3734 Regs[FoundRegs++] = Candidate;
3735 if (FoundRegs == (unsigned)NumPops)
3736 break;
3737 }
3738
3739 if (FoundRegs == 0)
3740 return false;
3741
3742 // If we found only one free register, but need two, reuse the same one twice.
3743 while (FoundRegs < (unsigned)NumPops)
3744 Regs[FoundRegs++] = Regs[0];
3745
3746 for (int i = 0; i < NumPops; ++i)
3747 BuildMI(MBB, MBBI, DL, TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r),
3748 Regs[i]);
3749
3750 return true;
3751}
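// Size rationale, illustrative: on 32-bit x86 a 4-byte call frame destroy
//   call _foo
//   addl $4, %esp        ; 3 bytes (83 C4 04)
// can be shrunk under minsize to
//   call _foo
//   popl %ecx            ; 1 byte (59)
// provided ECX is clobbered by the call's regmask and not defined by it.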
3752
3753MachineBasicBlock::iterator X86FrameLowering::eliminateCallFramePseudoInstr(
3754 MachineFunction &MF, MachineBasicBlock &MBB,
3755 MachineBasicBlock::iterator I) const {
3756 bool reserveCallFrame = hasReservedCallFrame(MF);
3757 unsigned Opcode = I->getOpcode();
3758 bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
3759 DebugLoc DL = I->getDebugLoc(); // copy DebugLoc as I will be erased.
3760 uint64_t Amount = TII.getFrameSize(*I);
3761 uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(*I) : 0;
3762 I = MBB.erase(I);
3763 auto InsertPos = skipDebugInstructionsForward(I, MBB.end());
3764
3765 // Try to avoid emitting dead SP adjustments if the block end is unreachable,
3766 // typically because the function is marked noreturn (abort, throw,
3767 // assert_fail, etc).
3768 if (isDestroy && blockEndIsUnreachable(MBB, I))
3769 return I;
3770
3771 if (!reserveCallFrame) {
3772 // If the stack pointer can be changed after prologue, turn the
3773 // adjcallstackup instruction into a 'sub ESP, <amt>' and the
3774 // adjcallstackdown instruction into 'add ESP, <amt>'
3775
3776 // We need to keep the stack aligned properly. To do this, we round the
3777 // amount of space needed for the outgoing arguments up to the next
3778 // alignment boundary.
3779 Amount = alignTo(Amount, getStackAlign());
3780
3781 const Function &F = MF.getFunction();
3782 bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
3783 bool DwarfCFI = !WindowsCFI && MF.needsFrameMoves();
3784
3785 // If we have any exception handlers in this function, and we adjust
3786 // the SP before calls, we may need to indicate this to the unwinder
3787 // using GNU_ARGS_SIZE. Note that this may be necessary even when
3788 // Amount == 0, because the preceding function may have set a non-0
3789 // GNU_ARGS_SIZE.
3790 // TODO: We don't need to reset this between subsequent functions,
3791 // if it didn't change.
3792 bool HasDwarfEHHandlers = !WindowsCFI && !MF.getLandingPads().empty();
3793
3794 if (HasDwarfEHHandlers && !isDestroy &&
3795 MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
3796 BuildCFI(MBB, InsertPos, DL,
3797 MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
3798
3799 if (Amount == 0)
3800 return I;
3801
3802 // Factor out the amount that gets handled inside the sequence
3803 // (Pushes of argument for frame setup, callee pops for frame destroy)
3804 Amount -= InternalAmt;
3805
3806 // TODO: This is needed only if we require precise CFA.
3807 // If this is a callee-pop calling convention, emit a CFA adjust for
3808 // the amount the callee popped.
3809 if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
3810 BuildCFI(MBB, InsertPos, DL,
3811 MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
3812
3813 // Add Amount to SP to destroy a frame, or subtract to setup.
3814 int64_t StackAdjustment = isDestroy ? Amount : -Amount;
3815
3816 if (StackAdjustment) {
3817 // Merge with any previous or following adjustment instruction. Note: the
3818 // instructions merged with here do not have CFI, so their stack
3819 // adjustments do not feed into CfaAdjustment.
3820 StackAdjustment += mergeSPUpdates(MBB, InsertPos, true);
3821 StackAdjustment += mergeSPUpdates(MBB, InsertPos, false);
3822
3823 if (StackAdjustment) {
3824 if (!(F.hasMinSize() &&
3825 adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
3826 BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
3827 /*InEpilogue=*/false);
3828 }
3829 }
3830
3831 if (DwarfCFI && !hasFP(MF)) {
3832 // If we don't have FP, but need to generate unwind information,
3833 // we need to set the correct CFA offset after the stack adjustment.
3834 // How much we adjust the CFA offset depends on whether we're emitting
3835 // CFI only for EH purposes or for debugging. EH only requires the CFA
3836 // offset to be correct at each call site, while for debugging we want
3837 // it to be more precise.
3838
3839 int64_t CfaAdjustment = -StackAdjustment;
3840 // TODO: When not using precise CFA, we also need to adjust for the
3841 // InternalAmt here.
3842 if (CfaAdjustment) {
3843 BuildCFI(
3844 MBB, InsertPos, DL,
3845 MCCFIInstruction::createAdjustCfaOffset(nullptr, CfaAdjustment));
3846 }
3847 }
3848
3849 return I;
3850 }
3851
3852 if (InternalAmt) {
3853 MachineBasicBlock::iterator CI = I;
3854 MachineBasicBlock::iterator B = MBB.begin();
3855 while (CI != B && !std::prev(CI)->isCall())
3856 --CI;
3857 BuildStackAdjustment(MBB, CI, DL, -InternalAmt, /*InEpilogue=*/false);
3858 }
3859
3860 return I;
3861}
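// Illustrative lowering with no reserved call frame and a 16-byte stack
// alignment: a 24-byte call frame is rounded up to the alignment boundary,
//   ADJCALLSTACKDOWN 24, ...   ->   sub $32, %esp/%rsp
//   ADJCALLSTACKUP   24, ...   ->   add $32, %esp/%rsp
// and any portion handled inside the sequence (argument pushes, callee pops)
// is subtracted from the emitted adjustment as InternalAmt above.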
3862
3863bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
3864 assert(MBB.getParent() && "Block is not attached to a function!");
3865 const MachineFunction &MF = *MBB.getParent();
3866 if (!MBB.isLiveIn(X86::EFLAGS))
3867 return true;
3868
3869 // If stack probes have to loop inline or call, that will clobber EFLAGS.
3870 // FIXME: we could allow cases that will use emitStackProbeInlineGenericBlock.
3871 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
3872 const X86TargetLowering &TLI = *STI.getTargetLowering();
3873 if (TLI.hasInlineStackProbe(MF) || TLI.hasStackProbeSymbol(MF))
3874 return false;
3875
3876 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3877 return !TRI->hasStackRealignment(MF) && !X86FI->hasSwiftAsyncContext();
3878}
3879
3880bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
3881 assert(MBB.getParent() && "Block is not attached to a function!");
3882
3883 // Win64 has strict requirements in terms of epilogue and we are
3884 // not taking a chance at messing with them.
3885 // I.e., unless this block is already an exit block, we can't use
3886 // it as an epilogue.
3887 if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock())
3888 return false;
3889
3890 // Swift async context epilogue has a BTR instruction that clobbers parts of
3891 // EFLAGS.
3892 const MachineFunction &MF = *MBB.getParent();
3893 if (MF.getInfo<X86MachineFunctionInfo>()->hasSwiftAsyncContext())
3894 return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
3895
3896 if (canUseLEAForSPInEpilogue(*MBB.getParent()))
3897 return true;
3898
3899 // If we cannot use LEA to adjust SP, we may need to use ADD, which
3900 // clobbers the EFLAGS. Check that we do not need to preserve it,
3901 // otherwise, conservatively assume this is not
3902 // safe to insert the epilogue here.
3903 return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
3904}
3905
3906bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
3907 // If we may need to emit frameless compact unwind information, give
3908 // up as this is currently broken: PR25614.
3909 bool CompactUnwind =
3910 MF.getContext().getObjectFileInfo()->getCompactUnwindSection() != nullptr;
3911 return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF) ||
3912 !CompactUnwind) &&
3913 // The lowering of segmented stack and HiPE only support entry
3914 // blocks as prologue blocks: PR26107. This limitation may be
3915 // lifted if we fix:
3916 // - adjustForSegmentedStacks
3917 // - adjustForHiPEPrologue
3918 MF.getFunction().getCallingConv() != CallingConv::HiPE &&
3919 !MF.shouldSplitStack();
3920}
3921
3924 const DebugLoc &DL, bool RestoreSP) const {
3925 assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env");
3926 assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32");
3927 assert(STI.is32Bit() && !Uses64BitFramePtr &&
3928 "restoring EBP/ESI on non-32-bit target");
3929
3930 MachineFunction &MF = *MBB.getParent();
3931 Register FramePtr = TRI->getFrameRegister(MF);
3932 Register BasePtr = TRI->getBaseRegister();
3933 WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo();
3934 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3935 MachineFrameInfo &MFI = MF.getFrameInfo();
3936
3937 // FIXME: Don't set FrameSetup flag in catchret case.
3938
3939 int FI = FuncInfo.EHRegNodeFrameIndex;
3940 int EHRegSize = MFI.getObjectSize(FI);
3941
3942 if (RestoreSP) {
3943 // MOV32rm -EHRegSize(%ebp), %esp
3944 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP),
3945 X86::EBP, true, -EHRegSize)
3946 .setMIFlag(MachineInstr::FrameSetup);
3947 }
3948
3949 Register UsedReg;
3950 int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg).getFixed();
3951 int EndOffset = -EHRegOffset - EHRegSize;
3952 FuncInfo.EHRegNodeEndOffset = EndOffset;
3953
3954 if (UsedReg == FramePtr) {
3955 // ADD $offset, %ebp
3956 unsigned ADDri = getADDriOpcode(false);
3957 BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr)
3958 .addReg(FramePtr)
3959 .addImm(EndOffset)
3960 .setMIFlag(MachineInstr::FrameSetup)
3961 ->getOperand(3)
3962 .setIsDead();
3963 assert(EndOffset >= 0 &&
3964 "end of registration object above normal EBP position!");
3965 } else if (UsedReg == BasePtr) {
3966 // LEA offset(%ebp), %esi
3967 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr),
3968 FramePtr, false, EndOffset)
3969 .setMIFlag(MachineInstr::FrameSetup);
3970 // MOV32rm SavedEBPOffset(%esi), %ebp
3971 assert(X86FI->getHasSEHFramePtrSave());
3972 int Offset =
3973 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
3974 .getFixed();
3975 assert(UsedReg == BasePtr);
3976 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr),
3977 UsedReg, true, Offset)
3978 .setMIFlag(MachineInstr::FrameSetup);
3979 } else {
3980 llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr");
3981 }
3982 return MBBI;
3983}
3984
3985int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
3986 return TRI->getSlotSize();
3987}
3988
3989Register
3990X86FrameLowering::getInitialCFARegister(const MachineFunction &MF) const {
3991 return StackPtr;
3992}
3993
3994TargetFrameLowering::DwarfFrameBase
3995X86FrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
3996 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
3997 Register FrameRegister = RI->getFrameRegister(MF);
3998 if (getInitialCFARegister(MF) == FrameRegister &&
3999 MF.getInfo<X86MachineFunctionInfo>()->hasCFIAdjustCfa()) {
4000 DwarfFrameBase FrameBase;
4001 FrameBase.Kind = DwarfFrameBase::CFA;
4002 FrameBase.Location.Offset =
4003 -MF.getFrameInfo().getStackSize() - getInitialCFAOffset(MF);
4004 return FrameBase;
4005 }
4006
4007 return DwarfFrameBase{DwarfFrameBase::Register, {FrameRegister}};
4008}
4009
4010namespace {
4011// Struct used by orderFrameObjects to help sort the stack objects.
4012struct X86FrameSortingObject {
4013 bool IsValid = false; // true if we care about this Object.
4014 unsigned ObjectIndex = 0; // Index of Object into MFI list.
4015 unsigned ObjectSize = 0; // Size of Object in bytes.
4016 Align ObjectAlignment = Align(1); // Alignment of Object in bytes.
4017 unsigned ObjectNumUses = 0; // Object static number of uses.
4018};
4019
4020// The comparison function we use for std::sort to order our local
4021// stack symbols. The current algorithm is to use an estimated
4022// "density". This takes into consideration the size and number of
4023// uses each object has in order to roughly minimize code size.
4024// So, for example, an object of size 16B that is referenced 5 times
4025// will get higher priority than 4 4B objects referenced 1 time each.
4026// It's not perfect and we may be able to squeeze a few more bytes out of
4027// it (for example : 0(esp) requires fewer bytes, symbols allocated at the
4028// fringe end can have special consideration, given their size is less
4029// important, etc.), but the algorithmic complexity grows too much to be
4030// worth the extra gains we get. This gets us pretty close.
4031// The final order leaves us with objects with highest priority going
4032// at the end of our list.
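// Worked example (illustrative): for A = {16 bytes, 5 uses} and
// B = {4 bytes, 1 use}, the scaled densities are 5 * 4 = 20 and
// 1 * 16 = 16, so B compares less than A, and A, the denser object,
// lands later in the list, i.e. at higher priority.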
4033struct X86FrameSortingComparator {
4034 inline bool operator()(const X86FrameSortingObject &A,
4035 const X86FrameSortingObject &B) const {
4036 uint64_t DensityAScaled, DensityBScaled;
4037
4038 // For consistency in our comparison, all invalid objects are placed
4039 // at the end. This also allows us to stop walking when we hit the
4040 // first invalid item after it's all sorted.
4041 if (!A.IsValid)
4042 return false;
4043 if (!B.IsValid)
4044 return true;
4045
4046 // The density is calculated by doing :
4047 // (double)DensityA = A.ObjectNumUses / A.ObjectSize
4048 // (double)DensityB = B.ObjectNumUses / B.ObjectSize
4049 // Since this approach may cause inconsistencies in
4050 // the floating point <, >, == comparisons, depending on the floating
4051 // point model with which the compiler was built, we're going
4052 // to scale both sides by multiplying with
4053 // A.ObjectSize * B.ObjectSize. This ends up factoring away
4054 // the division and, with it, the need for any floating point
4055 // arithmetic.
4056 DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) *
4057 static_cast<uint64_t>(B.ObjectSize);
4058 DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) *
4059 static_cast<uint64_t>(A.ObjectSize);
4060
4061 // If the two densities are equal, prioritize highest alignment
4062 // objects. This allows for similar alignment objects
4063 // to be packed together (given the same density).
4064 // There's room for improvement here, also, since we can pack
4065 // similar alignment (different density) objects next to each
4066 // other to save padding. This will also require further
4067 // complexity/iterations, and the overall gain isn't worth it,
4068 // in general. Something to keep in mind, though.
4069 if (DensityAScaled == DensityBScaled)
4070 return A.ObjectAlignment < B.ObjectAlignment;
4071
4072 return DensityAScaled < DensityBScaled;
4073 }
4074};
4075} // namespace
4076
4077// Order the symbols in the local stack.
4078// We want to place the local stack objects in some sort of sensible order.
4079// The heuristic we use is to try and pack them according to static number
4080// of uses and size of object in order to minimize code size.
4081void X86FrameLowering::orderFrameObjects(
4082 const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
4083 const MachineFrameInfo &MFI = MF.getFrameInfo();
4084
4085 // Don't waste time if there's nothing to do.
4086 if (ObjectsToAllocate.empty())
4087 return;
4088
4089 // Create an array of all MFI objects. We won't need all of these
4090 // objects, but we're going to create a full array of them to make
4091 // it easier to index into when we're counting "uses" down below.
4092 // We want to be able to easily/cheaply access an object by simply
4093 // indexing into it, instead of having to search for it every time.
4094 std::vector<X86FrameSortingObject> SortingObjects(MFI.getObjectIndexEnd());
4095
4096 // Walk the objects we care about and mark them as such in our working
4097 // struct.
4098 for (auto &Obj : ObjectsToAllocate) {
4099 SortingObjects[Obj].IsValid = true;
4100 SortingObjects[Obj].ObjectIndex = Obj;
4101 SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlign(Obj);
4102 // Set the size.
4103 int ObjectSize = MFI.getObjectSize(Obj);
4104 if (ObjectSize == 0)
4105 // Variable size. Just use 4.
4106 SortingObjects[Obj].ObjectSize = 4;
4107 else
4108 SortingObjects[Obj].ObjectSize = ObjectSize;
4109 }
4110
4111 // Count the number of uses for each object.
4112 for (auto &MBB : MF) {
4113 for (auto &MI : MBB) {
4114 if (MI.isDebugInstr())
4115 continue;
4116 for (const MachineOperand &MO : MI.operands()) {
4117 // Check to see if it's a local stack symbol.
4118 if (!MO.isFI())
4119 continue;
4120 int Index = MO.getIndex();
4121 // Check to see if it falls within our range, and is tagged
4122 // to require ordering.
4123 if (Index >= 0 && Index < MFI.getObjectIndexEnd() &&
4124 SortingObjects[Index].IsValid)
4125 SortingObjects[Index].ObjectNumUses++;
4126 }
4127 }
4128 }
4129
4130 // Sort the objects using X86FrameSortingAlgorithm (see its comment for
4131 // info).
4132 llvm::stable_sort(SortingObjects, X86FrameSortingComparator());
4133
4134 // Now modify the original list to represent the final order that
4135 // we want. The order will depend on whether we're going to access them
4136 // from the stack pointer or the frame pointer. For SP, the list should
4137 // end up with the END containing objects that we want with smaller offsets.
4138 // For FP, it should be flipped.
4139 int i = 0;
4140 for (auto &Obj : SortingObjects) {
4141 // All invalid items are sorted at the end, so it's safe to stop.
4142 if (!Obj.IsValid)
4143 break;
4144 ObjectsToAllocate[i++] = Obj.ObjectIndex;
4145 }
4146
4147 // Flip it if we're accessing off of the FP.
4148 if (!TRI->hasStackRealignment(MF) && hasFP(MF))
4149 std::reverse(ObjectsToAllocate.begin(), ObjectsToAllocate.end());
4150}
4151
4152unsigned
4153X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const {
4154 // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue.
4155 unsigned Offset = 16;
4156 // RBP is immediately pushed.
4157 Offset += SlotSize;
4158 // All callee-saved registers are then pushed.
4159 Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
4160 // Every funclet allocates enough stack space for the largest outgoing call.
4161 Offset += getWinEHFuncletFrameSize(MF);
4162 return Offset;
4163}
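// Illustrative total, assuming SlotSize = 8, three pushed CSRs (24 bytes),
// and a 32-byte funclet frame: the parent frame offset is
// 16 + 8 + 24 + 32 = 80 bytes.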
4164
4165void X86FrameLowering::processFunctionBeforeFrameFinalized(
4166 MachineFunction &MF, RegScavenger *RS) const {
4167 // Mark the function as not having WinCFI. We will set it back to true in
4168 // emitPrologue if it gets called and emits CFI.
4169 MF.setHasWinCFI(false);
4170
4171 // If we are using Windows x64 CFI, ensure that the stack is always 8 byte
4172 // aligned. The format doesn't support misaligned stack adjustments.
4173 if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
4174 MF.getFrameInfo().ensureMaxAlignment(Align(SlotSize));
4175
4176 // If this function isn't doing Win64-style C++ EH, we don't need to do
4177 // anything.
4178 if (STI.is64Bit() && MF.hasEHFunclets() &&
4179 classifyEHPersonality(MF.getFunction().getPersonalityFn()) ==
4180 EHPersonality::MSVC_CXX) {
4181 adjustFrameForMsvcCxxEh(MF);
4182 }
4183}
4184
4185void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const {
4186 // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset
4187 // relative to RSP after the prologue. Find the offset of the last fixed
4188 // object, so that we can allocate a slot immediately following it. If there
4189 // were no fixed objects, use offset -SlotSize, which is immediately after the
4190 // return address. Fixed objects have negative frame indices.
4191 MachineFrameInfo &MFI = MF.getFrameInfo();
4192 WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
4193 int64_t MinFixedObjOffset = -SlotSize;
4194 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I)
4195 MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I));
4196
4197 for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
4198 for (WinEHHandlerType &H : TBME.HandlerArray) {
4199 int FrameIndex = H.CatchObj.FrameIndex;
4200 if (FrameIndex != INT_MAX) {
4201 // Ensure alignment.
4202 unsigned Align = MFI.getObjectAlign(FrameIndex).value();
4203 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Align;
4204 MinFixedObjOffset -= MFI.getObjectSize(FrameIndex);
4205 MFI.setObjectOffset(FrameIndex, MinFixedObjOffset);
4206 }
4207 }
4208 }
4209
4210 // Ensure alignment.
4211 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8;
4212 int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
4213 int UnwindHelpFI =
4214 MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*IsImmutable=*/false);
4215 EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
4216
4217 // Store -2 into UnwindHelp on function entry. We have to scan forwards past
4218 // other frame setup instructions.
4219 MachineBasicBlock &MBB = MF.front();
4220 auto MBBI = MBB.begin();
4221 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
4222 ++MBBI;
4223
4224 DebugLoc DL = MBB.findDebugLoc(MBBI);
4225 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)),
4226 UnwindHelpFI)
4227 .addImm(-2);
4228}
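// Illustrative placement: if the lowest fixed object ends at offset -40,
// which is already 8-byte aligned, UnwindHelp becomes an 8-byte fixed
// object at offset -48, and the MOV64mi32 above initializes it to -2 once
// the frame setup instructions have run.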
4229
4230void X86FrameLowering::processFunctionAfterFrameFinalized(
4231 MachineFunction &MF, RegScavenger *RS) const {
4232 auto *X86FI = MF.getInfo<X86MachineFunctionInfo>();
4233
4234 if (STI.is32Bit() && MF.hasEHFunclets())
4235 restoreWinEHStackPointersInParent(MF);
4236 // We have emitted prolog and epilog. Don't need stack pointer saving
4237 // instruction any more.
4238 if (MachineInstr *MI = X86FI->getStackPtrSaveMI()) {
4239 MI->eraseFromParent();
4240 X86FI->setStackPtrSaveMI(nullptr);
4241 }
4242}
4243
4244void X86FrameLowering::restoreWinEHStackPointersInParent(
4245 MachineFunction &MF) const {
4246 // 32-bit functions have to restore stack pointers when control is transferred
4247 // back to the parent function. These blocks are identified as eh pads that
4248 // are not funclet entries.
4249 bool IsSEH = isAsynchronousEHPersonality(
4250 classifyEHPersonality(MF.getFunction().getPersonalityFn()));
4251 for (MachineBasicBlock &MBB : MF) {
4252 bool NeedsRestore = MBB.isEHPad() && !MBB.isEHFuncletEntry();
4253 if (NeedsRestore)
4254 restoreWin32EHStackPointers(MBB, MBB.begin(), DebugLoc(),
4255 /*RestoreSP=*/IsSEH);
4256 }
4257}
4258
4259// Compute the alignment gap between current SP after spilling FP/BP and the
4260// next properly aligned stack offset.
4261static int computeFPBPAlignmentGap(MachineFunction &MF,
4262 const TargetRegisterClass *RC,
4263 unsigned NumSpilledRegs) {
4264 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
4265 unsigned AllocSize = TRI->getSpillSize(*RC) * NumSpilledRegs;
4266 Align StackAlign = MF.getSubtarget().getFrameLowering()->getStackAlign();
4267 unsigned AlignedSize = alignTo(AllocSize, StackAlign);
4268 return AlignedSize - AllocSize;
4269}
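// Illustrative arithmetic: two 8-byte GPR spills with a 16-byte stack
// alignment give AllocSize = 16 = AlignedSize, so the gap is 0; a single
// 8-byte spill yields AlignedSize = 16 and a gap of 8 that the SP
// adjustment in spillFPBPUsingSP must cover.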
4270
4271void X86FrameLowering::spillFPBPUsingSP(MachineFunction &MF,
4272 MachineBasicBlock::iterator BeforeMI,
4273 Register FP, Register BP,
4274 int SPAdjust) const {
4275 assert(FP.isValid() || BP.isValid());
4276
4277 MachineBasicBlock *MBB = BeforeMI->getParent();
4278 DebugLoc DL = BeforeMI->getDebugLoc();
4279
4280 // Spill FP.
4281 if (FP.isValid()) {
4282 BuildMI(*MBB, BeforeMI, DL,
4283 TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
4284 .addReg(FP);
4285 }
4286
4287 // Spill BP.
4288 if (BP.isValid()) {
4289 BuildMI(*MBB, BeforeMI, DL,
4290 TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
4291 .addReg(BP);
4292 }
4293
4294 // Make sure SP is aligned.
4295 if (SPAdjust)
4296 emitSPUpdate(*MBB, BeforeMI, DL, -SPAdjust, false);
4297
4298 // Emit unwinding information.
4299 if (FP.isValid() && needsDwarfCFI(MF)) {
4300 // Emit .cfi_remember_state to remember old frame.
4301 unsigned CFIIndex =
4302 MF.addFrameInst(MCCFIInstruction::createRememberState(nullptr));
4303 BuildMI(*MBB, BeforeMI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
4304 .addCFIIndex(CFIIndex);
4305
4306 // Setup new CFA value with DW_CFA_def_cfa_expression:
4307 // DW_OP_breg7+offset, DW_OP_deref, DW_OP_consts 16, DW_OP_plus
4308 SmallString<64> CfaExpr;
4309 uint8_t buffer[16];
4310 int Offset = SPAdjust;
4311 if (BP.isValid())
4312 Offset += TRI->getSpillSize(*TRI->getMinimalPhysRegClass(BP));
4313 // If BeforeMI is a frame setup instruction, we need to adjust the position
4314 // and offset of the new cfi instruction.
4315 if (TII.isFrameSetup(*BeforeMI)) {
4316 Offset += alignTo(TII.getFrameSize(*BeforeMI), getStackAlign());
4317 BeforeMI = std::next(BeforeMI);
4318 }
4319 Register StackPtr = TRI->getStackRegister();
4320 if (STI.isTarget64BitILP32())
4321 StackPtr = Register(getX86SubSuperRegister(StackPtr, 64));
4322 unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackPtr, true);
4323 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfStackPtr));
4324 CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
4325 CfaExpr.push_back(dwarf::DW_OP_deref);
4326 CfaExpr.push_back(dwarf::DW_OP_consts);
4327 CfaExpr.append(buffer, buffer + encodeSLEB128(SlotSize * 2, buffer));
4328 CfaExpr.push_back((uint8_t)dwarf::DW_OP_plus);
4329
4330 SmallString<64> DefCfaExpr;
4331 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
4332 DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
4333 DefCfaExpr.append(CfaExpr.str());
4334 BuildCFI(*MBB, BeforeMI, DL,
4335 MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
4336 MachineInstr::FrameSetup);
4337 }
4338}
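// Illustrative encoding for a 64-bit target with both FP and BP spilled and
// SPAdjust = 8: Offset = 8 + 8 = 16, so the escape above encodes
//   DW_OP_breg7 +16, DW_OP_deref, DW_OP_consts 16, DW_OP_plus
// i.e. read the spilled FP at [RSP + 16], then add 2 * SlotSize to recover
// the CFA, exactly the shape named in the comment before the expression.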
4339
4340void X86FrameLowering::restoreFPBPUsingSP(MachineFunction &MF,
4341 MachineBasicBlock::iterator AfterMI,
4342 Register FP, Register BP,
4343 int SPAdjust) const {
4344 assert(FP.isValid() || BP.isValid());
4345
4346 // Adjust SP so it points to spilled FP or BP.
4347 MachineBasicBlock *MBB = AfterMI->getParent();
4348 MachineBasicBlock::iterator Pos = std::next(AfterMI);
4349 DebugLoc DL = AfterMI->getDebugLoc();
4350 if (SPAdjust)
4351 emitSPUpdate(*MBB, Pos, DL, SPAdjust, false);
4352
4353 // Restore BP.
4354 if (BP.isValid()) {
4355 BuildMI(*MBB, Pos, DL,
4356 TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())), BP);
4357 }
4358
4359 // Restore FP.
4360 if (FP.isValid()) {
4361 BuildMI(*MBB, Pos, DL,
4362 TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())), FP);
4363
4364 // Emit unwinding information.
4365 if (needsDwarfCFI(MF)) {
4366 // Restore original frame with .cfi_restore_state.
4367 unsigned CFIIndex =
4368 MF.addFrameInst(MCCFIInstruction::createRestoreState(nullptr));
4369 BuildMI(*MBB, Pos, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
4370 .addCFIIndex(CFIIndex);
4371 }
4372 }
4373}
4374
4375void X86FrameLowering::saveAndRestoreFPBPUsingSP(
4376 MachineFunction &MF, MachineBasicBlock::iterator BeforeMI,
4377 MachineBasicBlock::iterator AfterMI, bool SpillFP, bool SpillBP) const {
4378 assert(SpillFP || SpillBP);
4379
4380 Register FP, BP;
4381 const TargetRegisterClass *RC;
4382 unsigned NumRegs = 0;
4383
4384 if (SpillFP) {
4385 FP = TRI->getFrameRegister(MF);
4386 if (STI.isTarget64BitILP32())
4387 FP = Register(getX86SubSuperRegister(FP, 64));
4388 RC = TRI->getMinimalPhysRegClass(FP);
4389 ++NumRegs;
4390 }
4391 if (SpillBP) {
4392 BP = TRI->getBaseRegister();
4393 if (STI.isTarget64BitILP32())
4394 BP = Register(getX86SubSuperRegister(BP, 64));
4395 RC = TRI->getMinimalPhysRegClass(BP);
4396 ++NumRegs;
4397 }
4398 int SPAdjust = computeFPBPAlignmentGap(MF, RC, NumRegs);
4399
4400 spillFPBPUsingSP(MF, BeforeMI, FP, BP, SPAdjust);
4401 restoreFPBPUsingSP(MF, AfterMI, FP, BP, SPAdjust);
4402}
4403
4404bool X86FrameLowering::skipSpillFPBP(
4405 MachineFunction &MF, MachineBasicBlock::reverse_iterator &MI) const {
4406 if (MI->getOpcode() == X86::LCMPXCHG16B_SAVE_RBX) {
4407 // The pseudo instruction LCMPXCHG16B_SAVE_RBX is generated in the form
4408 // SaveRbx = COPY RBX
4409 // SaveRbx = LCMPXCHG16B_SAVE_RBX ..., SaveRbx, implicit-def rbx
4410 // And later LCMPXCHG16B_SAVE_RBX is expanded to restore RBX from SaveRbx.
4411 // We should skip this instruction sequence.
4412 int FI;
4413 unsigned Reg;
4414 while (!(MI->getOpcode() == TargetOpcode::COPY &&
4415 MI->getOperand(1).getReg() == X86::RBX) &&
4416 !((Reg = TII.isStoreToStackSlot(*MI, FI)) && Reg == X86::RBX))
4417 ++MI;
4418 return true;
4419 }
4420 return false;
4421}
4422
4423static bool isFPBPAccess(const MachineInstr &MI, Register FP, Register BP,
4424 const TargetRegisterInfo *TRI, bool &AccessFP,
4425 bool &AccessBP) {
4426 AccessFP = AccessBP = false;
4427 if (FP) {
4428 if (MI.findRegisterUseOperandIdx(FP, TRI, false) != -1 ||
4429 MI.findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
4430 AccessFP = true;
4431 }
4432 if (BP) {
4433 if (MI.findRegisterUseOperandIdx(BP, TRI, false) != -1 ||
4434 MI.findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
4435 AccessBP = true;
4436 }
4437 return AccessFP || AccessBP;
4438}
4439
4440// An invoke instruction has been lowered to a normal function call. We try to
4441// figure out whether MI comes from an invoke.
4442// Is there a better method?
4443static bool isInvoke(const MachineInstr &MI, bool InsideEHLabels) {
4444 if (!MI.isCall())
4445 return false;
4446 if (InsideEHLabels)
4447 return true;
4448
4449 const MachineBasicBlock *MBB = MI.getParent();
4450 if (!MBB->hasEHPadSuccessor())
4451 return false;
4452
4453 // Check if there is another call instruction from MI to the end of MBB.
4454 MachineBasicBlock::const_iterator MBBI(MI), ME = MBB->end();
4455 for (++MBBI; MBBI != ME; ++MBBI)
4456 if (MBBI->isCall())
4457 return false;
4458 return true;
4459}
4460
4461/// Given the live range of FP or BP (DefMI, KillMI), check if there is any
4462/// interfered stack access in the range, usually generated by register spill.
4463void X86FrameLowering::checkInterferedAccess(
4464 MachineFunction &MF, MachineBasicBlock::reverse_iterator DefMI,
4465 MachineBasicBlock::reverse_iterator KillMI, bool SpillFP,
4466 bool SpillBP) const {
4467 if (DefMI == KillMI)
4468 return;
4469 if (TRI->hasBasePointer(MF)) {
4470 if (!SpillBP)
4471 return;
4472 } else {
4473 if (!SpillFP)
4474 return;
4475 }
4476
4477 auto MI = KillMI;
4478 while (MI != DefMI) {
4479 if (any_of(MI->operands(),
4480 [](const MachineOperand &MO) { return MO.isFI(); }))
4481 report_fatal_error(
4482 "Interference usage of base pointer/frame "
4483 "pointer.");
4484 MI++;
4485 }
4486}
4487
4488/// If a function uses a base pointer and the base pointer is clobbered by
4489/// inline asm, RA doesn't detect this case, and after the inline asm the base
4490/// pointer contains a garbage value.
4491/// For example, if a 32-bit x86 function uses the base pointer esi, and esi
4492/// is clobbered by the following inline asm
4493/// asm("rep movsb" : "+D"(ptr), "+S"(x), "+c"(c)::"memory");
4494/// we need to save esi before the asm and restore it after the asm.
4495///
4496/// The problem can also occur with the frame pointer if there is a function
4497/// call, and the callee uses a different calling convention and clobbers the fp.
4498///
4499/// Because normal frame objects (spill slots) are accessed through the fp/bp
4500/// register, we can't spill fp/bp to normal spill slots.
4501///
4502/// FIXME: There are 2 possible enhancements:
4503/// 1. In many cases there are physical registers not clobbered by the
4504/// inline asm; we can use one of them as the base pointer, or use a virtual
4505/// register as the base pointer and let RA allocate a physical register to it.
4506/// 2. If no other instruction accesses the stack through fp/bp from the
4507/// inline asm to the epilogue, and there is no CFI requirement for a correct
4508/// fp, we can skip the save and restore operations.
4509void X86FrameLowering::spillFPBP(MachineFunction &MF) const {
4510 Register FP, BP;
4511 const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
4512 if (TFI.hasFP(MF))
4513 FP = TRI->getFrameRegister(MF);
4514 if (TRI->hasBasePointer(MF))
4515 BP = TRI->getBaseRegister();
4516
4517 // Currently only inline asm and function call can clobbers fp/bp. So we can
4518 // do some quick test and return early.
4519 if (!MF.hasInlineAsm()) {
4520 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
4521 if (!X86FI->getFPClobberedByCall())
4522 FP = 0;
4523 if (!X86FI->getBPClobberedByCall())
4524 BP = 0;
4525 }
4526 if (!FP && !BP)
4527 return;
4528
4529 for (MachineBasicBlock &MBB : MF) {
4530 bool InsideEHLabels = false;
4531 auto MI = MBB.rbegin(), ME = MBB.rend();
4532 auto TermMI = MBB.getFirstTerminator();
4533 if (TermMI == MBB.begin())
4534 continue;
4535 MI = *(std::prev(TermMI));
4536
4537 while (MI != ME) {
4538 // Skip frame setup/destroy instructions.
4539 // Skip Invoke (call inside try block) instructions.
4540 // Skip instructions handled by target.
4541 if (MI->getFlag(MachineInstr::MIFlag::FrameSetup) ||
4542 MI->getFlag(MachineInstr::MIFlag::FrameDestroy) ||
4543 isInvoke(*MI, InsideEHLabels) || skipSpillFPBP(MF, MI)) {
4544 ++MI;
4545 continue;
4546 }
4547
4548 if (MI->getOpcode() == TargetOpcode::EH_LABEL) {
4549 InsideEHLabels = !InsideEHLabels;
4550 ++MI;
4551 continue;
4552 }
4553
4554 bool AccessFP, AccessBP;
4555 // Check if fp or bp is used in MI.
4556 if (!isFPBPAccess(*MI, FP, BP, TRI, AccessFP, AccessBP)) {
4557 ++MI;
4558 continue;
4559 }
4560
4561 // Look for the range [DefMI, KillMI] in which fp or bp is defined and
4562 // used.
4563 bool FPLive = false, BPLive = false;
4564 bool SpillFP = false, SpillBP = false;
4565 auto DefMI = MI, KillMI = MI;
4566 do {
4567 SpillFP |= AccessFP;
4568 SpillBP |= AccessBP;
4569
4570 // Maintain FPLive and BPLive.
4571 if (FPLive && MI->findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
4572 FPLive = false;
4573 if (FP && MI->findRegisterUseOperandIdx(FP, TRI, false) != -1)
4574 FPLive = true;
4575 if (BPLive && MI->findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
4576 BPLive = false;
4577 if (BP && MI->findRegisterUseOperandIdx(BP, TRI, false) != -1)
4578 BPLive = true;
4579
4580 DefMI = MI++;
4581 } while ((MI != ME) &&
4582 (FPLive || BPLive ||
4583 isFPBPAccess(*MI, FP, BP, TRI, AccessFP, AccessBP)));
4584
4585 // Don't need to save/restore if FP is accessed through llvm.frameaddress.
4586 if (FPLive && !SpillBP)
4587 continue;
4588
4589 // If the bp is clobbered by a call, we should save and restore outside of
4590 // the frame setup instructions.
4591 if (KillMI->isCall() && DefMI != ME) {
4592 auto FrameSetup = std::next(DefMI);
4593 // Look for frame setup instruction toward the start of the BB.
4594 // If we reach another call instruction, it means no frame setup
4595 // instruction for the current call instruction.
4596 while (FrameSetup != ME && !TII.isFrameSetup(*FrameSetup) &&
4597 !FrameSetup->isCall())
4598 ++FrameSetup;
4599 // If a frame setup instruction is found, we need to find out the
4600 // corresponding frame destroy instruction.
4601 if (FrameSetup != ME && TII.isFrameSetup(*FrameSetup) &&
4602 (TII.getFrameSize(*FrameSetup) ||
4603 TII.getFrameAdjustment(*FrameSetup))) {
4604 while (!TII.isFrameInstr(*KillMI))
4605 --KillMI;
4606 DefMI = FrameSetup;
4607 MI = DefMI;
4608 ++MI;
4609 }
4610 }
4611
4612 checkInterferedAccess(MF, DefMI, KillMI, SpillFP, SpillBP);
4613
4614 // Call target function to spill and restore FP and BP registers.
4615 saveAndRestoreFPBPUsingSP(MF, &(*DefMI), &(*KillMI), SpillFP, SpillBP);
4616 }
4617 }
4618}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool isFuncletReturnInstr(const MachineInstr &MI)
static const uint64_t kSplitStackAvailable
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Given that RA is a live value
uint64_t Size
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
static cl::opt< int > PageSize("imp-null-check-page-size", cl::desc("The page size of the target in bytes"), cl::init(4096), cl::Hidden)
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static bool isTailCallOpcode(unsigned Opc)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define H(x, y, z)
Definition: MD5.cpp:57
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static constexpr Register SPReg
static constexpr Register FPReg
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
static bool is64Bit(const char *name)
static unsigned calculateSetFPREG(uint64_t SPAdjust)
static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary)
GetScratchRegister - Get a temp register for performing work in the segmented stack and the Erlang/Hi...
static unsigned getADDriOpcode(bool IsLP64)
static unsigned getPUSH2Opcode(const X86Subtarget &ST)
static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm)
static unsigned getLEArOpcode(bool IsLP64)
static unsigned getSUBriOpcode(bool IsLP64)
static bool flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB)
Check if the flags need to be preserved before the terminators.
static bool isFPBPAccess(const MachineInstr &MI, Register FP, Register BP, const TargetRegisterInfo *TRI, bool &AccessFP, bool &AccessBP)
static bool isOpcodeRep(unsigned Opcode)
Return true if an opcode is part of the REP group of instructions.
static unsigned getANDriOpcode(bool IsLP64, int64_t Imm)
static bool isEAXLiveIn(MachineBasicBlock &MBB)
static int computeFPBPAlignmentGap(MachineFunction &MF, const TargetRegisterClass *RC, unsigned NumSpilledRegs)
static unsigned getADDrrOpcode(bool IsLP64)
static bool HasNestArgument(const MachineFunction *MF)
static unsigned getPOPOpcode(const X86Subtarget &ST)
static bool isInvoke(const MachineInstr &MI, bool InsideEHLabels)
static unsigned getPOP2Opcode(const X86Subtarget &ST)
static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD, const StringRef LiteralName)
Lookup an ERTS parameter in the !hipe.literals named metadata node.
static bool blockEndIsUnreachable(const MachineBasicBlock &MBB, MachineBasicBlock::const_iterator MBBI)
static unsigned getSUBrrOpcode(bool IsLP64)
static unsigned getPUSHOpcode(const X86Subtarget &ST)
static const unsigned FramePtr
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
reverse_iterator rend() const
Definition: ArrayRef.h:160
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:163
reverse_iterator rbegin() const
Definition: ArrayRef.h:159
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
BitVector & reset()
Definition: BitVector.h:392
BitVector & set()
Definition: BitVector.h:351
iterator_range< const_set_bits_iterator > set_bits() const
Definition: BitVector.h:140
static BranchProbability getOne()
static BranchProbability getZero()
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:277
bool hasPersonalityFn() const
Check whether this function has a personality function.
Definition: Function.h:905
Constant * getPersonalityFn() const
Get the personality function associated with this function.
Definition: Function.cpp:1048
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:353
size_t arg_size() const
Definition: Function.h:901
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
Definition: Function.h:682
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:234
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:657
A set of physical registers with utility functions to track liveness when walking backward/forward th...
Definition: LivePhysRegs.h:52
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:661
static MCCFIInstruction createDefCfaRegister(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_def_cfa_register modifies a rule for computing CFA.
Definition: MCDwarf.h:582
static MCCFIInstruction createGnuArgsSize(MCSymbol *L, int64_t Size, SMLoc Loc={})
A special wrapper for .cfi_escape that indicates GNU_ARGS_SIZE.
Definition: MCDwarf.h:693
static MCCFIInstruction createRestore(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_restore says that the rule for Register is now the same as it was at the beginning of the functi...
Definition: MCDwarf.h:656
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition: MCDwarf.h:575
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition: MCDwarf.h:617
static MCCFIInstruction createRememberState(MCSymbol *L, SMLoc Loc={})
.cfi_remember_state Save all current rules for all registers.
Definition: MCDwarf.h:676
OpType getOperation() const
Definition: MCDwarf.h:710
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition: MCDwarf.h:590
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition: MCDwarf.h:687
static MCCFIInstruction createAdjustCfaOffset(MCSymbol *L, int64_t Adjustment, SMLoc Loc={})
.cfi_adjust_cfa_offset Same as .cfi_def_cfa_offset, but Offset is a relative value that is added/subt...
Definition: MCDwarf.h:598
static MCCFIInstruction createRestoreState(MCSymbol *L, SMLoc Loc={})
.cfi_restore_state Restore the previously saved state.
Definition: MCDwarf.h:681
const MCObjectFileInfo * getObjectFileInfo() const
Definition: MCContext.h:416
const MCRegisterInfo * getRegisterInfo() const
Definition: MCContext.h:414
void reportError(SMLoc L, const Twine &Msg)
Definition: MCContext.cpp:1072
MCSection * getCompactUnwindSection() const
MCRegAliasIterator enumerates all registers aliasing Reg.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
Metadata node.
Definition: Metadata.h:1073
A single uniqued string.
Definition: Metadata.h:724
StringRef getString() const
Definition: Metadata.cpp:616
Machine Value Type.
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
bool isEHPad() const
Returns true if the block is a landing pad.
reverse_iterator rend()
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< livein_iterator > liveins() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool isEHFuncletEntry() const
Returns true if this is the entry block of an EH funclet.
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
bool isReturnBlock() const
Convenience function that returns true if the block ends in a return instruction.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< iterator > terminators()
iterator_range< succ_iterator > successors()
reverse_iterator rbegin()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
@ LQR_Live
Register is known to be (at least partially) live.
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
bool isCleanupFuncletEntry() const
Returns true if this is the entry block of a cleanup funclet.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool needsSplitStackProlog() const
Return true if this function requires a split stack prolog, even if it uses no stack space.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool adjustsStack() const
Return true if this function adjusts the stack – e.g., when calling another function.
void ensureMaxAlignment(Align Alignment)
Make sure the function is at least Align bytes aligned.
bool hasCalls() const
Return true if the current function has any function calls.
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
void setObjectOffset(int ObjectIdx, int64_t SPOffset)
Set the stack frame offset of the specified object.
uint64_t getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
bool hasOpaqueSPAdjustment() const
Returns true if the function contains opaque dynamic stack adjustments.
void setCVBytesOfCalleeSavedRegisters(unsigned S)
int CreateSpillStackObject(uint64_t Size, Align Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a...
const std::vector< CalleeSavedInfo > & getCalleeSavedInfo() const
Returns a reference to call saved info vector for the current function.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
bool hasCopyImplyingStackAdjustment() const
Returns true if the function contains operations which will lower down to instructions which manipula...
bool hasStackObjects() const
Return true if there are any stack objects in this function.
int CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset, bool IsImmutable=false)
Create a spill slot at a fixed location on the stack.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
void setStackSize(uint64_t Size)
Set the size of the stack.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
void setOffsetAdjustment(int64_t Adj)
Set the correction for frame offsets.
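A hedged sketch of how a prologue might combine the MachineFrameInfo queries above (MF and the surrounding frame-lowering context are assumed; illustrative, not this file's exact code):
    const MachineFrameInfo &MFI = MF.getFrameInfo();
    // Round the static frame size up to the strictest object alignment.
    uint64_t NumBytes = alignTo(MFI.getStackSize(), MFI.getMaxAlign());
    // Dynamic allocas force frame-pointer-relative addressing of locals.
    bool HasDynAllocas = MFI.hasVarSizedObjects();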
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const std::vector< MCCFIInstruction > & getFrameInstructions() const
Returns a reference to a list of cfi instructions in the function's prologue.
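addFrameInst and getFrameInstructions back the usual CFI-emission pattern; a minimal sketch, assuming MBB, MBBI, DL, and TII from the enclosing frame-lowering code:
    unsigned CFIIndex =
        MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 16));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);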
bool hasInlineAsm() const
Returns true if the function contains any inline assembly.
void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair, unsigned SubReg=0)
Create a substitution between one <instr,operand> value to a different, new value.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
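For example, a hand-built spill store attaches a memory operand describing its fixed-stack slot (FI is an assumed frame index; this mirrors the in-tree spill helpers rather than quoting them):
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineMemOperand *MMO = MF.getMachineMemOperand(
        MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));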
bool needsFrameMoves() const
True if this function needs frame moves for debug or exceptions.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
bool callsUnwindInit() const
void push_front(MachineBasicBlock *MBB)
const char * createExternalSymbolName(StringRef Name)
Allocate a string and populate it with the given external symbol name.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
bool verify(Pass *p=nullptr, const char *Banner=nullptr, raw_ostream *OS=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const std::vector< LandingPadInfo > & getLandingPads() const
Return a reference to the landing pad info for the current function.
bool shouldSplitStack() const
Should we be emitting segmented stack support for this function.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
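Chained together, these builder helpers assemble one instruction per expression; a hedged sketch of a stack-allocating SUB, with NumBytes assumed from context:
    BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri32), X86::RSP)
        .addReg(X86::RSP)
        .addImm(NumBytes)
        .setMIFlag(MachineInstr::FrameSetup);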
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:578
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:499
unsigned getDebugInstrNum()
Fetch the instruction number of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
@ MOVolatile
The memory access is volatile.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
bool isLiveIn(Register Reg) const
NamedMDNode * getNamedMetadata(StringRef Name) const
Return the first NamedMDNode in the module with the specified name.
Definition: Module.cpp:297
unsigned getCodeViewFlag() const
Returns the CodeView Version by checking module flags.
Definition: Module.cpp:597
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:310
iterator end() const
Definition: ArrayRef.h:360
iterator begin() const
Definition: ArrayRef.h:359
A tuple of MDNodes.
Definition: Metadata.h:1737
MDNode * getOperand(unsigned i) const
Definition: Metadata.cpp:1425
unsigned getNumOperands() const
Definition: Metadata.cpp:1421
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:115
Represents a location in source code.
Definition: SMLoc.h:23
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:65
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
Definition: SmallString.h:254
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void push_back(const T &Elt)
Definition: SmallVector.h:413
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
int64_t getFixed() const
Returns the fixed component of the stack.
Definition: TypeSize.h:49
static StackOffset getFixed(int64_t Fixed)
Definition: TypeSize.h:42
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
static constexpr size_t npos
Definition: StringRef.h:53
Information about stack frame layout on the target.
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
int getOffsetOfLocalArea() const
getOffsetOfLocalArea - This method returns the offset of the local area from the stack pointer on ent...
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
const Triple & getTargetTriple() const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
SwiftAsyncFramePointerMode SwiftAsyncFramePointer
Control when and how the Swift async frame pointer bit should be set.
bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual Register getFrameRegister(const MachineFunction &MF) const =0
Debug information queries.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetFrameLowering * getFrameLowering() const
bool isOSWindows() const
Tests whether the OS is Windows.
Definition: Triple.h:652
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
Definition: Triple.h:585
Value wrapper in the Metadata hierarchy.
Definition: Metadata.h:454
Value * getValue() const
Definition: Metadata.h:494
bool has128ByteRedZone(const MachineFunction &MF) const
Return true if the function has a redzone (accessible bytes past the frame of the top of stack functi...
void spillFPBP(MachineFunction &MF) const override
If a function uses a base pointer and the base pointer is clobbered by inline asm, RA doesn't detect th...
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override
canSimplifyCallFramePseudos - If there is a reserved call frame, the call frame pseudos can be simpli...
bool needsFrameIndexResolution(const MachineFunction &MF) const override
X86FrameLowering(const X86Subtarget &STI, MaybeAlign StackAlignOverride)
const X86RegisterInfo * TRI
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
bool hasFPImpl(const MachineFunction &MF) const override
hasFPImpl - Return true if the specified function should have a dedicated frame pointer register.
MachineBasicBlock::iterator restoreWin32EHStackPointers(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool RestoreSP=false) const
Sets up EBP and optionally ESI based on the incoming EBP value.
int getInitialCFAOffset(const MachineFunction &MF) const override
Return initial CFA offset value i.e.
bool canUseAsPrologue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as a prologue for the target.
bool hasReservedCallFrame(const MachineFunction &MF) const override
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
void emitStackProbe(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog, std::optional< MachineFunction::DebugInstrOperandPair > InstrNum=std::nullopt) const
Emit target stack probe code.
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool IsPrologue) const
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
StackOffset getFrameIndexReferenceSP(const MachineFunction &MF, int FI, Register &SPReg, int Adjustment) const
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
bool enableShrinkWrapping(const MachineFunction &MF) const override
Returns true if the target will correctly handle shrink wrapping.
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - This method should return the base register and offset used to reference a f...
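Callers resolve a frame index to a base register plus byte offset; a minimal usage sketch, where TFI stands for the target's frame lowering (e.g. obtained via MF.getSubtarget().getFrameLowering()):
    Register FrameReg;
    StackOffset Offset = TFI->getFrameIndexReference(MF, FI, FrameReg);
    int64_t Disp = Offset.getFixed(); // x86 frames carry no scalable part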
void inlineStackProbe(MachineFunction &MF, MachineBasicBlock &PrologMBB) const override
Replace a StackProbe inline-stub with the actual probe code inline.
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
const X86InstrInfo & TII
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &DL, int64_t NumBytes, bool InEpilogue) const
Emit a series of instructions to increment / decrement the stack pointer by a constant value.
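Since the stack grows down, prologue callers pass a negative delta; a sketch with NumBytes assumed from context:
    // Allocate NumBytes in the prologue; epilogue calls use a positive delta.
    emitSPUpdate(MBB, MBBI, DL, -(int64_t)NumBytes, /*InEpilogue=*/false);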
bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as an epilogue for the target.
bool Is64Bit
Is64Bit implies that x86_64 instructions are available.
Register getInitialCFARegister(const MachineFunction &MF) const override
Return initial CFA register value i.e.
bool Uses64BitFramePtr
True if the 64-bit frame or stack pointer should be used.
unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const override
void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const override
Adjust the prologue to have the function use segmented stacks.
DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const override
Return the frame base information to be encoded in the DWARF subprogram debug info.
void emitCalleeSavedFrameMovesFullCFA(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const override
Emits Dwarf Info specifying offsets of callee saved registers and frame pointer.
int getWin64EHFrameIndexRef(const MachineFunction &MF, int FI, Register &SPReg) const
bool canUseLEAForSPInEpilogue(const MachineFunction &MF) const
Check that LEA can be used on SP in an epilogue sequence for MF.
bool stackProbeFunctionModifiesSP() const override
Does the stack probe function call return with a modified stack pointer?
void orderFrameObjects(const MachineFunction &MF, SmallVectorImpl< int > &ObjectsToAllocate) const override
Order the symbols in the local stack.
void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const MCCFIInstruction &CFIInst, MachineInstr::MIFlag Flag=MachineInstr::NoFlags) const
Wraps up getting a CFI index and building a MachineInstr for it.
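BuildCFI wraps the addFrameInst/addCFIIndex pattern shown earlier in this index; a hedged example recording the saved frame pointer, where DwarfFramePtr is an assumed DWARF register number:
    BuildCFI(MBB, MBBI, DL,
             MCCFIInstruction::createOffset(nullptr, DwarfFramePtr, -16),
             MachineInstr::FrameSetup);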
int mergeSPUpdates(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, bool doMergeWithPrevious) const
Check the instruction before/after the passed instruction.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
void processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameIndicesReplaced - This method is called immediately before MO_FrameIndex op...
StackOffset getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, Register &FrameReg, bool IgnoreSPUpdates) const override
Same as getFrameIndexReference, except that the stack pointer (as opposed to the frame pointer) will ...
void restoreWinEHStackPointersInParent(MachineFunction &MF) const
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
void adjustForHiPEPrologue(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const override
Erlang programs may need a special prologue to handle the stack size they might need at runtime.
const X86Subtarget & STI
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void buildClearRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, DebugLoc &DL, bool AllowSideEffects=true) const override
int64_t getFrameAdjustment(const MachineInstr &I) const
Returns the stack pointer adjustment that happens inside the frame setup..destroy sequence (e....
Definition: X86InstrInfo.h:215
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
bool isCandidateForPush2Pop2(Register Reg) const
void setRestoreBasePointer(const MachineFunction *MF)
DenseMap< int, unsigned > & getWinEHXMMSlotInfo()
MachineInstr * getStackPtrSaveMI() const
AMXProgModelEnum getAMXProgModel() const
void addCandidateForPush2Pop2(Register Reg)
void setStackPtrSaveMI(MachineInstr *MI)
void setCalleeSavedFrameSize(unsigned bytes)
bool hasBasePointer(const MachineFunction &MF) const
Register getFrameRegister(const MachineFunction &MF) const override
unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) const
findDeadCallerSavedReg - Return a caller-saved register that isn't live when it reaches the "return" ...
Register getStackRegister() const
unsigned getSlotSize() const
Register getFramePtr() const
Returns physical register used as frame pointer.
Register getBaseRegister() const
bool isOSWindows() const
Definition: X86Subtarget.h:322
const X86TargetLowering * getTargetLowering() const override
Definition: X86Subtarget.h:118
bool isTargetDragonFly() const
Definition: X86Subtarget.h:282
bool isTargetWindowsMSVC() const
Definition: X86Subtarget.h:300
bool isTarget64BitILP32() const
Is this x86_64 with the ILP32 programming model (x32 ABI)?
Definition: X86Subtarget.h:173
bool isTargetDarwin() const
Definition: X86Subtarget.h:280
bool isTargetWin64() const
Definition: X86Subtarget.h:324
bool isTarget64BitLP64() const
Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
Definition: X86Subtarget.h:178
bool swiftAsyncContextIsDynamicallySet() const
Return whether FrameLowering should always set the "extended frame present" bit in FP,...
Definition: X86Subtarget.h:386
bool isTargetWindowsCoreCLR() const
Definition: X86Subtarget.h:304
const X86InstrInfo * getInstrInfo() const override
Definition: X86Subtarget.h:122
bool isCallingConvWin64(CallingConv::ID CC) const
Definition: X86Subtarget.h:337
bool isTargetFreeBSD() const
Definition: X86Subtarget.h:281
bool isTargetNaCl64() const
Definition: X86Subtarget.h:296
bool isTargetWin32() const
Definition: X86Subtarget.h:326
bool useIndirectThunkCalls() const
Definition: X86Subtarget.h:218
bool isTargetLinux() const
Definition: X86Subtarget.h:290
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
StringRef getStackProbeSymbolName(const MachineFunction &MF) const override
Returns the name of the symbol used to emit stack probes or the empty string if not applicable.
bool hasStackProbeSymbol(const MachineFunction &MF) const override
Returns true if stack probing through a function call is requested.
unsigned getStackProbeSize(const MachineFunction &MF) const
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint16_t StackAdjustment(const RuntimeFunction &RF)
StackAdjustment - calculated stack adjustment in words.
Definition: ARMWinEH.h:199
@ HiPE
Used by the High-Performance Erlang Compiler (HiPE).
Definition: CallingConv.h:53
@ X86_INTR
x86 hardware interrupt context.
Definition: CallingConv.h:173
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ X86_FastCall
'fast' analog of X86_StdCall.
Definition: CallingConv.h:103
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
@ MO_GOTPCREL
MO_GOTPCREL - On a symbol operand this indicates that the immediate is offset to the GOT entry for th...
Definition: X86BaseInfo.h:387
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
void stable_sort(R &&Range)
Definition: STLExtras.h:2037
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
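These range wrappers drop the explicit begin/end iterators; for instance, with CSI a vector of CalleeSavedInfo (the predicate here is illustrative):
    bool AllGPR = llvm::all_of(CSI, [](const CalleeSavedInfo &I) {
      return X86::GR64RegClass.contains(I.getReg());
    });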
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
MCRegister getX86SubSuperRegister(MCRegister Reg, unsigned Size, bool High=false)
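Size is given in bits; two illustrative queries:
    MCRegister R32 = getX86SubSuperRegister(X86::RAX, 32);               // EAX
    MCRegister R8H = getX86SubSuperRegister(X86::RAX, 8, /*High=*/true); // AH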
@ DwarfCFI
DWARF-like instruction based exceptions.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition: MathExtras.h:557
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
static const MachineInstrBuilder & addRegOffset(const MachineInstrBuilder &MIB, unsigned Reg, bool isKill, int Offset)
addRegOffset - This function is used to add a memory reference of the form [Reg + Offset],...
@ Always
Always set the bit.
@ DeploymentBased
Determine whether to set the bit statically or dynamically based on the deployment target.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition: MathExtras.h:195
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
IterT skipDebugInstructionsBackward(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It until it points to a non-debug instruction or to Begin and return the resulting iterator...
unsigned getUndefRegState(bool B)
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
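These helpers translate a bool into the corresponding operand flag bits for addReg; a typical callee-save push, sketched with Reg assumed:
    BuildMI(MBB, MI, DL, TII.get(X86::PUSH64r))
        .addReg(Reg, getKillRegState(/*isKill=*/true));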
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
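Worked arithmetic for the alignment helpers listed here (alignDown and isAligned appear earlier in this index):
    uint64_t Up = alignTo(37, Align(16)); // 48, the next multiple of 16
    uint64_t Down = alignDown(37, 16);    // 32, the previous multiple of 16
    bool OK = isAligned(Align(16), Up);   // true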
bool isAsynchronousEHPersonality(EHPersonality Pers)
Returns true if this personality function catches asynchronous exceptions.
unsigned encodeSLEB128(int64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a SLEB128 value to an output stream.
Definition: LEB128.h:23
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1945
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1766
void computeAndAddLiveIns(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB)
Convenience function combining computeLiveIns() and addLiveIns().
unsigned encodeULEB128(uint64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a ULEB128 value to an output stream.
Definition: LEB128.h:80
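Both LEB128 encoders write to any raw_ostream; a self-contained sketch using a raw_svector_ostream buffer:
    SmallString<8> Buf;
    raw_svector_ostream OS(Buf);
    encodeULEB128(624485, OS); // emits 0xE5 0x8E 0x26
    encodeSLEB128(-2, OS);     // emits 0x7E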
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-in's for a set of MBBs until the computation converges.
Definition: LivePhysRegs.h:215
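After transformations that split blocks (as the inline stack-probe loop expansion does), live-ins are refreshed in one call; the block names here are illustrative:
    // Re-runs the live-in computation until the sets converge.
    fullyRecomputeLiveIns({&LoopMBB, &ContinueMBB});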
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Pair of physical register and lane mask.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
union llvm::TargetFrameLowering::DwarfFrameBase::@248 Location
enum llvm::TargetFrameLowering::DwarfFrameBase::FrameBaseKind Kind
SmallVector< WinEHTryBlockMapEntry, 4 > TryBlockMap
Definition: WinEHFuncInfo.h:97
SmallVector< WinEHHandlerType, 1 > HandlerArray
Definition: WinEHFuncInfo.h:76