LLVM 20.0.0git
X86FrameLowering.cpp
Go to the documentation of this file.
1//===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the X86 implementation of TargetFrameLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "X86FrameLowering.h"
15#include "X86InstrBuilder.h"
16#include "X86InstrInfo.h"
18#include "X86Subtarget.h"
19#include "X86TargetMachine.h"
20#include "llvm/ADT/Statistic.h"
28#include "llvm/IR/DataLayout.h"
30#include "llvm/IR/Function.h"
31#include "llvm/IR/Module.h"
32#include "llvm/MC/MCAsmInfo.h"
34#include "llvm/MC/MCSymbol.h"
35#include "llvm/Support/LEB128.h"
37#include <cstdlib>
38
39#define DEBUG_TYPE "x86-fl"
40
41STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
42STATISTIC(NumFrameExtraProbe,
43 "Number of extra stack probes generated in prologue");
44STATISTIC(NumFunctionUsingPush2Pop2, "Number of funtions using push2/pop2");
45
46using namespace llvm;
47
// Constructor tail: the line carrying the constructor name and its first
// parameter is not part of this listing. The base class is initialised with
// StackGrowsDown, StackAlignOverride.valueOrOne() and a third argument of -8
// on 64-bit subtargets / -4 otherwise (presumably the local-area offset --
// confirm against TargetFrameLowering). STI, TII and TRI are then bound from
// the subtarget.
49 MaybeAlign StackAlignOverride)
50 : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(),
51 STI.is64Bit() ? -8 : -4),
52 STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {
53 // Cache a bunch of frame-related predicates for this subtarget.
55 Is64Bit = STI.is64Bit();
57 // Standard x86_64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit.
60}
61
// Body of a predicate whose signature line is absent from this listing
// (presumably X86FrameLowering::hasReservedCallFrame -- confirm). It is true
// only when the function has no variable-sized stack objects, no push
// sequences and no preallocated call.
63 return !MF.getFrameInfo().hasVarSizedObjects() &&
64 !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() &&
65 !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall();
66}
67
68/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
69/// call frame pseudos can be simplified. Having a FP, as in the default
70/// implementation, is not sufficient here since we can't always use it.
71/// Use a more nuanced condition.
///
/// The visible operands accept: a reserved call frame, a preallocated call, or
/// a frame pointer combined with no stack realignment. The last operand of the
/// `||` chain is not present in this listing.
73 const MachineFunction &MF) const {
74 return hasReservedCallFrame(MF) ||
75 MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
76 (hasFP(MF) && !TRI->hasStackRealignment(MF)) ||
78}
79
80// needsFrameIndexResolution - Do we need to perform FI resolution for
81// this function? Normally, this is required only when the function
82// has any stack objects. However, FI resolution actually has another job,
83// not apparent from its name - it resolves callframesetup/destroy pseudos
84// that were not simplified earlier.
85// So, this is required for x86 functions that have push sequences even
86// when there are no stack objects.
// Returns true when the frame info reports stack objects or the function info
// reports push sequences.
88 const MachineFunction &MF) const {
89 return MF.getFrameInfo().hasStackObjects() ||
90 MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
91}
92
93/// hasFPImpl - Return true if the specified function should have a dedicated
94/// frame pointer register. This is true if the function has variable sized
95/// allocas or if frame pointer elimination is disabled.
///
/// The visible conditions additionally force a frame pointer for stack
/// realignment, callsUnwindInit(), EH funclets, callsEHReturn(), stack maps,
/// patch points, and a Win64 prologue combined with a copy implying a stack
/// adjustment. Some operands of the `||` chain are missing from this listing.
97 const MachineFrameInfo &MFI = MF.getFrameInfo();
98 return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
99 TRI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
103 MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
104 MFI.hasStackMap() || MFI.hasPatchPoint() ||
105 (isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment()));
106}
107
108static unsigned getSUBriOpcode(bool IsLP64) {
109 return IsLP64 ? X86::SUB64ri32 : X86::SUB32ri;
110}
111
112static unsigned getADDriOpcode(bool IsLP64) {
113 return IsLP64 ? X86::ADD64ri32 : X86::ADD32ri;
114}
115
116static unsigned getSUBrrOpcode(bool IsLP64) {
117 return IsLP64 ? X86::SUB64rr : X86::SUB32rr;
118}
119
120static unsigned getADDrrOpcode(bool IsLP64) {
121 return IsLP64 ? X86::ADD64rr : X86::ADD32rr;
122}
123
124static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
125 return IsLP64 ? X86::AND64ri32 : X86::AND32ri;
126}
127
128static unsigned getLEArOpcode(bool IsLP64) {
129 return IsLP64 ? X86::LEA64r : X86::LEA32r;
130}
131
132static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm) {
133 if (Use64BitReg) {
134 if (isUInt<32>(Imm))
135 return X86::MOV32ri64;
136 if (isInt<32>(Imm))
137 return X86::MOV64ri32;
138 return X86::MOV64ri;
139 }
140 return X86::MOV32ri;
141}
142
143// Push-Pop Acceleration (PPX) hint is used to indicate that the POP reads the
144// value written by the PUSH from the stack. The processor tracks these marked
145// instructions internally and fast-forwards register data between matching PUSH
146// and POP instructions, without going through memory or through the training
147// loop of the Fast Store Forwarding Predictor (FSFP). Instead, a more efficient
148// memory-renaming optimization can be used.
149//
150// The PPX hint is purely a performance hint. Instructions with this hint have
151// the same functional semantics as those without. PPX hints set by the
152// compiler that violate the balancing rule may turn off the PPX optimization,
153// but they will not affect program semantics.
154//
155// Hence, PPX is used for balanced spill/reloads (Exceptions and setjmp/longjmp
156// are not considered).
157//
158// PUSH2 and POP2 are instructions for (respectively) pushing/popping 2
159// GPRs at a time to/from the stack.
160static unsigned getPUSHOpcode(const X86Subtarget &ST) {
161 return ST.is64Bit() ? (ST.hasPPX() ? X86::PUSHP64r : X86::PUSH64r)
162 : X86::PUSH32r;
163}
164static unsigned getPOPOpcode(const X86Subtarget &ST) {
165 return ST.is64Bit() ? (ST.hasPPX() ? X86::POPP64r : X86::POP64r)
166 : X86::POP32r;
167}
168static unsigned getPUSH2Opcode(const X86Subtarget &ST) {
169 return ST.hasPPX() ? X86::PUSH2P : X86::PUSH2;
170}
171static unsigned getPOP2Opcode(const X86Subtarget &ST) {
172 return ST.hasPPX() ? X86::POP2P : X86::POP2;
173}
174
// Tail of a scan whose function header and loop header are absent from this
// listing (presumably isEAXLiveIn, which emitSPUpdate calls -- confirm). For
// each visited entry it reads PhysReg and reports true as soon as that
// register is RAX, EAX, AX, AH or AL; otherwise the result is false.
177 unsigned Reg = RegMask.PhysReg;
178
179 if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
180 Reg == X86::AH || Reg == X86::AL)
181 return true;
182 }
183
184 return false;
185}
186
187/// Check if the flags need to be preserved before the terminators.
188/// This would be the case if EFLAGS is live-in to the region
189/// composed by the terminators, or live-out of that region without
190/// being defined by a terminator.
/// (The line carrying the function name and parameter list is absent from
/// this listing; the body operates on a basic block named MBB.)
191static bool
193 for (const MachineInstr &MI : MBB.terminators()) {
194 bool BreakNext = false;
195 for (const MachineOperand &MO : MI.operands()) {
196 if (!MO.isReg())
197 continue;
198 Register Reg = MO.getReg();
199 if (Reg != X86::EFLAGS)
200 continue;
201
202 // This terminator reads EFLAGS, and no earlier terminator
203 // has defined it:
204 // EFLAGS is live-in of the region composed by the terminators.
205 if (!MO.isDef())
206 return true;
207 // This terminator defines the eflags, i.e., we don't need to preserve it.
208 // However, we still need to check this specific terminator does not
209 // read a live-in value.
210 BreakNext = true;
211 }
212 // We found a definition of the eflags, no need to preserve them.
213 if (BreakNext)
214 return false;
215 }
216
217 // None of the terminators use or define the eflags.
218 // Check if they are live-out, that would imply we need to preserve them.
219 for (const MachineBasicBlock *Succ : MBB.successors())
220 if (Succ->isLiveIn(X86::EFLAGS))
221 return true;
222
223 return false;
224}
225
226/// emitSPUpdate - Emit a series of instructions to increment / decrement the
227/// stack pointer by a constant value.
///
/// A negative \p NumBytes subtracts from the stack pointer, a positive one
/// adds to it. Outside the epilogue, when inline stack probing is enabled, a
/// single STACKALLOC_W_PROBING pseudo is emitted instead. Otherwise the
/// adjustment is made in pieces of at most 2^31 - 1 bytes, unless it is large
/// enough to be materialised in a register first.
/// (Several lines of this function, including the start of its signature, are
/// absent from this listing.)
230 const DebugLoc &DL, int64_t NumBytes,
231 bool InEpilogue) const {
232 bool isSub = NumBytes < 0;
233 uint64_t Offset = isSub ? -NumBytes : NumBytes;
236
// Upper bound on the amount adjusted by one emitted instruction.
237 uint64_t Chunk = (1LL << 31) - 1;
238
242 const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
243
244 // It's ok to not take into account large chunks when probing, as the
245 // allocation is split in smaller chunks anyway.
246 if (EmitInlineStackProbe && !InEpilogue) {
247
248 // This pseudo-instruction is going to be expanded, potentially using a
249 // loop, by inlineStackProbe().
250 BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING)).addImm(Offset);
251 return;
252 } else if (Offset > Chunk) {
253 // Rather than emit a long series of instructions for large offsets,
254 // load the offset into a register and do one sub/add
255 unsigned Reg = 0;
256 unsigned Rax = (unsigned)(Is64Bit ? X86::RAX : X86::EAX);
257
// RAX/EAX can be used directly when subtracting and it is not live into MBB;
// the alternative (the else arm) is not visible in this listing.
258 if (isSub && !isEAXLiveIn(MBB))
259 Reg = Rax;
260 else
262
263 unsigned AddSubRROpc =
265 if (Reg) {
267 .addImm(Offset)
268 .setMIFlag(Flag);
269 MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr)
271 .addReg(Reg);
272 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
273 return;
274 } else if (Offset > 8 * Chunk) {
275 // If we would need more than 8 add or sub instructions (a >16GB stack
276 // frame), it's worth spilling RAX to materialize this immediate.
277 // pushq %rax
278 // movabsq +-$Offset+-SlotSize, %rax
279 // addq %rsp, %rax
280 // xchg %rax, (%rsp)
281 // movq (%rsp), %rsp
282 assert(Is64Bit && "can't have 32-bit 16GB stack frame");
283 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
285 .setMIFlag(Flag);
286 // Subtract is not commutative, so negate the offset and always use add.
287 // Subtract 8 less and add 8 more to account for the PUSH we just did.
288 if (isSub)
289 Offset = -(Offset - SlotSize);
290 else
293 .addImm(Offset)
294 .setMIFlag(Flag);
295 MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax)
296 .addReg(Rax)
298 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
299 // Exchange the new SP in RAX with the top of the stack.
301 BuildMI(MBB, MBBI, DL, TII.get(X86::XCHG64rm), Rax).addReg(Rax),
302 StackPtr, false, 0);
303 // Load new SP from the top of the stack into RSP.
304 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), StackPtr),
305 StackPtr, false, 0);
306 return;
307 }
308 }
309
// Default path: peel off at most Chunk bytes per iteration until done.
310 while (Offset) {
311 uint64_t ThisVal = std::min(Offset, Chunk);
312 if (ThisVal == SlotSize) {
313 // Use push / pop for slot sized adjustments as a size optimization. We
314 // need to find a dead register when using pop.
315 unsigned Reg = isSub ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
317 if (Reg) {
318 unsigned Opc = isSub ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
319 : (Is64Bit ? X86::POP64r : X86::POP32r);
320 BuildMI(MBB, MBBI, DL, TII.get(Opc))
321 .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub))
322 .setMIFlag(Flag);
323 Offset -= ThisVal;
324 continue;
325 }
326 }
327
328 BuildStackAdjustment(MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue)
329 .setMIFlag(Flag);
330
331 Offset -= ThisVal;
332 }
333}
334
/// Emit one instruction that adjusts the stack pointer by the non-zero
/// \p Offset and return its builder. Either an LEA is used or, otherwise, a
/// SUB/ADD with an immediate whose implicit EFLAGS def is marked dead.
/// (Several lines of this function are absent from this listing.)
335MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
337 const DebugLoc &DL, int64_t Offset, bool InEpilogue) const {
338 assert(Offset != 0 && "zero offset stack adjustment requested");
339
340 // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue
341 // is tricky.
342 bool UseLEA;
343 if (!InEpilogue) {
344 // Check if inserting the prologue at the beginning
345 // of MBB would require to use LEA operations.
346 // We need to use LEA operations if EFLAGS is live in, because
347 // it means an instruction will read it before it gets defined.
348 UseLEA = STI.useLeaForSP() || MBB.isLiveIn(X86::EFLAGS);
349 } else {
350 // If we can use LEA for SP but we shouldn't, check that none
351 // of the terminators uses the eflags. Otherwise we will insert
352 // a ADD that will redefine the eflags and break the condition.
353 // Alternatively, we could move the ADD, but this may not be possible
354 // and is an optimization anyway.
356 if (UseLEA && !STI.useLeaForSP())
358 // If that assert breaks, that means we do not do the right thing
359 // in canUseAsEpilogue.
361 "We shouldn't have allowed this insertion point");
362 }
363
365 if (UseLEA) {
368 StackPtr),
369 StackPtr, false, Offset);
370 } else {
// The immediate forms take a magnitude, so the sign selects SUB versus ADD.
371 bool IsSub = Offset < 0;
372 uint64_t AbsOffset = IsSub ? -Offset : Offset;
373 const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr)
375 MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
377 .addImm(AbsOffset);
378 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
379 }
380 return MI;
381}
382
// Looks at the instruction adjacent to MBBI (the previous one when
// doMergeWithPrevious is set, otherwise the one at MBBI). If it is an
// ADD/SUB-immediate or a matching LEA on the stack pointer, the instruction
// is erased and its signed adjustment returned; otherwise 0 is returned.
// (The start of the signature and a few interior lines are absent from this
// listing; presumably X86FrameLowering::mergeSPUpdates -- confirm.)
385 bool doMergeWithPrevious) const {
386 if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
387 (!doMergeWithPrevious && MBBI == MBB.end()))
388 return 0;
389
390 MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;
391
393 // It is assumed that an ADD/SUB/LEA instruction is succeeded by one CFI
394 // instruction, and that there are no DBG_VALUE or other instructions between
395 // ADD/SUB/LEA and its corresponding CFI instruction.
396 /* TODO: Add support for the case where there are multiple CFI instructions
397 below the ADD/SUB/LEA, e.g.:
398 ...
399 add
400 cfi_def_cfa_offset
401 cfi_offset
402 ...
403 */
404 if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
405 PI = std::prev(PI);
406
407 unsigned Opc = PI->getOpcode();
// NOTE(review): Offset is an int while it is assigned from getImm(), which
// presumably yields a 64-bit value -- confirm that no narrowing can occur.
408 int Offset = 0;
409
410 if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) &&
411 PI->getOperand(0).getReg() == StackPtr) {
412 assert(PI->getOperand(1).getReg() == StackPtr);
413 Offset = PI->getOperand(2).getImm();
414 } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
415 PI->getOperand(0).getReg() == StackPtr &&
416 PI->getOperand(1).getReg() == StackPtr &&
417 PI->getOperand(2).getImm() == 1 &&
418 PI->getOperand(3).getReg() == X86::NoRegister &&
419 PI->getOperand(5).getReg() == X86::NoRegister) {
420 // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
421 Offset = PI->getOperand(4).getImm();
422 } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) &&
423 PI->getOperand(0).getReg() == StackPtr) {
424 assert(PI->getOperand(1).getReg() == StackPtr);
425 Offset = -PI->getOperand(2).getImm();
426 } else
427 return 0;
428
// Drop the merged instruction, and a CFI instruction directly after it if
// there is one.
429 PI = MBB.erase(PI);
430 if (PI != MBB.end() && PI->isCFIInstruction()) {
431 auto CIs = MBB.getParent()->getFrameInstructions();
432 MCCFIInstruction CI = CIs[PI->getOperand(0).getCFIIndex()];
435 PI = MBB.erase(PI);
436 }
437 if (!doMergeWithPrevious)
439
440 return Offset;
441}
442
// Registers CFIInst with the machine function via addFrameInst() and emits a
// CFI_INSTRUCTION pseudo at MBBI that refers to the returned index, tagged
// with Flag. The setHasCFIAdjustCfa(true) call is presumably guarded by a
// condition on a line absent from this listing -- confirm. (The start of the
// signature is also absent; presumably X86FrameLowering::BuildCFI.)
445 const DebugLoc &DL,
446 const MCCFIInstruction &CFIInst,
447 MachineInstr::MIFlag Flag) const {
449 unsigned CFIIndex = MF.addFrameInst(CFIInst);
450
452 MF.getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true);
453
454 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
455 .addCFIIndex(CFIIndex)
456 .setMIFlag(Flag);
457}
458
459/// Emits Dwarf Info specifying offsets of callee saved registers and
460/// frame pointer. This is called only when basic block sections are enabled.
///
/// Without a frame pointer the function returns early (the statement on that
/// path is absent from this listing). With one, a createOffset CFI directive
/// is built for the frame pointer's DWARF register at minus the computed
/// offset.
464 if (!hasFP(MF)) {
466 return;
467 }
470 const Register MachineFramePtr =
472 : FramePtr;
473 unsigned DwarfReg = MRI->getDwarfRegNum(MachineFramePtr, true);
474 // Offset = space for return address + size of the frame pointer itself.
475 int64_t Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 8 : 4);
477 MCCFIInstruction::createOffset(nullptr, DwarfReg, -Offset));
479}
480
// For every callee-saved register recorded in the frame info, builds a CFI
// directive: in the prologue either a DW_CFA_expression escape (when a
// stack-pointer save instruction exists) or a plain createOffset; outside the
// prologue a createRestore. When a stack-pointer save instruction exists, a
// DW_CFA_def_cfa_expression escape is built afterwards.
// (The start of the signature and the lines that emit the directives are
// absent from this listing; presumably
// X86FrameLowering::emitCalleeSavedFrameMoves -- confirm.)
483 const DebugLoc &DL, bool IsPrologue) const {
485 MachineFrameInfo &MFI = MF.getFrameInfo();
488
489 // Add callee saved registers to move list.
490 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
491
492 // Calculate offsets.
493 for (const CalleeSavedInfo &I : CSI) {
494 int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
495 Register Reg = I.getReg();
496 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
497
498 if (IsPrologue) {
499 if (X86FI->getStackPtrSaveMI()) {
500 // +2*SlotSize because there is return address and ebp at the bottom
501 // of the stack.
502 // | retaddr |
503 // | ebp |
504 // | |<--ebp
505 Offset += 2 * SlotSize;
// Escape layout: DW_CFA_expression, ULEB128 register, the byte 2, then
// (DW_OP_breg0 + frame pointer) followed by the SLEB128 offset.
506 SmallString<64> CfaExpr;
507 CfaExpr.push_back(dwarf::DW_CFA_expression);
508 uint8_t buffer[16];
509 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
510 CfaExpr.push_back(2);
512 const Register MachineFramePtr =
515 : FramePtr;
516 unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
517 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
518 CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
520 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
522 } else {
524 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
525 }
526 } else {
528 MCCFIInstruction::createRestore(nullptr, DwarfReg));
529 }
530 }
531 if (auto *MI = X86FI->getStackPtrSaveMI()) {
532 int FI = MI->getOperand(1).getIndex();
533 int64_t Offset = MFI.getObjectOffset(FI) + 2 * SlotSize;
534 SmallString<64> CfaExpr;
536 const Register MachineFramePtr =
539 : FramePtr;
540 unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
541 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
542 uint8_t buffer[16];
543 CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
544 CfaExpr.push_back(dwarf::DW_OP_deref);
545
// Wrap the expression: opcode, encoded length, then the expression bytes.
546 SmallString<64> DefCfaExpr;
547 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
548 DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
549 DefCfaExpr.append(CfaExpr.str());
550 // DW_CFA_def_cfa_expression: DW_OP_breg5 offset, DW_OP_deref
552 MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
554 }
555}
556
/// Emit code into \p MBB that zeroes the registers set in \p RegsToZero.
/// The x87 stack is handled once up front if any RFP80 register is requested;
/// general-purpose registers are collapsed to their 32-bit form and handled
/// before the remaining registers.
/// (The insertion-point definition and the bodies of the last two loops are
/// absent from this listing.)
557void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
558 MachineBasicBlock &MBB) const {
559 const MachineFunction &MF = *MBB.getParent();
560
561 // Insertion point.
563
564 // Fake a debug loc.
565 DebugLoc DL;
566 if (MBBI != MBB.end())
567 DL = MBBI->getDebugLoc();
568
569 // Zero out FP stack if referenced. Do this outside of the loop below so that
570 // it's done only once.
571 const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
572 for (MCRegister Reg : RegsToZero.set_bits()) {
573 if (!X86::RFP80RegClass.contains(Reg))
574 continue;
575
// Emit NumFPRegs LD_F0 instructions followed by NumFPRegs ST_FPrr ST0
// instructions, then stop scanning: one pass covers every FP register.
576 unsigned NumFPRegs = ST.is64Bit() ? 8 : 7;
577 for (unsigned i = 0; i != NumFPRegs; ++i)
578 BuildMI(MBB, MBBI, DL, TII.get(X86::LD_F0));
579
580 for (unsigned i = 0; i != NumFPRegs; ++i)
581 BuildMI(MBB, MBBI, DL, TII.get(X86::ST_FPrr)).addReg(X86::ST0);
582 break;
583 }
584
585 // For GPRs, we only care to clear out the 32-bit register.
586 BitVector GPRsToZero(TRI->getNumRegs());
587 for (MCRegister Reg : RegsToZero.set_bits())
588 if (TRI->isGeneralPurposeRegister(MF, Reg)) {
589 GPRsToZero.set(getX86SubSuperRegister(Reg, 32));
590 RegsToZero.reset(Reg);
591 }
592
593 // Zero out the GPRs first.
594 for (MCRegister Reg : GPRsToZero.set_bits())
596
597 // Zero out the remaining registers.
598 for (MCRegister Reg : RegsToZero.set_bits())
600}
601
// Dispatches stack probing. On the branch whose condition is absent from this
// listing, a prologue probe is deferred by emitting a STACKALLOC_W_PROBING
// pseudo with a zero size operand, and a non-prologue probe is expanded
// immediately via emitStackProbeInline(). On the other branch
// emitStackProbeCall() is used.
// (The start of the signature is absent too; presumably
// X86FrameLowering::emitStackProbe -- confirm.)
604 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
605 std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
608 if (InProlog) {
609 BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING))
610 .addImm(0 /* no explicit stack size */);
611 } else {
612 emitStackProbeInline(MF, MBB, MBBI, DL, false);
613 }
614 } else {
615 emitStackProbeCall(MF, MBB, MBBI, DL, InProlog, InstrNum);
616 }
617}
618
// Body of a predicate whose signature line is absent from this listing
// (presumably X86FrameLowering::stackProbeFunctionModifiesSP -- confirm).
// True for Windows targets other than Win64.
620 return STI.isOSWindows() && !STI.isTargetWin64();
621}
622
// Finds the first STACKALLOC_W_PROBING pseudo in PrologMBB, if any, expands
// it in place through emitStackProbeInline() (with InProlog = true) and then
// erases the pseudo. (The first signature line is absent from this listing;
// presumably X86FrameLowering::inlineStackProbe -- confirm.)
624 MachineBasicBlock &PrologMBB) const {
625 auto Where = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
626 return MI.getOpcode() == X86::STACKALLOC_W_PROBING;
627 });
628 if (Where != PrologMBB.end()) {
629 DebugLoc DL = PrologMBB.findDebugLoc(Where);
630 emitStackProbeInline(MF, PrologMBB, Where, DL, true);
631 Where->eraseFromParent();
632 }
633}
634
/// Expand an inline stack probe at \p MBBI: 64-bit Windows CoreCLR targets
/// use the dedicated CoreCLR expansion, every other target the generic one.
/// (Two parameter lines and one body line are absent from this listing.)
635void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
638 const DebugLoc &DL,
639 bool InProlog) const {
641 if (STI.isTargetWindowsCoreCLR() && STI.is64Bit())
642 emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog);
643 else
644 emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog);
645}
646
/// Generic inline probe expansion. The allocation size is read from operand 0
/// of the instruction at \p MBBI. Allocations larger than eight probe
/// intervals are expanded as a loop, smaller ones as an unrolled block.
/// (A signature line and the TLI definition are absent from this listing.)
647void X86FrameLowering::emitStackProbeInlineGeneric(
649 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
650 MachineInstr &AllocWithProbe = *MBBI;
651 uint64_t Offset = AllocWithProbe.getOperand(0).getImm();
652
655 assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) &&
656 "different expansion expected for CoreCLR 64 bit");
657
658 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
659 uint64_t ProbeChunk = StackProbeSize * 8;
660
// MaxAlign is 0 unless the function realigns its stack.
661 uint64_t MaxAlign =
662 TRI->hasStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0;
663
664 // Synthesize a loop or unroll it, depending on the number of iterations.
665 // BuildStackAlignAND ensures that only MaxAlign % StackProbeSize bits left
666 // between the unaligned rsp and current rsp.
667 if (Offset > ProbeChunk) {
668 emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset,
669 MaxAlign % StackProbeSize);
670 } else {
671 emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset,
672 MaxAlign % StackProbeSize);
673 }
674}
675
/// Unrolled inline probe: allocates Offset bytes in steps of at most
/// StackProbeSize, storing a zero at the new stack pointer after each full
/// step, and finishes with an unprobed tail allocation. \p AlignOffset must
/// be smaller than StackProbeSize (asserted) and shortens the first step.
/// (Two signature lines and several builder-chain lines are absent from this
/// listing.)
676void X86FrameLowering::emitStackProbeInlineGenericBlock(
679 uint64_t AlignOffset) const {
680
681 const bool NeedsDwarfCFI = needsDwarfCFI(MF);
682 const bool HasFP = hasFP(MF);
685 const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
686 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
687
// Bytes allocated so far.
688 uint64_t CurrentOffset = 0;
689
690 assert(AlignOffset < StackProbeSize);
691
692 // If the offset is so small it fits within a page, there's nothing to do.
693 if (StackProbeSize < Offset + AlignOffset) {
694
695 uint64_t StackAdjustment = StackProbeSize - AlignOffset;
696 BuildStackAdjustment(MBB, MBBI, DL, -StackAdjustment, /*InEpilogue=*/false)
// CFA offset adjustments are only emitted when there is no frame pointer.
698 if (!HasFP && NeedsDwarfCFI) {
699 BuildCFI(
700 MBB, MBBI, DL,
701 MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
702 }
703
704 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
706 StackPtr, false, 0)
707 .addImm(0)
709 NumFrameExtraProbe++;
710 CurrentOffset = StackProbeSize - AlignOffset;
711 }
712
713 // For the next N - 1 pages, just probe. I tried to take advantage of
714 // natural probes but it implies much more logic and there was very few
715 // interesting natural probes to interleave.
716 while (CurrentOffset + StackProbeSize < Offset) {
717 BuildStackAdjustment(MBB, MBBI, DL, -StackProbeSize, /*InEpilogue=*/false)
719
720 if (!HasFP && NeedsDwarfCFI) {
721 BuildCFI(
722 MBB, MBBI, DL,
723 MCCFIInstruction::createAdjustCfaOffset(nullptr, StackProbeSize));
724 }
725 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
727 StackPtr, false, 0)
728 .addImm(0)
730 NumFrameExtraProbe++;
731 CurrentOffset += StackProbeSize;
732 }
733
734 // No need to probe the tail, it is smaller than a Page.
735 uint64_t ChunkSize = Offset - CurrentOffset;
736 if (ChunkSize == SlotSize) {
737 // Use push for slot sized adjustments as a size optimization,
738 // like emitSPUpdate does when not probing.
739 unsigned Reg = Is64Bit ? X86::RAX : X86::EAX;
740 unsigned Opc = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
741 BuildMI(MBB, MBBI, DL, TII.get(Opc))
744 } else {
745 BuildStackAdjustment(MBB, MBBI, DL, -ChunkSize, /*InEpilogue=*/false)
747 }
748 // No need to adjust Dwarf CFA offset here, the last position of the stack has
749 // been defined
750}
751
/// Loop-based inline probe: splits \p MBB into the original block, a new
/// testMBB that allocates and touches one StackProbeSize step per iteration,
/// and a new tailMBB that receives the rest of the original block plus the
/// remaining (Offset % StackProbeSize) allocation. A non-zero \p AlignOffset
/// smaller than StackProbeSize is allocated and probed before the loop.
/// (Two signature lines and several builder-chain lines are absent from this
/// listing.)
752void X86FrameLowering::emitStackProbeInlineGenericLoop(
755 uint64_t AlignOffset) const {
756 assert(Offset && "null offset");
757
758 assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
760 "Inline stack probe loop will clobber live EFLAGS.");
761
762 const bool NeedsDwarfCFI = needsDwarfCFI(MF);
763 const bool HasFP = hasFP(MF);
766 const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
767 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
768
769 if (AlignOffset) {
770 if (AlignOffset < StackProbeSize) {
771 // Perform a first smaller allocation followed by a probe.
772 BuildStackAdjustment(MBB, MBBI, DL, -AlignOffset, /*InEpilogue=*/false)
774
775 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
777 StackPtr, false, 0)
778 .addImm(0)
780 NumFrameExtraProbe++;
781 Offset -= AlignOffset;
782 }
783 }
784
785 // Synthesize a loop
786 NumFrameLoopProbe++;
787 const BasicBlock *LLVM_BB = MBB.getBasicBlock();
788
789 MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(LLVM_BB);
790 MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(LLVM_BB);
791
793 MF.insert(MBBIter, testMBB);
794 MF.insert(MBBIter, tailMBB);
795
// Register holding the loop bound: R11 with 64-bit frame pointers, R11D on
// other 64-bit configurations, EAX otherwise.
796 Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
797 : Is64Bit ? X86::R11D
798 : X86::EAX;
799
800 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
803
804 // save loop bound
805 {
// NOTE(review): BoundOffset is `unsigned`, while the caller visible in this
// file passes Offset as a uint64_t -- confirm that the value cannot be
// truncated here for very large allocations.
806 const unsigned BoundOffset = alignDown(Offset, StackProbeSize);
807 const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
808 BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
809 .addReg(FinalStackProbed)
810 .addImm(BoundOffset)
812
813 // while in the loop, use loop-invariant reg for CFI,
814 // instead of the stack pointer, which changes during the loop
815 if (!HasFP && NeedsDwarfCFI) {
816 // x32 uses the same DWARF register numbers as x86-64,
817 // so there isn't a register number for r11d, we must use r11 instead
818 const Register DwarfFinalStackProbed =
820 ? Register(getX86SubSuperRegister(FinalStackProbed, 64))
821 : FinalStackProbed;
822
825 nullptr, TRI->getDwarfRegNum(DwarfFinalStackProbed, true)));
827 MCCFIInstruction::createAdjustCfaOffset(nullptr, BoundOffset));
828 }
829 }
830
831 // allocate a page
832 BuildStackAdjustment(*testMBB, testMBB->end(), DL, -StackProbeSize,
833 /*InEpilogue=*/false)
835
836 // touch the page
837 addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc))
839 StackPtr, false, 0)
840 .addImm(0)
842
843 // cmp with stack pointer bound
844 BuildMI(testMBB, DL, TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
846 .addReg(FinalStackProbed)
848
849 // jump
850 BuildMI(testMBB, DL, TII.get(X86::JCC_1))
851 .addMBB(testMBB)
// testMBB either loops back to itself or falls into tailMBB.
854 testMBB->addSuccessor(testMBB);
855 testMBB->addSuccessor(tailMBB);
856
857 // BB management
858 tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end());
860 MBB.addSuccessor(testMBB);
861
862 // handle tail
863 const uint64_t TailOffset = Offset % StackProbeSize;
864 MachineBasicBlock::iterator TailMBBIter = tailMBB->begin();
865 if (TailOffset) {
866 BuildStackAdjustment(*tailMBB, TailMBBIter, DL, -TailOffset,
867 /*InEpilogue=*/false)
869 }
870
871 // after the loop, switch back to stack pointer for CFI
872 if (!HasFP && NeedsDwarfCFI) {
873 // x32 uses the same DWARF register numbers as x86-64,
874 // so there isn't a register number for esp, we must use rsp instead
875 const Register DwarfStackPtr =
879
880 BuildCFI(*tailMBB, TailMBBIter, DL,
882 nullptr, TRI->getDwarfRegNum(DwarfStackPtr, true)));
883 }
884
885 // Update Live In information
886 fullyRecomputeLiveIns({tailMBB, testMBB});
887}
888
889void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
891 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
893 assert(STI.is64Bit() && "different expansion needed for 32 bit");
894 assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");
896 const BasicBlock *LLVM_BB = MBB.getBasicBlock();
897
898 assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
900 "Inline stack probe loop will clobber live EFLAGS.");
901
902 // RAX contains the number of bytes of desired stack adjustment.
903 // The handling here assumes this value has already been updated so as to
904 // maintain stack alignment.
905 //
906 // We need to exit with RSP modified by this amount and execute suitable
907 // page touches to notify the OS that we're growing the stack responsibly.
908 // All stack probing must be done without modifying RSP.
909 //
910 // MBB:
911 // SizeReg = RAX;
912 // ZeroReg = 0
913 // CopyReg = RSP
914 // Flags, TestReg = CopyReg - SizeReg
915 // FinalReg = !Flags.Ovf ? TestReg : ZeroReg
916 // LimitReg = gs magic thread env access
917 // if FinalReg >= LimitReg goto ContinueMBB
918 // RoundBB:
919 // RoundReg = page address of FinalReg
920 // LoopMBB:
921 // LoopReg = PHI(LimitReg,ProbeReg)
922 // ProbeReg = LoopReg - PageSize
923 // [ProbeReg] = 0
924 // if (ProbeReg > RoundReg) goto LoopMBB
925 // ContinueMBB:
926 // RSP = RSP - RAX
927 // [rest of original MBB]
928
929 // Set up the new basic blocks
930 MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(LLVM_BB);
931 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
932 MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(LLVM_BB);
933
934 MachineFunction::iterator MBBIter = std::next(MBB.getIterator());
935 MF.insert(MBBIter, RoundMBB);
936 MF.insert(MBBIter, LoopMBB);
937 MF.insert(MBBIter, ContinueMBB);
938
939 // Split MBB and move the tail portion down to ContinueMBB.
940 MachineBasicBlock::iterator BeforeMBBI = std::prev(MBBI);
941 ContinueMBB->splice(ContinueMBB->begin(), &MBB, MBBI, MBB.end());
943
944 // Some useful constants
945 const int64_t ThreadEnvironmentStackLimit = 0x10;
946 const int64_t PageSize = 0x1000;
947 const int64_t PageMask = ~(PageSize - 1);
948
949 // Registers we need. For the normal case we use virtual
950 // registers. For the prolog expansion we use RAX, RCX and RDX.
952 const TargetRegisterClass *RegClass = &X86::GR64RegClass;
953 const Register
954 SizeReg = InProlog ? X86::RAX : MRI.createVirtualRegister(RegClass),
955 ZeroReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
956 CopyReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
957 TestReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
958 FinalReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
959 RoundedReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
960 LimitReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
961 JoinReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
962 ProbeReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass);
963
964 // SP-relative offsets where we can save RCX and RDX.
965 int64_t RCXShadowSlot = 0;
966 int64_t RDXShadowSlot = 0;
967
968 // If inlining in the prolog, save RCX and RDX.
969 if (InProlog) {
970 // Compute the offsets. We need to account for things already
971 // pushed onto the stack at this point: return address, frame
972 // pointer (if used), and callee saves.
974 const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();
975 const bool HasFP = hasFP(MF);
976
977 // Check if we need to spill RCX and/or RDX.
978 // Here we assume that no earlier prologue instruction changes RCX and/or
979 // RDX, so checking the block live-ins is enough.
980 const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX);
981 const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX);
982 int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
983 // Assign the initial slot to both registers, then change RDX's slot if both
984 // need to be spilled.
985 if (IsRCXLiveIn)
986 RCXShadowSlot = InitSlot;
987 if (IsRDXLiveIn)
988 RDXShadowSlot = InitSlot;
989 if (IsRDXLiveIn && IsRCXLiveIn)
990 RDXShadowSlot += 8;
991 // Emit the saves if needed.
992 if (IsRCXLiveIn)
993 addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
994 RCXShadowSlot)
995 .addReg(X86::RCX);
996 if (IsRDXLiveIn)
997 addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
998 RDXShadowSlot)
999 .addReg(X86::RDX);
1000 } else {
1001 // Not in the prolog. Copy RAX to a virtual reg.
1002 BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX);
1003 }
1004
1005 // Add code to MBB to check for overflow and set the new target stack pointer
1006 // to zero if so.
1007 BuildMI(&MBB, DL, TII.get(X86::XOR64rr), ZeroReg)
1008 .addReg(ZeroReg, RegState::Undef)
1009 .addReg(ZeroReg, RegState::Undef);
1010 BuildMI(&MBB, DL, TII.get(X86::MOV64rr), CopyReg).addReg(X86::RSP);
1011 BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg)
1012 .addReg(CopyReg)
1013 .addReg(SizeReg);
1014 BuildMI(&MBB, DL, TII.get(X86::CMOV64rr), FinalReg)
1015 .addReg(TestReg)
1016 .addReg(ZeroReg)
1018
1019 // FinalReg now holds final stack pointer value, or zero if
1020 // allocation would overflow. Compare against the current stack
1021 // limit from the thread environment block. Note this limit is the
1022 // lowest touched page on the stack, not the point at which the OS
1023 // will cause an overflow exception, so this is just an optimization
1024 // to avoid unnecessarily touching pages that are below the current
1025 // SP but already committed to the stack by the OS.
1026 BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg)
1027 .addReg(0)
1028 .addImm(1)
1029 .addReg(0)
1030 .addImm(ThreadEnvironmentStackLimit)
1031 .addReg(X86::GS);
1032 BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg);
1033 // Jump if the desired stack pointer is at or above the stack limit.
1034 BuildMI(&MBB, DL, TII.get(X86::JCC_1))
1035 .addMBB(ContinueMBB)
1037
1038 // Add code to roundMBB to round the final stack pointer to a page boundary.
1039 if (InProlog)
1040 RoundMBB->addLiveIn(FinalReg);
1041 BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg)
1042 .addReg(FinalReg)
1043 .addImm(PageMask);
1044 BuildMI(RoundMBB, DL, TII.get(X86::JMP_1)).addMBB(LoopMBB);
1045
1046 // LimitReg now holds the current stack limit, RoundedReg page-rounded
1047 // final RSP value. Add code to loopMBB to decrement LimitReg page-by-page
1048 // and probe until we reach RoundedReg.
1049 if (!InProlog) {
1050 BuildMI(LoopMBB, DL, TII.get(X86::PHI), JoinReg)
1051 .addReg(LimitReg)
1052 .addMBB(RoundMBB)
1053 .addReg(ProbeReg)
1054 .addMBB(LoopMBB);
1055 }
1056
1057 if (InProlog)
1058 LoopMBB->addLiveIn(JoinReg);
1059 addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg,
1060 false, -PageSize);
1061
1062 // Probe by storing a byte onto the stack.
1063 BuildMI(LoopMBB, DL, TII.get(X86::MOV8mi))
1064 .addReg(ProbeReg)
1065 .addImm(1)
1066 .addReg(0)
1067 .addImm(0)
1068 .addReg(0)
1069 .addImm(0);
1070
1071 if (InProlog)
1072 LoopMBB->addLiveIn(RoundedReg);
1073 BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
1074 .addReg(RoundedReg)
1075 .addReg(ProbeReg);
1076 BuildMI(LoopMBB, DL, TII.get(X86::JCC_1))
1077 .addMBB(LoopMBB)
1079
1080 MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI();
1081
1082 // If in prolog, restore RDX and RCX.
1083 if (InProlog) {
1084 if (RCXShadowSlot) // It means we spilled RCX in the prologue.
1085 addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
1086 TII.get(X86::MOV64rm), X86::RCX),
1087 X86::RSP, false, RCXShadowSlot);
1088 if (RDXShadowSlot) // It means we spilled RDX in the prologue.
1089 addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
1090 TII.get(X86::MOV64rm), X86::RDX),
1091 X86::RSP, false, RDXShadowSlot);
1092 }
1093
1094 // Now that the probing is done, add code to continueMBB to update
1095 // the stack pointer for real.
1096 BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
1097 .addReg(X86::RSP)
1098 .addReg(SizeReg);
1099
1100 // Add the control flow edges we need.
1101 MBB.addSuccessor(ContinueMBB);
1102 MBB.addSuccessor(RoundMBB);
1103 RoundMBB->addSuccessor(LoopMBB);
1104 LoopMBB->addSuccessor(ContinueMBB);
1105 LoopMBB->addSuccessor(LoopMBB);
1106
1107 if (InProlog) {
1108 LivePhysRegs LiveRegs;
1109 computeAndAddLiveIns(LiveRegs, *ContinueMBB);
1110 }
1111
1112 // Mark all the instructions added to the prolog as frame setup.
1113 if (InProlog) {
1114 for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) {
1115 BeforeMBBI->setFlag(MachineInstr::FrameSetup);
1116 }
1117 for (MachineInstr &MI : *RoundMBB) {
1119 }
1120 for (MachineInstr &MI : *LoopMBB) {
1122 }
1123 for (MachineInstr &MI :
1124 llvm::make_range(ContinueMBB->begin(), ContinueMBBI)) {
1126 }
1127 }
1128}
1129
// Emits a call to the stack probe helper at the insertion point MBBI.
//
// What the visible code does:
//  - Reports a fatal error for 64-bit + large code model + indirect thunk
//    calls, which is not implemented yet.
//  - Selects the call opcode: CALL64r (64-bit, large code model),
//    CALL64pcrel32 (other 64-bit) or CALLpcrel32 (32-bit).
//  - For the large code model, materializes the callee in R11 and calls
//    through that register.
//  - On Win64 and on non-Windows targets, builds one more instruction that
//    takes SP and AX, because those probes do not adjust the stack pointer
//    themselves.
//  - If InstrNum is provided, substitutes its debug value with the
//    instruction that modifies SP.
//  - If InProlog is set, flags every inserted instruction as FrameSetup.
//
// NOTE(review): this listing has gaps in its line numbering, so some source
// lines are not visible here (for example the declaration of CI and the
// opcode of the SP adjustment). Confirm details against the complete file.
1130void X86FrameLowering::emitStackProbeCall(
1132 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
1133 std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
1134 bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
1135
1136 // FIXME: Add indirect thunk support and remove this.
1137 if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls())
1138 report_fatal_error("Emitting stack probe calls on 64-bit with the large "
1139 "code model and indirect thunks not yet implemented.");
1140
1141 assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
1143 "Stack probe calls will clobber live EFLAGS.");
1144
// Pick the call opcode: register-indirect for the 64-bit large code model,
// pc-relative otherwise.
1145 unsigned CallOp;
1146 if (Is64Bit)
1147 CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
1148 else
1149 CallOp = X86::CALLpcrel32;
1150
1152
// Remember the instruction just before the insertion point; the loop at the
// end of this function starts from it to visit everything inserted below.
1154 MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI);
1155
1156 // All current stack probes take AX and SP as input, clobber flags, and
1157 // preserve all registers. x86_64 probes leave RSP unmodified.
1159 // For the large code model, we have to call through a register. Use R11,
1160 // as it is scratch in all supported calling conventions.
1161 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
1163 CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
1164 } else {
1165 CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp))
1167 }
1168
// Register names matching the frame pointer width in use.
1169 unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX;
1170 unsigned SP = Uses64BitFramePtr ? X86::RSP : X86::ESP;
1176
// By default the call itself is taken as the instruction that modifies SP;
// the branch below replaces it with the explicit SP adjustment.
1177 MachineInstr *ModInst = CI;
1178 if (STI.isTargetWin64() || !STI.isOSWindows()) {
1179 // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
1180 // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
1181 // themselves. They also do not clobber %rax so we can reuse it when
1182 // adjusting %rsp.
1183 // All other platforms do not specify a particular ABI for the stack probe
1184 // function, so we arbitrarily define it to not adjust %esp/%rsp itself.
1185 ModInst =
1187 .addReg(SP)
1188 .addReg(AX);
1189 }
1190
1191 // DebugInfo variable locations -- if there's an instruction number for the
1192 // allocation (i.e., DYN_ALLOC_*), substitute it for the instruction that
1193 // modifies SP.
1194 if (InstrNum) {
1195 if (STI.isTargetWin64() || !STI.isOSWindows()) {
1196 // Label destination operand of the subtract.
1197 MF.makeDebugValueSubstitution(*InstrNum,
1198 {ModInst->getDebugInstrNum(), 0});
1199 } else {
1200 // Label the call. The operand number is the penultimate operand, zero
1201 // based.
1202 unsigned SPDefOperand = ModInst->getNumOperands() - 2;
1204 *InstrNum, {ModInst->getDebugInstrNum(), SPDefOperand});
1205 }
1206 }
1207
1208 if (InProlog) {
1209 // Apply the frame setup flag to all inserted instrs.
1210 for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI)
1211 ExpansionMBBI->setFlag(MachineInstr::FrameSetup);
1212 }
1213}
1214
/// Compute the frame offset used with the Win64 UWOP_SET_FPREG unwind opcode.
///
/// \param SPAdjust  Stack adjustment, in bytes, that the offset is derived
///                  from.
/// \returns \p SPAdjust capped at 128 and rounded down to a multiple of 16.
static unsigned calculateSetFPREG(uint64_t SPAdjust) {
  // Win64 ABI has a less restrictive limitation of 240; 128 works equally well
  // and might require smaller successive adjustments.
  const uint64_t Win64MaxSEHOffset = 128;
  // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode.
  const uint64_t SEHAlignMask = ~uint64_t{15};

  const uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset);
  // The value is at most 128 here, so the narrowing conversion is lossless.
  return static_cast<unsigned>(SEHFrameOffset & SEHAlignMask);
}
1223
// Computes the stack alignment, as a plain integer, that the prologue should
// establish for MF.
//
// Starts from the frame info's maximum alignment. With the "stackrealign"
// function attribute it is raised to the ABI stack alignment when the
// function has calls, or to SlotSize otherwise. A final guarded block then
// forces at least 16 (with "stackrealign") or exactly 16 (without it).
//
1224// If we're forcing a stack realignment we can't rely on just the frame
1225// info, we need to know the ABI stack alignment as well in case we
1226// have a call out. Otherwise just make sure we have some alignment - we'll
1227// go with the minimum SlotSize.
//
// NOTE(review): the return-type line and the condition opening the last block
// are missing from this listing (the numbering has gaps); confirm both
// against the complete file.
1229X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const {
1230 const MachineFrameInfo &MFI = MF.getFrameInfo();
1231 Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment.
1232 Align StackAlign = getStackAlign();
1233 bool HasRealign = MF.getFunction().hasFnAttribute("stackrealign");
1234 if (HasRealign) {
// With calls, take the larger of the ABI alignment and the frame's own
// requirement; without calls, only make sure we reach SlotSize.
1235 if (MFI.hasCalls())
1236 MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
1237 else if (MaxAlign < SlotSize)
1238 MaxAlign = Align(SlotSize);
1239 }
1240
// Inside the guarded block: with forced realignment keep any alignment above
// 16 and raise smaller ones to 16; otherwise set the alignment to 16.
1242 if (HasRealign)
1243 MaxAlign = (MaxAlign > 16) ? MaxAlign : Align(16);
1244 else
1245 MaxAlign = Align(16);
1246 }
1247 return MaxAlign.value();
1248}
1249
// Aligns Reg down to MaxAlign by emitting an AND of Reg with -MaxAlign.
//
// Parameters visible here:
//   MBB      - block receiving the code.
//   DL       - debug location attached to the emitted instructions.
//   Reg      - register to align.
//   MaxAlign - requested alignment in bytes.
//
// Two code paths:
//  - Plain path: a single AND is inserted at MBBI and its operand 3 (the
//    EFLAGS implicit def, per the comment below) is marked dead.
//  - Probing path: taken when Reg is the stack pointer, inline stack probing
//    is enabled and MaxAlign >= StackProbeSize. Four new blocks (entry, head,
//    body, foot) are inserted. The aligned target is computed into the
//    scratch register FinalStackProbed, then the stack pointer is moved down
//    in StackProbeSize steps with a zero stored at each step, and finally the
//    stack pointer is set to FinalStackProbed and the new top is touched.
//    NumFrameLoopProbe is incremented on this path.
//
// NOTE(review): this listing has gaps in its line numbering; several operand
// and flag lines (block creation, COPY source, condition codes, MI flags) are
// not visible. Confirm details against the complete file.
1250void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
1252 const DebugLoc &DL, unsigned Reg,
1253 uint64_t MaxAlign) const {
// -MaxAlign is the AND mask; its value also selects the AND opcode form.
1254 uint64_t Val = -MaxAlign;
1255 unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);
1256
1257 MachineFunction &MF = *MBB.getParent();
1259 const X86TargetLowering &TLI = *STI.getTargetLowering();
1260 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
1261 const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
1262
1263 // We want to make sure that (in worst case) less than StackProbeSize bytes
1264 // are not probed after the AND. This assumption is used in
1265 // emitStackProbeInlineGeneric.
1266 if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) {
1267 {
1268 NumFrameLoopProbe++;
1269 MachineBasicBlock *entryMBB =
1271 MachineBasicBlock *headMBB =
1273 MachineBasicBlock *bodyMBB =
1275 MachineBasicBlock *footMBB =
1277
1279 MF.insert(MBBIter, entryMBB);
1280 MF.insert(MBBIter, headMBB);
1281 MF.insert(MBBIter, bodyMBB);
1282 MF.insert(MBBIter, footMBB);
// Store opcode used for probing, and the scratch register that holds the
// aligned stack pointer target (R11, R11D or EAX depending on the mode).
1283 const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
1284 Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
1285 : Is64Bit ? X86::R11D
1286 : X86::EAX;
1287
1288 // Setup entry block
1289 {
1290
// Move everything that preceded MBBI in MBB into the new entry block, then
// compute the aligned target in FinalStackProbed.
1291 entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI);
1292 BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
1295 MachineInstr *MI =
1296 BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed)
1297 .addReg(FinalStackProbed)
1298 .addImm(Val)
1300
1301 // The EFLAGS implicit def is dead.
1302 MI->getOperand(3).setIsDead();
1303
1304 BuildMI(entryMBB, DL,
1305 TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
1306 .addReg(FinalStackProbed)
1309 BuildMI(entryMBB, DL, TII.get(X86::JCC_1))
1310 .addMBB(&MBB)
1313 entryMBB->addSuccessor(headMBB);
1314 entryMBB->addSuccessor(&MBB);
1315 }
1316
1317 // Loop entry block
1318
1319 {
// First step down by StackProbeSize, then decide between the loop body and
// the footer.
1320 const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
1321 BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr)
1323 .addImm(StackProbeSize)
1325
1326 BuildMI(headMBB, DL,
1327 TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
1329 .addReg(FinalStackProbed)
1331
1332 // jump to the footer if StackPtr < FinalStackProbed
1333 BuildMI(headMBB, DL, TII.get(X86::JCC_1))
1334 .addMBB(footMBB)
1337
1338 headMBB->addSuccessor(bodyMBB);
1339 headMBB->addSuccessor(footMBB);
1340 }
1341
1342 // setup loop body
1343 {
// Probe: store an immediate 0 at offset 0 from the stack pointer, then step
// down by another StackProbeSize.
1344 addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc))
1346 StackPtr, false, 0)
1347 .addImm(0)
1349
1350 const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
1351 BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr)
1353 .addImm(StackProbeSize)
1355
1356 // cmp with stack pointer bound
1357 BuildMI(bodyMBB, DL,
1358 TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
1359 .addReg(FinalStackProbed)
1362
1363 // jump back while FinalStackProbed < StackPtr
1364 BuildMI(bodyMBB, DL, TII.get(X86::JCC_1))
1365 .addMBB(bodyMBB)
1368 bodyMBB->addSuccessor(bodyMBB);
1369 bodyMBB->addSuccessor(footMBB);
1370 }
1371
1372 // setup loop footer
1373 {
// Land the stack pointer on the aligned target and touch the new top.
1374 BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr)
1375 .addReg(FinalStackProbed)
1377 addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc))
1379 StackPtr, false, 0)
1380 .addImm(0)
1382 footMBB->addSuccessor(&MBB);
1383 }
1384
// The control flow changed, so live-ins of the affected blocks are rebuilt.
1385 fullyRecomputeLiveIns({footMBB, bodyMBB, headMBB, &MBB});
1386 }
1387 } else {
// Plain path: a single AND at the insertion point.
1388 MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
1389 .addReg(Reg)
1390 .addImm(Val)
1392
1393 // The EFLAGS implicit def is dead.
1394 MI->getOperand(3).setIsDead();
1395 }
1396}
1397
// NOTE(review): the signature line of this function is missing from this
// listing; the body below is shown as-is.
//
// Returns true only when all three hold: the target is 64-bit, the function's
// calling convention is not a Win64 one, and the function does not carry the
// NoRedZone attribute.
1399 // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be
1400 // clobbered by any interrupt handler.
1401 assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
1402 "MF used frame lowering for wrong subtarget");
1403 const Function &Fn = MF.getFunction();
1404 const bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv());
1405 return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone);
1406}
1407
1408/// Return true if we need to use the restricted Windows x64 prologue and
1409/// epilogue code patterns that can be described with WinCFI (.seh_*
1410/// directives).
1411bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const {
1412 return MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
1413}
1414
1415bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
1416 return !isWin64Prologue(MF) && MF.needsFrameMoves();
1417}
1418
1419/// Return true if an opcode is part of the REP group of instructions
1420static bool isOpcodeRep(unsigned Opcode) {
1421 switch (Opcode) {
1422 case X86::REPNE_PREFIX:
1423 case X86::REP_MOVSB_32:
1424 case X86::REP_MOVSB_64:
1425 case X86::REP_MOVSD_32:
1426 case X86::REP_MOVSD_64:
1427 case X86::REP_MOVSQ_32:
1428 case X86::REP_MOVSQ_64:
1429 case X86::REP_MOVSW_32:
1430 case X86::REP_MOVSW_64:
1431 case X86::REP_PREFIX:
1432 case X86::REP_STOSB_32:
1433 case X86::REP_STOSB_64:
1434 case X86::REP_STOSD_32:
1435 case X86::REP_STOSD_64:
1436 case X86::REP_STOSQ_32:
1437 case X86::REP_STOSQ_64:
1438 case X86::REP_STOSW_32:
1439 case X86::REP_STOSW_64:
1440 return true;
1441 default:
1442 break;
1443 }
1444 return false;
1445}
1446
1447/// emitPrologue - Push callee-saved registers onto the stack, which
1448/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
1449/// space for local variables. Also emit labels used by the exception handler to
1450/// generate the exception handling frames.
1451
1452/*
1453 Here's a gist of what gets emitted:
1454
1455 ; Establish frame pointer, if needed
1456 [if needs FP]
1457 push %rbp
1458 .cfi_def_cfa_offset 16
1459 .cfi_offset %rbp, -16
1460 .seh_pushreg %rbp
1461 mov %rsp, %rbp
1462 .cfi_def_cfa_register %rbp
1463
1464 ; Spill general-purpose registers
1465 [for all callee-saved GPRs]
1466 pushq %<reg>
1467 [if not needs FP]
1468 .cfi_def_cfa_offset (offset from RETADDR)
1469 .seh_pushreg %<reg>
1470
1471 ; If the required stack alignment > default stack alignment
1472 ; rsp needs to be re-aligned. This creates a "re-alignment gap"
1473 ; of unknown size in the stack frame.
1474 [if stack needs re-alignment]
1475 and $MASK, %rsp
1476
1477 ; Allocate space for locals
1478 [if target is Windows and allocated space > 4096 bytes]
1479 ; Windows needs special care for allocations larger
1480 ; than one page.
1481 mov $NNN, %rax
1482 call ___chkstk_ms/___chkstk
1483 sub %rax, %rsp
1484 [else]
1485 sub $NNN, %rsp
1486
1487 [if needs FP]
1488 .seh_stackalloc (size of XMM spill slots)
1489 .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
1490 [else]
1491 .seh_stackalloc NNN
1492
1493 ; Spill XMMs
1494 ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved,
1495 ; they may get spilled on any platform, if the current function
1496 ; calls @llvm.eh.unwind.init
1497 [if needs FP]
1498 [for all callee-saved XMM registers]
1499 movaps %<xmm reg>, -MMM(%rbp)
1500 [for all callee-saved XMM registers]
1501 .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
1502 ; i.e. the offset relative to (%rbp - SEHFrameOffset)
1503 [else]
1504 [for all callee-saved XMM registers]
1505 movaps %<xmm reg>, KKK(%rsp)
1506 [for all callee-saved XMM registers]
1507 .seh_savexmm %<xmm reg>, KKK
1508
1509 .seh_endprologue
1510
1511 [if needs base pointer]
1512 mov %rsp, %rbx
1513 [if needs to restore base pointer]
1514 mov %rsp, -MMM(%rbp)
1515
1516 ; Emit CFI info
1517 [if needs FP]
1518 [for all callee-saved registers]
1519 .cfi_offset %<reg>, (offset from %rbp)
1520 [else]
1521 .cfi_def_cfa_offset (offset from RETADDR)
1522 [for all callee-saved registers]
1523 .cfi_offset %<reg>, (offset from %rsp)
1524
1525 Notes:
1526 - .seh directives are emitted only for Windows 64 ABI
1527 - .cv_fpo directives are emitted on win32 when emitting CodeView
1528 - .cfi directives are emitted for all other ABIs
1529 - for 32-bit code, substitute %e?? registers for %r??
1530*/
1531
1533 MachineBasicBlock &MBB) const {
1534 assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
1535 "MF used frame lowering for wrong subtarget");
1537 MachineFrameInfo &MFI = MF.getFrameInfo();
1538 const Function &Fn = MF.getFunction();
1540 uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
1541 uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate.
1542 bool IsFunclet = MBB.isEHFuncletEntry();
1544 if (Fn.hasPersonalityFn())
1545 Personality = classifyEHPersonality(Fn.getPersonalityFn());
1546 bool FnHasClrFunclet =
1547 MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR;
1548 bool IsClrFunclet = IsFunclet && FnHasClrFunclet;
1549 bool HasFP = hasFP(MF);
1550 bool IsWin64Prologue = isWin64Prologue(MF);
1551 bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry();
1552 // FIXME: Emit FPO data for EH funclets.
1553 bool NeedsWinFPO = !IsFunclet && STI.isTargetWin32() &&
1555 bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO;
1556 bool NeedsDwarfCFI = needsDwarfCFI(MF);
1558 const Register MachineFramePtr =
1560 : FramePtr;
1561 Register BasePtr = TRI->getBaseRegister();
1562 bool HasWinCFI = false;
1563
1564 // Debug location must be unknown since the first debug location is used
1565 // to determine the end of the prologue.
1566 DebugLoc DL;
1567 Register ArgBaseReg;
1568
1569 // Emit extra prolog for argument stack slot reference.
1570 if (auto *MI = X86FI->getStackPtrSaveMI()) {
1571 // MI is the lea instruction that was created in X86ArgumentStackSlotPass.
1572 // Create an extra prolog for stack realignment.
1573 ArgBaseReg = MI->getOperand(0).getReg();
1574 // leal 4(%esp), %basereg
1575 // .cfi_def_cfa %basereg, 0
1576 // andl $-128, %esp
1577 // pushl -4(%basereg)
1578 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::LEA64r : X86::LEA32r),
1579 ArgBaseReg)
1581 .addImm(1)
1582 .addUse(X86::NoRegister)
1584 .addUse(X86::NoRegister)
1586 if (NeedsDwarfCFI) {
1587 // .cfi_def_cfa %basereg, 0
1588 unsigned DwarfStackPtr = TRI->getDwarfRegNum(ArgBaseReg, true);
1589 BuildCFI(MBB, MBBI, DL,
1590 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, 0),
1592 }
1593 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1594 int64_t Offset = -(int64_t)SlotSize;
1595 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm))
1596 .addReg(ArgBaseReg)
1597 .addImm(1)
1598 .addReg(X86::NoRegister)
1599 .addImm(Offset)
1600 .addReg(X86::NoRegister)
1602 }
1603
1604 // Space reserved for stack-based arguments when making a (ABI-guaranteed)
1605 // tail call.
1606 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
1607 if (TailCallArgReserveSize && IsWin64Prologue)
1608 report_fatal_error("Can't handle guaranteed tail call under win64 yet");
1609
1610 const bool EmitStackProbeCall =
1612 unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
1613
1614 if (HasFP && X86FI->hasSwiftAsyncContext()) {
1618 // The special symbol below is absolute and has a *value* suitable to be
1619 // combined with the frame pointer directly.
1620 BuildMI(MBB, MBBI, DL, TII.get(X86::OR64rm), MachineFramePtr)
1621 .addUse(MachineFramePtr)
1622 .addUse(X86::RIP)
1623 .addImm(1)
1624 .addUse(X86::NoRegister)
1625 .addExternalSymbol("swift_async_extendedFramePointerFlags",
1627 .addUse(X86::NoRegister);
1628 break;
1629 }
1630 [[fallthrough]];
1631
1633 assert(
1634 !IsWin64Prologue &&
1635 "win64 prologue does not set the bit 60 in the saved frame pointer");
1636 BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8), MachineFramePtr)
1637 .addUse(MachineFramePtr)
1638 .addImm(60)
1640 break;
1641
1643 break;
1644 }
1645 }
1646
1647 // Re-align the stack on 64-bit if the x86-interrupt calling convention is
1648 // used and an error code was pushed, since the x86-64 ABI requires a 16-byte
1649 // stack alignment.
1651 Fn.arg_size() == 2) {
1652 StackSize += 8;
1653 MFI.setStackSize(StackSize);
1654
1655 // Update the stack pointer by pushing a register. This is the instruction
1656 // that would end up being emitted by a call to `emitSPUpdate`.
1657 // Hard-coding the update to a push avoids emitting a second
1658 // `STACKALLOC_W_PROBING` instruction in the save block: We know that stack
1659 // probing isn't needed anyways for an 8-byte update.
1660 // Pushing a register leaves us in a similar situation to a regular
1661 // function call where we know that the address at (rsp-8) is writeable.
1662 // That way we avoid any off-by-ones with stack probing for additional
1663 // stack pointer updates later on.
1664 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1665 .addReg(X86::RAX, RegState::Undef)
1667 }
1668
1669 // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
1670 // function, and use up to 128 bytes of stack space, don't have a frame
1671 // pointer, calls, or dynamic alloca then we do not need to adjust the
1672 // stack pointer (we fit in the Red Zone). We also check that we don't
1673 // push and pop from the stack.
1674 if (has128ByteRedZone(MF) && !TRI->hasStackRealignment(MF) &&
1675 !MFI.hasVarSizedObjects() && // No dynamic alloca.
1676 !MFI.adjustsStack() && // No calls.
1677 !EmitStackProbeCall && // No stack probes.
1678 !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
1679 !MF.shouldSplitStack()) { // Regular stack
1680 uint64_t MinSize =
1682 if (HasFP)
1683 MinSize += SlotSize;
1684 X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0);
1685 StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
1686 MFI.setStackSize(StackSize);
1687 }
1688
1689 // Insert stack pointer adjustment for later moving of return addr. Only
1690 // applies to tail call optimized functions where the callee argument stack
1691 // size is bigger than the callers.
1692 if (TailCallArgReserveSize != 0) {
1693 BuildStackAdjustment(MBB, MBBI, DL, -(int)TailCallArgReserveSize,
1694 /*InEpilogue=*/false)
1696 }
1697
1698 // Mapping for machine moves:
1699 //
1700 // DST: VirtualFP AND
1701 // SRC: VirtualFP => DW_CFA_def_cfa_offset
1702 // ELSE => DW_CFA_def_cfa
1703 //
1704 // SRC: VirtualFP AND
1705 // DST: Register => DW_CFA_def_cfa_register
1706 //
1707 // ELSE
1708 // OFFSET < 0 => DW_CFA_offset_extended_sf
1709 // REG < 64 => DW_CFA_offset + Reg
1710 // ELSE => DW_CFA_offset_extended
1711
1712 uint64_t NumBytes = 0;
1713 int stackGrowth = -SlotSize;
1714
1715 // Find the funclet establisher parameter
1716 Register Establisher = X86::NoRegister;
1717 if (IsClrFunclet)
1718 Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX;
1719 else if (IsFunclet)
1720 Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX;
1721
1722 if (IsWin64Prologue && IsFunclet && !IsClrFunclet) {
1723 // Immediately spill establisher into the home slot.
1724 // The runtime cares about this.
1725 // MOV64mr %rdx, 16(%rsp)
1726 unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
1727 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16)
1728 .addReg(Establisher)
1730 MBB.addLiveIn(Establisher);
1731 }
1732
1733 if (HasFP) {
1734 assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved");
1735
1736 // Calculate required stack adjustment.
1737 uint64_t FrameSize = StackSize - SlotSize;
1738 NumBytes =
1739 FrameSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1740
1741 // Callee-saved registers are pushed on stack before the stack is realigned.
1742 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
1743 NumBytes = alignTo(NumBytes, MaxAlign);
1744
1745 // Save EBP/RBP into the appropriate stack slot.
1746 BuildMI(MBB, MBBI, DL,
1748 .addReg(MachineFramePtr, RegState::Kill)
1750
1751 if (NeedsDwarfCFI && !ArgBaseReg.isValid()) {
1752 // Mark the place where EBP/RBP was saved.
1753 // Define the current CFA rule to use the provided offset.
1754 assert(StackSize);
1755 BuildCFI(MBB, MBBI, DL,
1757 nullptr, -2 * stackGrowth + (int)TailCallArgReserveSize),
1759
1760 // Change the rule for the FramePtr to be an "offset" rule.
1761 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1762 BuildCFI(MBB, MBBI, DL,
1763 MCCFIInstruction::createOffset(nullptr, DwarfFramePtr,
1764 2 * stackGrowth -
1765 (int)TailCallArgReserveSize),
1767 }
1768
1769 if (NeedsWinCFI) {
1770 HasWinCFI = true;
1771 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1774 }
1775
1776 if (!IsFunclet) {
1777 if (X86FI->hasSwiftAsyncContext()) {
1778 assert(!IsWin64Prologue &&
1779 "win64 prologue does not store async context right below rbp");
1780 const auto &Attrs = MF.getFunction().getAttributes();
1781
1782 // Before we update the live frame pointer we have to ensure there's a
1783 // valid (or null) asynchronous context in its slot just before FP in
1784 // the frame record, so store it now.
1785 if (Attrs.hasAttrSomewhere(Attribute::SwiftAsync)) {
1786 // We have an initial context in r14, store it just before the frame
1787 // pointer.
1788 MBB.addLiveIn(X86::R14);
1789 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1790 .addReg(X86::R14)
1792 } else {
1793 // No initial context, store null so that there's no pointer that
1794 // could be misused.
1795 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i32))
1796 .addImm(0)
1798 }
1799
1800 if (NeedsWinCFI) {
1801 HasWinCFI = true;
1802 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1803 .addImm(X86::R14)
1805 }
1806
1807 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr)
1808 .addUse(X86::RSP)
1809 .addImm(1)
1810 .addUse(X86::NoRegister)
1811 .addImm(8)
1812 .addUse(X86::NoRegister)
1814 BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri32), X86::RSP)
1815 .addUse(X86::RSP)
1816 .addImm(8)
1818 }
1819
1820 if (!IsWin64Prologue && !IsFunclet) {
1821 // Update EBP with the new base value.
1822 if (!X86FI->hasSwiftAsyncContext())
1823 BuildMI(MBB, MBBI, DL,
1824 TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
1825 FramePtr)
1828
1829 if (NeedsDwarfCFI) {
1830 if (ArgBaseReg.isValid()) {
1831 SmallString<64> CfaExpr;
1832 CfaExpr.push_back(dwarf::DW_CFA_expression);
1833 uint8_t buffer[16];
1834 unsigned DwarfReg = TRI->getDwarfRegNum(MachineFramePtr, true);
1835 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
1836 CfaExpr.push_back(2);
1837 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
1838 CfaExpr.push_back(0);
1839 // DW_CFA_expression: reg5 DW_OP_breg5 +0
1840 BuildCFI(MBB, MBBI, DL,
1841 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
1843 } else {
1844 // Mark effective beginning of when frame pointer becomes valid.
1845 // Define the current CFA to use the EBP/RBP register.
1846 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1847 BuildCFI(
1848 MBB, MBBI, DL,
1849 MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
1851 }
1852 }
1853
1854 if (NeedsWinFPO) {
1855 // .cv_fpo_setframe $FramePtr
1856 HasWinCFI = true;
1857 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
1859 .addImm(0)
1861 }
1862 }
1863 }
1864 } else {
1865 assert(!IsFunclet && "funclets without FPs not yet implemented");
1866 NumBytes =
1867 StackSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1868 }
1869
1870 // Update the offset adjustment, which is mainly used by codeview to translate
1871 // from ESP to VFRAME relative local variable offsets.
1872 if (!IsFunclet) {
1873 if (HasFP && TRI->hasStackRealignment(MF))
1874 MFI.setOffsetAdjustment(-NumBytes);
1875 else
1876 MFI.setOffsetAdjustment(-StackSize);
1877 }
1878
1879 // For EH funclets, only allocate enough space for outgoing calls. Save the
1880 // NumBytes value that we would've used for the parent frame.
1881 unsigned ParentFrameNumBytes = NumBytes;
1882 if (IsFunclet)
1883 NumBytes = getWinEHFuncletFrameSize(MF);
1884
1885 // Skip the callee-saved push instructions.
1886 bool PushedRegs = false;
1887 int StackOffset = 2 * stackGrowth;
1889 auto IsCSPush = [&](const MachineBasicBlock::iterator &MBBI) {
1890 if (MBBI == MBB.end() || !MBBI->getFlag(MachineInstr::FrameSetup))
1891 return false;
1892 unsigned Opc = MBBI->getOpcode();
1893 return Opc == X86::PUSH32r || Opc == X86::PUSH64r || Opc == X86::PUSHP64r ||
1894 Opc == X86::PUSH2 || Opc == X86::PUSH2P;
1895 };
1896
1897 while (IsCSPush(MBBI)) {
1898 PushedRegs = true;
1899 Register Reg = MBBI->getOperand(0).getReg();
1900 LastCSPush = MBBI;
1901 ++MBBI;
1902 unsigned Opc = LastCSPush->getOpcode();
1903
1904 if (!HasFP && NeedsDwarfCFI) {
1905 // Mark callee-saved push instruction.
1906 // Define the current CFA rule to use the provided offset.
1907 assert(StackSize);
1908 // Compared to push, push2 introduces more stack offset (one more
1909 // register).
1910 if (Opc == X86::PUSH2 || Opc == X86::PUSH2P)
1911 StackOffset += stackGrowth;
1912 BuildCFI(MBB, MBBI, DL,
1915 StackOffset += stackGrowth;
1916 }
1917
1918 if (NeedsWinCFI) {
1919 HasWinCFI = true;
1920 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1921 .addImm(Reg)
1923 if (Opc == X86::PUSH2 || Opc == X86::PUSH2P)
1924 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1925 .addImm(LastCSPush->getOperand(1).getReg())
1927 }
1928 }
1929
1930 // Realign stack after we pushed callee-saved registers (so that we'll be
1931 // able to calculate their offsets from the frame pointer).
1932 // Don't do this for Win64, it needs to realign the stack after the prologue.
1933 if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF) &&
1934 !ArgBaseReg.isValid()) {
1935 assert(HasFP && "There should be a frame pointer if stack is realigned.");
1936 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1937
1938 if (NeedsWinCFI) {
1939 HasWinCFI = true;
1940 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlign))
1941 .addImm(MaxAlign)
1943 }
1944 }
1945
1946 // If there is an SUB32ri of ESP immediately before this instruction, merge
1947 // the two. This can be the case when tail call elimination is enabled and
1948 // the callee has more arguments than the caller.
1949 NumBytes -= mergeSPUpdates(MBB, MBBI, true);
1950
1951 // Adjust stack pointer: ESP -= numbytes.
1952
1953 // Windows and cygwin/mingw require a prologue helper routine when allocating
1954 // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
1955 // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
1956 // stack and adjust the stack pointer in one go. The 64-bit version of
1957 // __chkstk is only responsible for probing the stack. The 64-bit prologue is
1958 // responsible for adjusting the stack pointer. Touching the stack at 4K
1959 // increments is necessary to ensure that the guard pages used by the OS
1960 // virtual memory manager are allocated in correct sequence.
1961 uint64_t AlignedNumBytes = NumBytes;
1962 if (IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF))
1963 AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign);
1964 if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) {
1965 assert(!X86FI->getUsesRedZone() &&
1966 "The Red Zone is not accounted for in stack probes");
1967
1968 // Check whether EAX is livein for this block.
1969 bool isEAXAlive = isEAXLiveIn(MBB);
1970
1971 if (isEAXAlive) {
1972 if (Is64Bit) {
1973 // Save RAX
1974 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1975 .addReg(X86::RAX, RegState::Kill)
1977 } else {
1978 // Save EAX
1979 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
1980 .addReg(X86::EAX, RegState::Kill)
1982 }
1983 }
1984
1985 if (Is64Bit) {
1986 // Handle the 64-bit Windows ABI case where we need to call __chkstk.
1987 // Function prologue is responsible for adjusting the stack pointer.
1988 int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes;
1989 BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Alloc)), X86::RAX)
1990 .addImm(Alloc)
1992 } else {
1993 // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
1994 // We'll also use 4 already allocated bytes for EAX.
1995 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
1996 .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
1998 }
1999
2000 // Call __chkstk, __chkstk_ms, or __alloca.
2001 emitStackProbe(MF, MBB, MBBI, DL, true);
2002
2003 if (isEAXAlive) {
2004 // Restore RAX/EAX
2006 if (Is64Bit)
2007 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV64rm), X86::RAX),
2008 StackPtr, false, NumBytes - 8);
2009 else
2010 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX),
2011 StackPtr, false, NumBytes - 4);
2012 MI->setFlag(MachineInstr::FrameSetup);
2013 MBB.insert(MBBI, MI);
2014 }
2015 } else if (NumBytes) {
2016 emitSPUpdate(MBB, MBBI, DL, -(int64_t)NumBytes, /*InEpilogue=*/false);
2017 }
2018
2019 if (NeedsWinCFI && NumBytes) {
2020 HasWinCFI = true;
2021 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
2022 .addImm(NumBytes)
2024 }
2025
2026 int SEHFrameOffset = 0;
2027 unsigned SPOrEstablisher;
2028 if (IsFunclet) {
2029 if (IsClrFunclet) {
2030 // The establisher parameter passed to a CLR funclet is actually a pointer
2031 // to the (mostly empty) frame of its nearest enclosing funclet; we have
2032 // to find the root function establisher frame by loading the PSPSym from
2033 // the intermediate frame.
2034 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2035 MachinePointerInfo NoInfo;
2036 MBB.addLiveIn(Establisher);
2037 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher),
2038 Establisher, false, PSPSlotOffset)
2041 ;
2042 // Save the root establisher back into the current funclet's (mostly
2043 // empty) frame, in case a sub-funclet or the GC needs it.
2044 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr,
2045 false, PSPSlotOffset)
2046 .addReg(Establisher)
2048 NoInfo,
2051 }
2052 SPOrEstablisher = Establisher;
2053 } else {
2054 SPOrEstablisher = StackPtr;
2055 }
2056
2057 if (IsWin64Prologue && HasFP) {
2058 // Set RBP to a small fixed offset from RSP. In the funclet case, we base
2059 // this calculation on the incoming establisher, which holds the value of
2060 // RSP from the parent frame at the end of the prologue.
2061 SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes);
2062 if (SEHFrameOffset)
2063 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
2064 SPOrEstablisher, false, SEHFrameOffset);
2065 else
2066 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr)
2067 .addReg(SPOrEstablisher);
2068
2069 // If this is not a funclet, emit the CFI describing our frame pointer.
2070 if (NeedsWinCFI && !IsFunclet) {
2071 assert(!NeedsWinFPO && "this setframe incompatible with FPO data");
2072 HasWinCFI = true;
2073 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
2075 .addImm(SEHFrameOffset)
2077 if (isAsynchronousEHPersonality(Personality))
2078 MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset;
2079 }
2080 } else if (IsFunclet && STI.is32Bit()) {
2081 // Reset EBP / ESI to something good for funclets.
2083 // If we're a catch funclet, we can be returned to via catchret. Save ESP
2084 // into the registration node so that the runtime will restore it for us.
2085 if (!MBB.isCleanupFuncletEntry()) {
2086 assert(Personality == EHPersonality::MSVC_CXX);
2087 Register FrameReg;
2089 int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg).getFixed();
2090 // ESP is the first field, so no extra displacement is needed.
2091 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg,
2092 false, EHRegOffset)
2093 .addReg(X86::ESP);
2094 }
2095 }
2096
2097 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
2098 const MachineInstr &FrameInstr = *MBBI;
2099 ++MBBI;
2100
2101 if (NeedsWinCFI) {
2102 int FI;
2103 if (Register Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
2104 if (X86::FR64RegClass.contains(Reg)) {
2105 int Offset;
2106 Register IgnoredFrameReg;
2107 if (IsWin64Prologue && IsFunclet)
2108 Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg);
2109 else
2110 Offset =
2111 getFrameIndexReference(MF, FI, IgnoredFrameReg).getFixed() +
2112 SEHFrameOffset;
2113
2114 HasWinCFI = true;
2115 assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data");
2116 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
2117 .addImm(Reg)
2118 .addImm(Offset)
2120 }
2121 }
2122 }
2123 }
2124
2125 if (NeedsWinCFI && HasWinCFI)
2126 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
2128
2129 if (FnHasClrFunclet && !IsFunclet) {
2130 // Save the so-called Initial-SP (i.e. the value of the stack pointer
2131 // immediately after the prolog) into the PSPSlot so that funclets
2132 // and the GC can recover it.
2133 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2134 auto PSPInfo = MachinePointerInfo::getFixedStack(
2136 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, false,
2137 PSPSlotOffset)
2142 }
2143
2144 // Realign stack after we spilled callee-saved registers (so that we'll be
2145 // able to calculate their offsets from the frame pointer).
2146 // Win64 requires aligning the stack after the prologue.
2147 if (IsWin64Prologue && TRI->hasStackRealignment(MF)) {
2148 assert(HasFP && "There should be a frame pointer if stack is realigned.");
2149 BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign);
2150 }
2151
2152 // We already dealt with stack realignment and funclets above.
2153 if (IsFunclet && STI.is32Bit())
2154 return;
2155
2156 // If we need a base pointer, set it up here. It's whatever the value
2157 // of the stack pointer is at this point. Any variable size objects
2158 // will be allocated after this, so we can still use the base pointer
2159 // to reference locals.
2160 if (TRI->hasBasePointer(MF)) {
2161 // Update the base pointer with the current stack pointer.
2162 unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
2163 BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
2164 .addReg(SPOrEstablisher)
2166 if (X86FI->getRestoreBasePointer()) {
2167 // Stash value of base pointer. Saving RSP instead of EBP shortens
2168 // dependence chain. Used by SjLj EH.
2169 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2170 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), FramePtr, true,
2172 .addReg(SPOrEstablisher)
2174 }
2175
2176 if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) {
2177 // Stash the value of the frame pointer relative to the base pointer for
2178 // Win32 EH. This supports Win32 EH, which does the inverse of the above:
2179 // it recovers the frame pointer from the base pointer rather than the
2180 // other way around.
2181 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2182 Register UsedReg;
2183 int Offset =
2184 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
2185 .getFixed();
2186 assert(UsedReg == BasePtr);
2187 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset)
2190 }
2191 }
2192 if (ArgBaseReg.isValid()) {
2193 // Save argument base pointer.
2194 auto *MI = X86FI->getStackPtrSaveMI();
2195 int FI = MI->getOperand(1).getIndex();
2196 unsigned MOVmr = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
2197 // movl %basereg, offset(%ebp)
2198 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), FI)
2199 .addReg(ArgBaseReg)
2201 }
2202
2203 if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
2204 // Mark end of stack pointer adjustment.
2205 if (!HasFP && NumBytes) {
2206 // Define the current CFA rule to use the provided offset.
2207 assert(StackSize);
2208 BuildCFI(
2209 MBB, MBBI, DL,
2210 MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize - stackGrowth),
2212 }
2213
2214 // Emit DWARF info specifying the offsets of the callee-saved registers.
2216 }
2217
2218 // X86 Interrupt handling function cannot assume anything about the direction
2219 // flag (DF in EFLAGS register). Clear this flag by creating "cld" instruction
2220 // in each prologue of interrupt handler function.
2221 //
2222 // Create "cld" instruction only in these cases:
2223 // 1. The interrupt handling function uses any of the "rep" instructions.
2224 // 2. Interrupt handling function calls another function.
2225 // 3. If there are any inline asm blocks, as we do not know what they do
2226 //
2227 // TODO: We should also emit cld if we detect the use of std, but as of now,
2228 // the compiler does not even emit that instruction or even define it, so in
2229 // practice, this would only happen with inline asm, which we cover anyway.
2231 bool NeedsCLD = false;
2232
2233 for (const MachineBasicBlock &B : MF) {
2234 for (const MachineInstr &MI : B) {
2235 if (MI.isCall()) {
2236 NeedsCLD = true;
2237 break;
2238 }
2239
2240 if (isOpcodeRep(MI.getOpcode())) {
2241 NeedsCLD = true;
2242 break;
2243 }
2244
2245 if (MI.isInlineAsm()) {
2246 // TODO: Parse asm for rep instructions or call sites?
2247 // For now, let's play it safe and emit a cld instruction
2248 // just in case.
2249 NeedsCLD = true;
2250 break;
2251 }
2252 }
2253 }
2254
2255 if (NeedsCLD) {
2256 BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
2258 }
2259 }
2260
2261 // At this point we know if the function has WinCFI or not.
2262 MF.setHasWinCFI(HasWinCFI);
2263}
2264
// Reports whether an LEA instruction may be used to adjust the stack pointer
// for MF when deallocating the stack.
// NOTE(review): the line carrying the return type and function name is not
// present in this listing; only the parameter list onward is visible.
2266 const MachineFunction &MF) const {
2267 // We can't use LEA instructions for adjusting the stack pointer if we don't
2268 // have a frame pointer in the Win64 ABI. Only ADD instructions may be used
2269 // to deallocate the stack.
2270 // This means that we can use LEA for SP in two situations:
2271 // 1. We *aren't* using the Win64 ABI which means we are free to use LEA.
2272 // 2. We *have* a frame pointer which means we are permitted to use LEA.
// True when the target's MCAsmInfo does not use Windows CFI, or when MF has a
// frame pointer.
2273 return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF);
2274}
2275
// Classifies MI by opcode: returns true for CATCHRET and CLEANUPRET, false for
// every other opcode.
// NOTE(review): the signature line is not present in this listing; this is
// presumably the isFuncletReturnInstr helper that the epilogue code calls on
// the block terminator -- confirm against the full file.
2277 switch (MI.getOpcode()) {
2278 case X86::CATCHRET:
2279 case X86::CLEANUPRET:
2280 return true;
2281 default:
2282 return false;
2283 }
// Every switch path above returns, so this point is never reached.
2284 llvm_unreachable("impossible");
2285}
2286
2287// CLR funclets use a special "Previous Stack Pointer Symbol" slot on the
2288// stack. It holds a pointer to the bottom of the root function frame. The
2289// establisher frame pointer passed to a nested funclet may point to the
2290// (mostly empty) frame of its parent funclet, but it will need to find
2291// the frame of the root function to access locals. To facilitate this,
2292// every funclet copies the pointer to the bottom of the root function
2293// frame into a PSPSym slot in its own (mostly empty) stack frame. Using the
2294// same offset for the PSPSym in the root function frame that's used in the
2295// funclets' frames allows each funclet to dynamically accept any ancestor
2296// frame as its establisher argument (the runtime doesn't guarantee the
2297// immediate parent for some reason lost to history), and also allows the GC,
2298// which uses the PSPSym for some bookkeeping, to find it in any funclet's
2299// frame with only a single offset reported for the entire method.
// Returns the offset of the PSPSym slot (frame index Info.PSPSymFrameIdx of
// MF's WinEHFuncInfo) as an unsigned value. The assert below requires the
// offset to be non-negative and expressed against the stack register.
2300unsigned
2301X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const {
2302 const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo();
// NOTE(review): SPReg is used below, but its declaration is not visible in
// this listing.
// Look the slot up through the SP-preferring query with IgnoreSPUpdates set.
2304 int Offset = getFrameIndexReferencePreferSP(MF, Info.PSPSymFrameIdx, SPReg,
2305 /*IgnoreSPUpdates*/ true)
2306 .getFixed();
// The lookup must have answered relative to the stack register, with a
// non-negative offset, for the unsigned conversion below to be meaningful.
2307 assert(Offset >= 0 && SPReg == TRI->getStackRegister());
2308 return static_cast<unsigned>(Offset);
2309}
2310
// Computes how many bytes of stack each Windows EH funclet of MF allocates.
// The result is alignTo(CSSize + UsedSize, stack alignment) + XMMSize - CSSize,
// where UsedSize depends on the EH personality (see below).
// NOTE(review): the declaration of X86FI and the initializer expression of
// Personality are not visible in this listing.
2311unsigned
2312X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
2314 // This is the size of the pushed CSRs.
2315 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2316 // This is the size of callee saved XMMs.
2317 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
// One VR128-class spill slot per entry in the WinEH XMM slot map.
2318 unsigned XMMSize =
2319 WinEHXMMSlotInfo.size() * TRI->getSpillSize(X86::VR128RegClass);
2320 // This is the amount of stack a funclet needs to allocate.
2321 unsigned UsedSize;
2322 EHPersonality Personality =
2324 if (Personality == EHPersonality::CoreCLR) {
2325 // CLR funclets need to hold enough space to include the PSPSym, at the
2326 // same offset from the stack pointer (immediately after the prolog) as it
2327 // resides at in the main function.
2328 UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize;
2329 } else {
2330 // Other funclets just need enough stack for outgoing call arguments.
2331 UsedSize = MF.getFrameInfo().getMaxCallFrameSize();
2332 }
2333 // RBP is not included in the callee saved register block. After pushing RBP,
2334 // everything is 16 byte aligned. Everything we allocate before an outgoing
2335 // call must also be 16 byte aligned.
2336 unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlign());
2337 // Subtract out the size of the callee saved registers. This is how much stack
2338 // each funclet will allocate.
2339 return FrameSizeMinusRBP + XMMSize - CSSize;
2340}
2341
2342static bool isTailCallOpcode(unsigned Opc) {
2343 return Opc == X86::TCRETURNri || Opc == X86::TCRETURNdi ||
2344 Opc == X86::TCRETURNmi || Opc == X86::TCRETURNri64 ||
2345 Opc == X86::TCRETURNdi64 || Opc == X86::TCRETURNmi64;
2346}
2347
// Emits the function epilogue into MBB. In order, the visible code: works out
// how many stack bytes to release (NumBytes), pops the frame pointer when one
// is in use, steps back over the callee-saved pop instructions, resets or
// adjusts the stack pointer, emits the DWARF / Win64 unwind markers that
// describe those steps, and finally gives back the tail-call return-address
// area when the block does not end in a tail call.
// NOTE(review): this listing omits several lines of the definition (among them
// the line with the function name and the declarations of X86FI, Terminator,
// FramePtr and AfterPop), so some statements below appear truncated.
2349 MachineBasicBlock &MBB) const {
2350 const MachineFrameInfo &MFI = MF.getFrameInfo();
2353 MachineBasicBlock::iterator MBBI = Terminator;
2354 DebugLoc DL;
2355 if (MBBI != MBB.end())
2356 DL = MBBI->getDebugLoc();
2357 // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
2358 const bool Is64BitILP32 = STI.isTarget64BitILP32();
2360 Register MachineFramePtr =
2361 Is64BitILP32 ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr;
2362
2363 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2364 bool NeedsWin64CFI =
2365 IsWin64Prologue && MF.getFunction().needsUnwindTableEntry();
// A block with no terminator instruction is never treated as a funclet return.
2366 bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(*MBBI);
2367
2368 // Get the number of bytes to allocate from the FrameInfo.
2369 uint64_t StackSize = MFI.getStackSize();
2370 uint64_t MaxAlign = calculateMaxStackAlign(MF);
2371 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
// The delta is asserted to be non-positive further down, so negating it gives
// the size of the reserved area.
2372 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
2373 bool HasFP = hasFP(MF);
2374 uint64_t NumBytes = 0;
2375
2376 bool NeedsDwarfCFI = (!MF.getTarget().getTargetTriple().isOSDarwin() &&
2378 MF.needsFrameMoves();
2379
// If a stack-pointer-save instruction was recorded for this function, first
// rebuild SP from the argument base register it defines.
2380 Register ArgBaseReg;
2381 if (auto *MI = X86FI->getStackPtrSaveMI()) {
2382 unsigned Opc = X86::LEA32r;
2383 Register StackReg = X86::ESP;
2384 ArgBaseReg = MI->getOperand(0).getReg();
2385 if (STI.is64Bit()) {
2386 Opc = X86::LEA64r;
2387 StackReg = X86::RSP;
2388 }
2389 // leal -4(%basereg), %esp
2390 // .cfi_def_cfa %esp, 4
2391 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackReg)
2392 .addUse(ArgBaseReg)
2393 .addImm(1)
2394 .addUse(X86::NoRegister)
2395 .addImm(-(int64_t)SlotSize)
2396 .addUse(X86::NoRegister)
2398 if (NeedsDwarfCFI) {
2399 unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackReg, true);
2400 BuildCFI(MBB, MBBI, DL,
2401 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2403 --MBBI;
2404 }
// Step MBBI back once per instruction emitted above (presumably so that later
// insertions land before them -- confirm against the BuildMI insertion rules).
2405 --MBBI;
2406 }
2407
// Pick the number of bytes to release: the funclet frame size, or the stack
// size minus what the pushes / tail-call reservation already account for.
2408 if (IsFunclet) {
2409 assert(HasFP && "EH funclets without FP not yet implemented");
2410 NumBytes = getWinEHFuncletFrameSize(MF);
2411 } else if (HasFP) {
2412 // Calculate required stack adjustment.
2413 uint64_t FrameSize = StackSize - SlotSize;
2414 NumBytes = FrameSize - CSSize - TailCallArgReserveSize;
2415
2416 // Callee-saved registers were pushed on stack before the stack was
2417 // realigned.
2418 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
2419 NumBytes = alignTo(FrameSize, MaxAlign);
2420 } else {
2421 NumBytes = StackSize - CSSize - TailCallArgReserveSize;
2422 }
// Remember the amount before NumBytes is modified by merged SP updates below.
2423 uint64_t SEHStackAllocAmt = NumBytes;
2424
2425 // AfterPop is the position to insert .cfi_restore.
2427 if (HasFP) {
2428 if (X86FI->hasSwiftAsyncContext()) {
2429 // Discard the context.
2430 int Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
2431 emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/ true);
2432 }
2433 // Pop EBP.
2434 BuildMI(MBB, MBBI, DL,
2436 MachineFramePtr)
2438
2439 // We need to reset FP to its untagged state on return. Bit 60 is currently
2440 // used to show the presence of an extended frame.
2441 if (X86FI->hasSwiftAsyncContext()) {
2442 BuildMI(MBB, MBBI, DL, TII.get(X86::BTR64ri8), MachineFramePtr)
2443 .addUse(MachineFramePtr)
2444 .addImm(60)
2446 }
2447
2448 if (NeedsDwarfCFI) {
2449 if (!ArgBaseReg.isValid()) {
2450 unsigned DwarfStackPtr =
2451 TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
2452 BuildCFI(MBB, MBBI, DL,
2453 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2455 }
// The frame-pointer restore CFI is only emitted for blocks that have
// successors and are not return blocks.
2456 if (!MBB.succ_empty() && !MBB.isReturnBlock()) {
2457 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
2458 BuildCFI(MBB, AfterPop, DL,
2459 MCCFIInstruction::createRestore(nullptr, DwarfFramePtr),
2461 --MBBI;
2462 --AfterPop;
2463 }
2464 --MBBI;
2465 }
2466 }
2467
2468 MachineBasicBlock::iterator FirstCSPop = MBBI;
2469 // Skip the callee-saved pop instructions.
// Walk backwards from MBBI. DBG_VALUE and terminator instructions are stepped
// over; the scan stops at the first other instruction that is not flagged
// FrameDestroy or whose opcode is not in the list below. FirstCSPop ends up at
// the earliest accepted instruction.
2470 while (MBBI != MBB.begin()) {
2471 MachineBasicBlock::iterator PI = std::prev(MBBI);
2472 unsigned Opc = PI->getOpcode();
2473
2474 if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
2475 if (!PI->getFlag(MachineInstr::FrameDestroy) ||
2476 (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::BTR64ri8 &&
2477 Opc != X86::ADD64ri32 && Opc != X86::POPP64r && Opc != X86::POP2 &&
2478 Opc != X86::POP2P && Opc != X86::LEA64r))
2479 break;
2480 FirstCSPop = PI;
2481 }
2482
2483 --MBBI;
2484 }
2485 if (ArgBaseReg.isValid()) {
2486 // Restore argument base pointer.
2487 auto *MI = X86FI->getStackPtrSaveMI();
2488 int FI = MI->getOperand(1).getIndex();
2489 unsigned MOVrm = Is64Bit ? X86::MOV64rm : X86::MOV32rm;
2490 // movl offset(%ebp), %basereg
2491 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVrm), ArgBaseReg), FI)
2493 }
2494 MBBI = FirstCSPop;
2495
2496 if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET)
2497 emitCatchRetReturnValue(MBB, FirstCSPop, &*Terminator);
2498
2499 if (MBBI != MBB.end())
2500 DL = MBBI->getDebugLoc();
2501 // If there is an ADD32ri or SUB32ri of ESP immediately before this
2502 // instruction, merge the two instructions.
2503 if (NumBytes || MFI.hasVarSizedObjects())
2504 NumBytes += mergeSPUpdates(MBB, MBBI, true);
2505
2506 // If dynamic alloca is used, then reset esp to point to the last callee-saved
2507 // slot before popping them off! Same applies for the case, when stack was
2508 // realigned. Don't do this if this was a funclet epilogue, since the funclets
2509 // will not do realignment or dynamic stack allocation.
2510 if (((TRI->hasStackRealignment(MF)) || MFI.hasVarSizedObjects()) &&
2511 !IsFunclet) {
2512 if (TRI->hasStackRealignment(MF))
2513 MBBI = FirstCSPop;
2514 unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
// Displacement from the frame pointer at which SP must be re-established.
2515 uint64_t LEAAmount =
2516 IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
2517
2518 if (X86FI->hasSwiftAsyncContext())
2519 LEAAmount -= 16;
2520
2521 // There are only two legal forms of epilogue:
2522 // - add SEHAllocationSize, %rsp
2523 // - lea SEHAllocationSize(%FramePtr), %rsp
2524 //
2525 // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence.
2526 // However, we may use this sequence if we have a frame pointer because the
2527 // effects of the prologue can safely be undone.
2528 if (LEAAmount != 0) {
2529 unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
2531 false, LEAAmount);
2532 --MBBI;
2533 } else {
2534 unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
2535 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr).addReg(FramePtr);
2536 --MBBI;
2537 }
2538 } else if (NumBytes) {
2539 // Adjust stack pointer back: ESP += numbytes.
2540 emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true);
2541 if (!HasFP && NeedsDwarfCFI) {
2542 // Define the current CFA rule to use the provided offset.
2543 BuildCFI(MBB, MBBI, DL,
2545 nullptr, CSSize + TailCallArgReserveSize + SlotSize),
2547 }
2548 --MBBI;
2549 }
2550
2551 // Windows unwinder will not invoke function's exception handler if IP is
2552 // either in prologue or in epilogue. This behavior causes a problem when a
2553 // call immediately precedes an epilogue, because the return address points
2554 // into the epilogue. To cope with that, we insert an epilogue marker here,
2555 // then replace it with a 'nop' if it ends up immediately after a CALL in the
2556 // final emitted code.
2557 if (NeedsWin64CFI && MF.hasWinCFI())
2558 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
2559
2560 if (!HasFP && NeedsDwarfCFI) {
2561 MBBI = FirstCSPop;
// Running offset, starting at -(CSSize + SlotSize) and increased by the slot
// size (twice for pop2 forms) for every pop encountered in the loop below.
2562 int64_t Offset = -(int64_t)CSSize - SlotSize;
2563 // Mark callee-saved pop instruction.
2564 // Define the current CFA rule to use the provided offset.
2565 while (MBBI != MBB.end()) {
2567 unsigned Opc = PI->getOpcode();
2568 ++MBBI;
2569 if (Opc == X86::POP32r || Opc == X86::POP64r || Opc == X86::POPP64r ||
2570 Opc == X86::POP2 || Opc == X86::POP2P) {
2571 Offset += SlotSize;
2572 // Compared to pop, pop2 introduces more stack offset (one more
2573 // register).
2574 if (Opc == X86::POP2 || Opc == X86::POP2P)
2575 Offset += SlotSize;
2576 BuildCFI(MBB, MBBI, DL,
2579 }
2580 }
2581 }
2582
2583 // Emit DWARF info specifying the restores of the callee-saved registers.
2584 // For epilogue with return inside or being other block without successor,
2585 // no need to generate .cfi_restore for callee-saved registers.
2586 if (NeedsDwarfCFI && !MBB.succ_empty())
2587 emitCalleeSavedFrameMoves(MBB, AfterPop, DL, false);
2588
2589 if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
2590 // Add the return addr area delta back since we are not tail calling.
2591 int Offset = -1 * X86FI->getTCReturnAddrDelta();
2592 assert(Offset >= 0 && "TCDelta should never be positive");
2593 if (Offset) {
2594 // Check for possible merge with preceding ADD instruction.
2595 Offset += mergeSPUpdates(MBB, Terminator, true);
2596 emitSPUpdate(MBB, Terminator, DL, Offset, /*InEpilogue=*/true);
2597 }
2598 }
2599
2600 // Emit tilerelease for AMX kernel.
// NOTE(review): the condition guarding the statement below is not visible in
// this listing.
2602 BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
2603}
2604
// Resolves frame index FI of MF to a base register (written to FrameReg) and a
// fixed offset from that register (the returned StackOffset).
// NOTE(review): this listing omits several lines of the definition: the line
// with the function name, the declarations of X86FI and Offset, the start of
// the interrupt-handling condition and its body, and the return that ends the
// frame-pointer branch. Those statements therefore appear truncated below.
2606 int FI,
2607 Register &FrameReg) const {
2608 const MachineFrameInfo &MFI = MF.getFrameInfo();
2609
2610 bool IsFixed = MFI.isFixedObjectIndex(FI);
2611 // We can't calculate offset from frame pointer if the stack is realigned,
2612 // so enforce usage of stack/base pointer. The base pointer is used when we
2613 // have dynamic allocas in addition to dynamic realignment.
// Fixed objects stay on the frame pointer in both of the first two cases.
2614 if (TRI->hasBasePointer(MF))
2615 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister();
2616 else if (TRI->hasStackRealignment(MF))
2617 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister();
2618 else
2619 FrameReg = TRI->getFrameRegister(MF);
2620
2621 // Offset will hold the offset from the stack pointer at function entry to the
2622 // object.
2623 // We need to factor in additional offsets applied during the prologue to the
2624 // frame, base, and stack pointer depending on which is used.
2627 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2628 uint64_t StackSize = MFI.getStackSize();
2629 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2630 int64_t FPDelta = 0;
2631
2632 // In an x86 interrupt, remove the offset we added to account for the return
2633 // address from any stack object allocated in the caller's frame. Interrupts
2634 // do not have a standard return address. Fixed objects in the current frame,
2635 // such as SSE register spills, should not get this treatment.
2637 Offset >= 0) {
2639 }
2640
2641 if (IsWin64Prologue) {
2642 assert(!MFI.hasCalls() || (StackSize % 16) == 8);
2643
2644 // Calculate required stack adjustment.
2645 uint64_t FrameSize = StackSize - SlotSize;
2646 // If required, include space for extra hidden slot for stashing base
2647 // pointer.
2648 if (X86FI->getRestoreBasePointer())
2649 FrameSize += SlotSize;
2650 uint64_t NumBytes = FrameSize - CSSize;
2651
2652 uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
// A non-zero FI equal to the FA index is answered directly as
// -SEHFrameOffset, bypassing the adjustments below.
2653 if (FI && FI == X86FI->getFAIndex())
2654 return StackOffset::getFixed(-SEHFrameOffset);
2655
2656 // FPDelta is the offset from the "traditional" FP location of the old base
2657 // pointer followed by return address and the location required by the
2658 // restricted Win64 prologue.
2659 // Add FPDelta to all offsets below that go through the frame pointer.
2660 FPDelta = FrameSize - SEHFrameOffset;
2661 assert((!MFI.hasCalls() || (FPDelta % 16) == 0) &&
2662 "FPDelta isn't aligned per the Win64 ABI!");
2663 }
2664
2665 if (FrameReg == TRI->getFramePtr()) {
2666 // Skip saved EBP/RBP
2667 Offset += SlotSize;
2668
2669 // Account for restricted Windows prologue.
2670 Offset += FPDelta;
2671
2672 // Skip the RETADDR move area
2673 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2674 if (TailCallReturnAddrDelta < 0)
2675 Offset -= TailCallReturnAddrDelta;
2676
2678 }
2679
2680 // FrameReg is either the stack pointer or a base pointer. But the base is
2681 // located at the end of the statically known StackSize so the distinction
2682 // doesn't really matter.
// Debug-only check that the resulting offset respects the object's alignment.
2683 if (TRI->hasStackRealignment(MF) || TRI->hasBasePointer(MF))
2684 assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
2685 return StackOffset::getFixed(Offset + StackSize);
2686}
2687
// Resolves frame index FI, with a separate path for indices recorded in the
// WinEH XMM slot map. An index that is not in the map is forwarded to
// getFrameIndexReference and its fixed offset is returned. For an index that
// is in the map, FrameReg is set to the stack register and the returned
// expression ends with the mapped value (it->second).
// NOTE(review): the line with the function name, the declaration of X86FI and
// the first line of the final return expression are not visible in this
// listing.
2689 Register &FrameReg) const {
2690 const MachineFrameInfo &MFI = MF.getFrameInfo();
2692 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2693 const auto it = WinEHXMMSlotInfo.find(FI);
2694
2695 if (it == WinEHXMMSlotInfo.end())
2696 return getFrameIndexReference(MF, FI, FrameReg).getFixed();
2697
2698 FrameReg = TRI->getStackRegister();
2700 it->second;
2701}
2702
// Resolves frame index FI against the stack pointer: FrameReg is set to the
// stack register and the returned fixed offset is the object's offset minus
// the local-area offset plus the caller-supplied Adjustment.
// NOTE(review): the lines with the return type, function name and leading
// parameters are not visible in this listing.
2705 Register &FrameReg,
2706 int Adjustment) const {
2707 const MachineFrameInfo &MFI = MF.getFrameInfo();
2708 FrameReg = TRI->getStackRegister();
2709 return StackOffset::getFixed(MFI.getObjectOffset(FI) -
2710 getOffsetOfLocalArea() + Adjustment);
2711}
2712
// Resolves frame index FI, answering relative to the stack pointer when that
// is possible. Two situations fall back to getFrameIndexReference: a fixed
// object in a function with stack realignment on a non-Win64 target, and a
// function without a reserved call frame unless IgnoreSPUpdates is set.
// Otherwise the answer is getFrameIndexReferenceSP with the stack size as the
// adjustment.
// NOTE(review): the lines with the return type and function name, and the
// first line of the tail-call assert, are not visible in this listing.
2715 int FI, Register &FrameReg,
2716 bool IgnoreSPUpdates) const {
2717
2718 const MachineFrameInfo &MFI = MF.getFrameInfo();
2719 // Does not include any dynamic realign.
2720 const uint64_t StackSize = MFI.getStackSize();
2721 // LLVM arranges the stack as follows:
2722 // ...
2723 // ARG2
2724 // ARG1
2725 // RETADDR
2726 // PUSH RBP <-- RBP points here
2727 // PUSH CSRs
2728 // ~~~~~~~ <-- possible stack realignment (non-win64)
2729 // ...
2730 // STACK OBJECTS
2731 // ... <-- RSP after prologue points here
2732 // ~~~~~~~ <-- possible stack realignment (win64)
2733 //
2734 // if (hasVarSizedObjects()):
2735 // ... <-- "base pointer" (ESI/RBX) points here
2736 // DYNAMIC ALLOCAS
2737 // ... <-- RSP points here
2738 //
2739 // Case 1: In the simple case of no stack realignment and no dynamic
2740 // allocas, both "fixed" stack objects (arguments and CSRs) are addressable
2741 // with fixed offsets from RSP.
2742 //
2743 // Case 2: In the case of stack realignment with no dynamic allocas, fixed
2744 // stack objects are addressed with RBP and regular stack objects with RSP.
2745 //
2746 // Case 3: In the case of dynamic allocas and stack realignment, RSP is used
2747 // to address stack arguments for outgoing calls and nothing else. The "base
2748 // pointer" points to local variables, and RBP points to fixed objects.
2749 //
2750 // In cases 2 and 3, we can only answer for non-fixed stack objects, and the
2751 // answer we give is relative to the SP after the prologue, and not the
2752 // SP in the middle of the function.
2753
2754 if (MFI.isFixedObjectIndex(FI) && TRI->hasStackRealignment(MF) &&
2755 !STI.isTargetWin64())
2756 return getFrameIndexReference(MF, FI, FrameReg);
2757
2758 // If !hasReservedCallFrame the function might have SP adjustment in the
2759 // body. So, even though the offset is statically known, it depends on where
2760 // we are in the function.
2761 if (!IgnoreSPUpdates && !hasReservedCallFrame(MF))
2762 return getFrameIndexReference(MF, FI, FrameReg);
2763
2764 // We don't handle tail calls, and shouldn't be seeing them either.
2766 "we don't handle this case!");
2767
2768 // This is how the math works out:
2769 //
2770 // %rsp grows (i.e. gets lower) left to right. Each box below is
2771 // one word (eight bytes). Obj0 is the stack slot we're trying to
2772 // get to.
2773 //
2774 // ----------------------------------
2775 // | BP | Obj0 | Obj1 | ... | ObjN |
2776 // ----------------------------------
2777 // ^ ^ ^ ^
2778 // A B C E
2779 //
2780 // A is the incoming stack pointer.
2781 // (B - A) is the local area offset (-8 for x86-64) [1]
2782 // (C - A) is the Offset returned by MFI.getObjectOffset for Obj0 [2]
2783 //
2784 // |(E - B)| is the StackSize (absolute value, positive). For a
2785 // stack that grows down, this works out to be (B - E). [3]
2786 //
2787 // E is also the value of %rsp after stack has been set up, and we
2788 // want (C - E) -- the value we can add to %rsp to get to Obj0. Now
2789 // (C - E) == (C - A) - (B - A) + (B - E)
2790 // { Using [1], [2] and [3] above }
2791 // == getObjectOffset - LocalAreaOffset + StackSize
2792
2793 return getFrameIndexReferenceSP(MF, FI, FrameReg, StackSize);
2794}
2795
2798 std::vector<CalleeSavedInfo> &CSI) const {
2799 MachineFrameInfo &MFI = MF.getFrameInfo();
2801
2802 unsigned CalleeSavedFrameSize = 0;
2803 unsigned XMMCalleeSavedFrameSize = 0;
2804 auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2805 int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
2806
2807 int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2808
2809 if (TailCallReturnAddrDelta < 0) {
2810 // create RETURNADDR area
2811 // arg
2812 // arg
2813 // RETADDR
2814 // { ...
2815 // RETADDR area
2816 // ...
2817 // }
2818 // [EBP]
2819 MFI.CreateFixedObject(-TailCallReturnAddrDelta,
2820 TailCallReturnAddrDelta - SlotSize, true);
2821 }
2822
2823 // Spill the BasePtr if it's used.
2824 if (this->TRI->hasBasePointer(MF)) {
2825 // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
2826 if (MF.hasEHFunclets()) {
2828 X86FI->setHasSEHFramePtrSave(true);
2829 X86FI->setSEHFramePtrSaveIndex(FI);
2830 }
2831 }
2832
2833 if (hasFP(MF)) {
2834 // emitPrologue always spills frame register the first thing.
2835 SpillSlotOffset -= SlotSize;
2836 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2837
2838 // The async context lives directly before the frame pointer, and we
2839 // allocate a second slot to preserve stack alignment.
2840 if (X86FI->hasSwiftAsyncContext()) {
2841 SpillSlotOffset -= SlotSize;
2842 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2843 SpillSlotOffset -= SlotSize;
2844 }
2845
2846 // Since emitPrologue and emitEpilogue will handle spilling and restoring of
2847 // the frame register, we can delete it from CSI list and not have to worry
2848 // about avoiding it later.
2850 for (unsigned i = 0; i < CSI.size(); ++i) {
2851 if (TRI->regsOverlap(CSI[i].getReg(), FPReg)) {
2852 CSI.erase(CSI.begin() + i);
2853 break;
2854 }
2855 }
2856 }
2857
2858 // Strategy:
2859 // 1. Use push2 when
2860 // a) number of CSR > 1 if no need padding
2861 // b) number of CSR > 2 if need padding
2862 // 2. When the number of CSR push is odd
2863 // a. Start to use push2 from the 1st push if stack is 16B aligned.
2864 // b. Start to use push2 from the 2nd push if stack is not 16B aligned.
2865 // 3. When the number of CSR push is even, start to use push2 from the 1st
2866 // push and make the stack 16B aligned before the push
2867 unsigned NumRegsForPush2 = 0;
2868 if (STI.hasPush2Pop2()) {
2869 unsigned NumCSGPR = llvm::count_if(CSI, [](const CalleeSavedInfo &I) {
2870 return X86::GR64RegClass.contains(I.getReg());
2871 });
2872 bool NeedPadding = (SpillSlotOffset % 16 != 0) && (NumCSGPR % 2 == 0);
2873 bool UsePush2Pop2 = NeedPadding ? NumCSGPR > 2 : NumCSGPR > 1;
2874 X86FI->setPadForPush2Pop2(NeedPadding && UsePush2Pop2);
2875 NumRegsForPush2 = UsePush2Pop2 ? alignDown(NumCSGPR, 2) : 0;
2876 if (X86FI->padForPush2Pop2()) {
2877 SpillSlotOffset -= SlotSize;
2878 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2879 }
2880 }
2881
2882 // Assign slots for GPRs. It increases frame size.
2883 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2884 Register Reg = I.getReg();
2885
2886 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
2887 continue;
2888
2889 // A CSR is a candidate for push2/pop2 when its slot offset is 16B aligned
2890 // or when there is an odd number of registers among the candidates so far.
2891 if (X86FI->getNumCandidatesForPush2Pop2() < NumRegsForPush2 &&
2892 (SpillSlotOffset % 16 == 0 ||
2893 X86FI->getNumCandidatesForPush2Pop2() % 2))
2894 X86FI->addCandidateForPush2Pop2(Reg);
2895
2896 SpillSlotOffset -= SlotSize;
2897 CalleeSavedFrameSize += SlotSize;
2898
2899 int SlotIndex = MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2900 I.setFrameIdx(SlotIndex);
2901 }
2902
2903 // Adjust the offset of spill slot as we know the accurate callee saved frame
2904 // size.
2905 if (X86FI->getRestoreBasePointer()) {
2906 SpillSlotOffset -= SlotSize;
2907 CalleeSavedFrameSize += SlotSize;
2908
2909 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2910 // TODO: saving the slot index is better?
2911 X86FI->setRestoreBasePointer(CalleeSavedFrameSize);
2912 }
2913 assert(X86FI->getNumCandidatesForPush2Pop2() % 2 == 0 &&
2914 "Expect even candidates for push2/pop2");
2915 if (X86FI->getNumCandidatesForPush2Pop2())
2916 ++NumFunctionUsingPush2Pop2;
2917 X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
2918 MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
2919
2920 // Assign slots for XMMs.
2921 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2922 Register Reg = I.getReg();
2923 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
2924 continue;
2925
2926 // If this is k-register make sure we lookup via the largest legal type.
2927 MVT VT = MVT::Other;
2928 if (X86::VK16RegClass.contains(Reg))
2929 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
2930
2931 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
2932 unsigned Size = TRI->getSpillSize(*RC);
2933 Align Alignment = TRI->getSpillAlign(*RC);
2934 // ensure alignment
2935 assert(SpillSlotOffset < 0 && "SpillSlotOffset should always < 0 on X86");
2936 SpillSlotOffset = -alignTo(-SpillSlotOffset, Alignment);
2937
2938 // spill into slot
2939 SpillSlotOffset -= Size;
2940 int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
2941 I.setFrameIdx(SlotIndex);
2942 MFI.ensureMaxAlignment(Alignment);
2943
2944 // Save the start offset and size of XMM in stack frame for funclets.
2945 if (X86::VR128RegClass.contains(Reg)) {
2946 WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize;
2947 XMMCalleeSavedFrameSize += Size;
2948 }
2949 }
2950
2951 return true;
2952}
2953
2958
2959 // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI
2960 // for us, and there are no XMM CSRs on Win32.
2961 if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows())
2962 return true;
2963
2964 // Push GPRs. It increases frame size.
2965 const MachineFunction &MF = *MBB.getParent();
2967 if (X86FI->padForPush2Pop2())
2968 emitSPUpdate(MBB, MI, DL, -(int64_t)SlotSize, /*InEpilogue=*/false);
2969
2970 // Update LiveIn of the basic block and decide whether we can add a kill flag
2971 // to the use.
2972 auto UpdateLiveInCheckCanKill = [&](Register Reg) {
2973 const MachineRegisterInfo &MRI = MF.getRegInfo();
2974 // Do not set a kill flag on values that are also marked as live-in. This
2975 // happens with the @llvm-returnaddress intrinsic and with arguments
2976 // passed in callee saved registers.
2977 // Omitting the kill flags is conservatively correct even if the live-in
2978 // is not used after all.
2979 if (MRI.isLiveIn(Reg))
2980 return false;
2981 MBB.addLiveIn(Reg);
2982 // Check if any subregister is live-in
2983 for (MCRegAliasIterator AReg(Reg, TRI, false); AReg.isValid(); ++AReg)
2984 if (MRI.isLiveIn(*AReg))
2985 return false;
2986 return true;
2987 };
2988 auto UpdateLiveInGetKillRegState = [&](Register Reg) {
2989 return getKillRegState(UpdateLiveInCheckCanKill(Reg));
2990 };
2991
2992 for (auto RI = CSI.rbegin(), RE = CSI.rend(); RI != RE; ++RI) {
2993 Register Reg = RI->getReg();
2994 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
2995 continue;
2996
2997 if (X86FI->isCandidateForPush2Pop2(Reg)) {
2998 Register Reg2 = (++RI)->getReg();
3000 .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
3001 .addReg(Reg2, UpdateLiveInGetKillRegState(Reg2))
3003 } else {
3004 BuildMI(MBB, MI, DL, TII.get(getPUSHOpcode(STI)))
3005 .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
3007 }
3008 }
3009
3010 if (X86FI->getRestoreBasePointer()) {
3011 unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
3012 Register BaseReg = this->TRI->getBaseRegister();
3013 BuildMI(MBB, MI, DL, TII.get(Opc))
3014 .addReg(BaseReg, getKillRegState(true))
3016 }
3017
3018 // Make XMM regs spilled. X86 does not have ability of push/pop XMM.
3019 // It can be done by spilling XMMs to stack frame.
3020 for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
3021 Register Reg = I.getReg();
3022 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3023 continue;
3024
3025 // If this is k-register make sure we lookup via the largest legal type.
3026 MVT VT = MVT::Other;
3027 if (X86::VK16RegClass.contains(Reg))
3028 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
3029
3030 // Add the callee-saved register as live-in. It's killed at the spill.
3031 MBB.addLiveIn(Reg);
3032 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
3033
3034 TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, TRI,
3035 Register());
3036 --MI;
3037 MI->setFlag(MachineInstr::FrameSetup);
3038 ++MI;
3039 }
3040
3041 return true;
3042}
3043
3044void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB,
3046 MachineInstr *CatchRet) const {
3047 // SEH shouldn't use catchret.
3050 "SEH should not use CATCHRET");
3051 const DebugLoc &DL = CatchRet->getDebugLoc();
3052 MachineBasicBlock *CatchRetTarget = CatchRet->getOperand(0).getMBB();
3053
3054 // Fill EAX/RAX with the address of the target block.
3055 if (STI.is64Bit()) {
3056 // LEA64r CatchRetTarget(%rip), %rax
3057 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), X86::RAX)
3058 .addReg(X86::RIP)
3059 .addImm(0)
3060 .addReg(0)
3061 .addMBB(CatchRetTarget)
3062 .addReg(0);
3063 } else {
3064 // MOV32ri $CatchRetTarget, %eax
3065 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
3066 .addMBB(CatchRetTarget);
3067 }
3068
3069 // Record that we've taken the address of CatchRetTarget and no longer just
3070 // reference it in a terminator.
3071 CatchRetTarget->setMachineBlockAddressTaken();
3072}
3073
3077 if (CSI.empty())
3078 return false;
3079
3080 if (MI != MBB.end() && isFuncletReturnInstr(*MI) && STI.isOSWindows()) {
3081 // Don't restore CSRs in 32-bit EH funclets. Matches
3082 // spillCalleeSavedRegisters.
3083 if (STI.is32Bit())
3084 return true;
3085 // Don't restore CSRs before an SEH catchret. SEH except blocks do not form
3086 // funclets. emitEpilogue transforms these to normal jumps.
3087 if (MI->getOpcode() == X86::CATCHRET) {
3088 const Function &F = MBB.getParent()->getFunction();
3089 bool IsSEH = isAsynchronousEHPersonality(
3090 classifyEHPersonality(F.getPersonalityFn()));
3091 if (IsSEH)
3092 return true;
3093 }
3094 }
3095
3097
3098 // Reload XMMs from stack frame.
3099 for (const CalleeSavedInfo &I : CSI) {
3100 Register Reg = I.getReg();
3101 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3102 continue;
3103
3104 // If this is k-register make sure we lookup via the largest legal type.
3105 MVT VT = MVT::Other;
3106 if (X86::VK16RegClass.contains(Reg))
3107 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
3108
3109 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
3110 TII.loadRegFromStackSlot(MBB, MI, Reg, I.getFrameIdx(), RC, TRI,
3111 Register());
3112 }
3113
3114 // Clear the stack slot for spill base pointer register.
3115 MachineFunction &MF = *MBB.getParent();
3117 if (X86FI->getRestoreBasePointer()) {
3118 unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
3119 Register BaseReg = this->TRI->getBaseRegister();
3120 BuildMI(MBB, MI, DL, TII.get(Opc), BaseReg)
3122 }
3123
3124 // POP GPRs.
3125 for (auto I = CSI.begin(), E = CSI.end(); I != E; ++I) {
3126 Register Reg = I->getReg();
3127 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
3128 continue;
3129
3130 if (X86FI->isCandidateForPush2Pop2(Reg))
3131 BuildMI(MBB, MI, DL, TII.get(getPOP2Opcode(STI)), Reg)
3134 else
3135 BuildMI(MBB, MI, DL, TII.get(getPOPOpcode(STI)), Reg)
3137 }
3138 if (X86FI->padForPush2Pop2())
3139 emitSPUpdate(MBB, MI, DL, SlotSize, /*InEpilogue=*/true);
3140
3141 return true;
3142}
3143
3145 BitVector &SavedRegs,
3146 RegScavenger *RS) const {
3148
3149 // Spill the BasePtr if it's used.
3150 if (TRI->hasBasePointer(MF)) {
3151 Register BasePtr = TRI->getBaseRegister();
3152 if (STI.isTarget64BitILP32())
3153 BasePtr = getX86SubSuperRegister(BasePtr, 64);
3154 SavedRegs.set(BasePtr);
3155 }
3156}
3157
3158static bool HasNestArgument(const MachineFunction *MF) {
3159 const Function &F = MF->getFunction();
3160 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
3161 I++) {
3162 if (I->hasNestAttr() && !I->use_empty())
3163 return true;
3164 }
3165 return false;
3166}
3167
3168/// GetScratchRegister - Get a temp register for performing work in the
3169/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
3170/// and the properties of the function either one or two registers will be
3171/// needed. Set primary to true for the first register, false for the second.
3172static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64,
3173 const MachineFunction &MF, bool Primary) {
3174 CallingConv::ID CallingConvention = MF.getFunction().getCallingConv();
3175
3176 // Erlang stuff.
3177 if (CallingConvention == CallingConv::HiPE) {
3178 if (Is64Bit)
3179 return Primary ? X86::R14 : X86::R13;
3180 else
3181 return Primary ? X86::EBX : X86::EDI;
3182 }
3183
3184 if (Is64Bit) {
3185 if (IsLP64)
3186 return Primary ? X86::R11 : X86::R12;
3187 else
3188 return Primary ? X86::R11D : X86::R12D;
3189 }
3190
3191 bool IsNested = HasNestArgument(&MF);
3192
3193 if (CallingConvention == CallingConv::X86_FastCall ||
3194 CallingConvention == CallingConv::Fast ||
3195 CallingConvention == CallingConv::Tail) {
3196 if (IsNested)
3197 report_fatal_error("Segmented stacks does not support fastcall with "
3198 "nested function.");
3199 return Primary ? X86::EAX : X86::ECX;
3200 }
3201 if (IsNested)
3202 return Primary ? X86::EDX : X86::EAX;
3203 return Primary ? X86::ECX : X86::EAX;
3204}
3205
3206// The stack limit in the TCB is set to this many bytes above the actual stack
3207// limit.
3209
3211 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3212 MachineFrameInfo &MFI = MF.getFrameInfo();
3213 uint64_t StackSize;
3214 unsigned TlsReg, TlsOffset;
3215 DebugLoc DL;
3216
3217 // To support shrink-wrapping we would need to insert the new blocks
3218 // at the right place and update the branches to PrologueMBB.
3219 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3220
3221 unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3222 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3223 "Scratch register is live-in");
3224
3225 if (MF.getFunction().isVarArg())
3226 report_fatal_error("Segmented stacks do not support vararg functions.");
3227 if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
3230 report_fatal_error("Segmented stacks not supported on this platform.");
3231
3232 // Eventually StackSize will be calculated by a link-time pass; which will
3233 // also decide whether checking code needs to be injected into this particular
3234 // prologue.
3235 StackSize = MFI.getStackSize();
3236
3237 if (!MFI.needsSplitStackProlog())
3238 return;
3239
3243 bool IsNested = false;
3244
3245 // We need to know if the function has a nest argument only in 64 bit mode.
3246 if (Is64Bit)
3247 IsNested = HasNestArgument(&MF);
3248
3249 // The MOV R10, RAX needs to be in a different block, since the RET we emit in
3250 // allocMBB needs to be last (terminating) instruction.
3251
3252 for (const auto &LI : PrologueMBB.liveins()) {
3253 allocMBB->addLiveIn(LI);
3254 checkMBB->addLiveIn(LI);
3255 }
3256
3257 if (IsNested)
3258 allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
3259
3260 MF.push_front(allocMBB);
3261 MF.push_front(checkMBB);
3262
3263 // When the frame size is less than 256 we just compare the stack
3264 // boundary directly to the value of the stack pointer, per gcc.
3265 bool CompareStackPointer = StackSize < kSplitStackAvailable;
3266
3267 // Read the limit off the current stacklet off the stack_guard location.
3268 if (Is64Bit) {
3269 if (STI.isTargetLinux()) {
3270 TlsReg = X86::FS;
3271 TlsOffset = IsLP64 ? 0x70 : 0x40;
3272 } else if (STI.isTargetDarwin()) {
3273 TlsReg = X86::GS;
3274 TlsOffset = 0x60 + 90 * 8; // See pthread_machdep.h. Steal TLS slot 90.
3275 } else if (STI.isTargetWin64()) {
3276 TlsReg = X86::GS;
3277 TlsOffset = 0x28; // pvArbitrary, reserved for application use
3278 } else if (STI.isTargetFreeBSD()) {
3279 TlsReg = X86::FS;
3280 TlsOffset = 0x18;
3281 } else if (STI.isTargetDragonFly()) {
3282 TlsReg = X86::FS;
3283 TlsOffset = 0x20; // use tls_tcb.tcb_segstack
3284 } else {
3285 report_fatal_error("Segmented stacks not supported on this platform.");
3286 }
3287
3288 if (CompareStackPointer)
3289 ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
3290 else
3291 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r),
3292 ScratchReg)
3293 .addReg(X86::RSP)
3294 .addImm(1)
3295 .addReg(0)
3296 .addImm(-StackSize)
3297 .addReg(0);
3298
3299 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm))
3300 .addReg(ScratchReg)
3301 .addReg(0)
3302 .addImm(1)
3303 .addReg(0)
3304 .addImm(TlsOffset)
3305 .addReg(TlsReg);
3306 } else {
3307 if (STI.isTargetLinux()) {
3308 TlsReg = X86::GS;
3309 TlsOffset = 0x30;
3310 } else if (STI.isTargetDarwin()) {
3311 TlsReg = X86::GS;
3312 TlsOffset = 0x48 + 90 * 4;
3313 } else if (STI.isTargetWin32()) {
3314 TlsReg = X86::FS;
3315 TlsOffset = 0x14; // pvArbitrary, reserved for application use
3316 } else if (STI.isTargetDragonFly()) {
3317 TlsReg = X86::FS;
3318 TlsOffset = 0x10; // use tls_tcb.tcb_segstack
3319 } else if (STI.isTargetFreeBSD()) {
3320 report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
3321 } else {
3322 report_fatal_error("Segmented stacks not supported on this platform.");
3323 }
3324
3325 if (CompareStackPointer)
3326 ScratchReg = X86::ESP;
3327 else
3328 BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg)
3329 .addReg(X86::ESP)
3330 .addImm(1)
3331 .addReg(0)
3332 .addImm(-StackSize)
3333 .addReg(0);
3334
3337 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3338 .addReg(ScratchReg)
3339 .addReg(0)
3340 .addImm(0)
3341 .addReg(0)
3342 .addImm(TlsOffset)
3343 .addReg(TlsReg);
3344 } else if (STI.isTargetDarwin()) {
3345
3346 // TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
3347 unsigned ScratchReg2;
3348 bool SaveScratch2;
3349 if (CompareStackPointer) {
3350 // The primary scratch register is available for holding the TLS offset.
3351 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3352 SaveScratch2 = false;
3353 } else {
3354 // Need to use a second register to hold the TLS offset
3355 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
3356
3357 // Unfortunately, with fastcc the second scratch register may hold an
3358 // argument.
3359 SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
3360 }
3361
3362 // If Scratch2 is live-in then it needs to be saved.
3363 assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
3364 "Scratch register is live-in and not saved");
3365
3366 if (SaveScratch2)
3367 BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
3368 .addReg(ScratchReg2, RegState::Kill);
3369
3370 BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
3371 .addImm(TlsOffset);
3372 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3373 .addReg(ScratchReg)
3374 .addReg(ScratchReg2)
3375 .addImm(1)
3376 .addReg(0)
3377 .addImm(0)
3378 .addReg(TlsReg);
3379
3380 if (SaveScratch2)
3381 BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
3382 }
3383 }
3384
3385 // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
3386 // It jumps to normal execution of the function body.
3387 BuildMI(checkMBB, DL, TII.get(X86::JCC_1))
3388 .addMBB(&PrologueMBB)
3390
3391 // On 32 bit we first push the arguments size and then the frame size. On 64
3392 // bit, we pass the stack frame size in r10 and the argument size in r11.
3393 if (Is64Bit) {
3394 // Functions with nested arguments use R10, so it needs to be saved across
3395 // the call to _morestack
3396
3397 const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
3398 const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
3399 const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
3400 const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
3401
3402 if (IsNested)
3403 BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
3404
3405 BuildMI(allocMBB, DL, TII.get(getMOVriOpcode(IsLP64, StackSize)), Reg10)
3406 .addImm(StackSize);
3407 BuildMI(allocMBB, DL,
3409 Reg11)
3410 .addImm(X86FI->getArgumentStackSize());
3411 } else {
3412 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i))
3413 .addImm(X86FI->getArgumentStackSize());
3414 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i)).addImm(StackSize);
3415 }
3416
3417 // __morestack is in libgcc
3419 // Under the large code model, we cannot assume that __morestack lives
3420 // within 2^31 bytes of the call site, so we cannot use pc-relative
3421 // addressing. We cannot perform the call via a temporary register,
3422 // as the rax register may be used to store the static chain, and all
3423 // other suitable registers may be either callee-save or used for
3424 // parameter passing. We cannot use the stack at this point either
3425 // because __morestack manipulates the stack directly.
3426 //
3427 // To avoid these issues, perform an indirect call via a read-only memory
3428 // location containing the address.
3429 //
3430 // This solution is not perfect, as it assumes that the .rodata section
3431 // is laid out within 2^31 bytes of each function body, but this seems
3432 // to be sufficient for JIT.
3433 // FIXME: Add retpoline support and remove the error here.
3435 report_fatal_error("Emitting morestack calls on 64-bit with the large "
3436 "code model and thunks not yet implemented.");
3437 BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
3438 .addReg(X86::RIP)
3439 .addImm(0)
3440 .addReg(0)
3441 .addExternalSymbol("__morestack_addr")
3442 .addReg(0);
3443 } else {
3444 if (Is64Bit)
3445 BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
3446 .addExternalSymbol("__morestack");
3447 else
3448 BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
3449 .addExternalSymbol("__morestack");
3450 }
3451
3452 if (IsNested)
3453 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
3454 else
3455 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
3456
3457 allocMBB->addSuccessor(&PrologueMBB);
3458
3459 checkMBB->addSuccessor(allocMBB, BranchProbability::getZero());
3460 checkMBB->addSuccessor(&PrologueMBB, BranchProbability::getOne());
3461
3462#ifdef EXPENSIVE_CHECKS
3463 MF.verify();
3464#endif
3465}
3466
3467/// Lookup an ERTS parameter in the !hipe.literals named metadata node.
3468/// HiPE provides Erlang Runtime System-internal parameters, such as PCB offsets
3469/// to fields it needs, through a named metadata node "hipe.literals" containing
3470/// name-value pairs.
3471static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD,
3472 const StringRef LiteralName) {
3473 for (int i = 0, e = HiPELiteralsMD->getNumOperands(); i != e; ++i) {
3474 MDNode *Node = HiPELiteralsMD->getOperand(i);
3475 if (Node->getNumOperands() != 2)
3476 continue;
3477 MDString *NodeName = dyn_cast<MDString>(Node->getOperand(0));
3478 ValueAsMetadata *NodeVal = dyn_cast<ValueAsMetadata>(Node->getOperand(1));
3479 if (!NodeName || !NodeVal)
3480 continue;
3481 ConstantInt *ValConst = dyn_cast_or_null<ConstantInt>(NodeVal->getValue());
3482 if (ValConst && NodeName->getString() == LiteralName) {
3483 return ValConst->getZExtValue();
3484 }
3485 }
3486
3487 report_fatal_error("HiPE literal " + LiteralName +
3488 " required but not provided");
3489}
3490
3491// Return true if there are no non-ehpad successors to MBB and there are no
3492// non-meta instructions between MBBI and MBB.end().
3495 return llvm::all_of(
3496 MBB.successors(),
3497 [](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) &&
3498 std::all_of(MBBI, MBB.end(), [](const MachineInstr &MI) {
3499 return MI.isMetaInstruction();
3500 });
3501}
3502
3503/// Erlang programs may need a special prologue to handle the stack size they
3504/// might need at runtime. That is because Erlang/OTP does not implement a C
3505/// stack but uses a custom implementation of hybrid stack/heap architecture.
3506/// (for more information see Eric Stenman's Ph.D. thesis:
3507/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
3508///
3509/// CheckStack:
3510/// temp0 = sp - MaxStack
3511/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3512/// OldStart:
3513/// ...
3514/// IncStack:
3515/// call inc_stack # doubles the stack space
3516/// temp0 = sp - MaxStack
3517/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3519 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3520 MachineFrameInfo &MFI = MF.getFrameInfo();
3521 DebugLoc DL;
3522
3523 // To support shrink-wrapping we would need to insert the new blocks
3524 // at the right place and update the branches to PrologueMBB.
3525 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3526
3527 // HiPE-specific values
3528 NamedMDNode *HiPELiteralsMD =
3529 MF.getFunction().getParent()->getNamedMetadata("hipe.literals");
3530 if (!HiPELiteralsMD)
3532 "Can't generate HiPE prologue without runtime parameters");
3533 const unsigned HipeLeafWords = getHiPELiteral(
3534 HiPELiteralsMD, Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS");
3535 const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
3536 const unsigned Guaranteed = HipeLeafWords * SlotSize;
3537 unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs
3538 ? MF.getFunction().arg_size() - CCRegisteredArgs
3539 : 0;
3540 unsigned MaxStack = MFI.getStackSize() + CallerStkArity * SlotSize + SlotSize;
3541
3543 "HiPE prologue is only supported on Linux operating systems.");
3544
3545 // Compute the largest caller's frame that is needed to fit the callees'
3546 // frames. This 'MaxStack' is computed from:
3547 //
3548 // a) the fixed frame size, which is the space needed for all spilled temps,
3549 // b) outgoing on-stack parameter areas, and
3550 // c) the minimum stack space this function needs to make available for the
3551 // functions it calls (a tunable ABI property).
3552 if (MFI.hasCalls()) {
3553 unsigned MoreStackForCalls = 0;
3554
3555 for (auto &MBB : MF) {
3556 for (auto &MI : MBB) {
3557 if (!MI.isCall())
3558 continue;
3559
3560 // Get callee operand.
3561 const MachineOperand &MO = MI.getOperand(0);
3562
3563 // Only take account of global function calls (no closures etc.).
3564 if (!MO.isGlobal())
3565 continue;
3566
3567 const Function *F = dyn_cast<Function>(MO.getGlobal());
3568 if (!F)
3569 continue;
3570
3571 // Do not update 'MaxStack' for primitive and built-in functions
3572 // (encoded with names either starting with "erlang."/"bif_" or not
3573 // having a ".", such as a simple <Module>.<Function>.<Arity>, or an
3574 // "_", such as the BIF "suspend_0") as they are executed on another
3575 // stack.
3576 if (F->getName().contains("erlang.") || F->getName().contains("bif_") ||
3577 F->getName().find_first_of("._") == StringRef::npos)
3578 continue;
3579
3580 unsigned CalleeStkArity = F->arg_size() > CCRegisteredArgs
3581 ? F->arg_size() - CCRegisteredArgs
3582 : 0;
3583 if (HipeLeafWords - 1 > CalleeStkArity)
3584 MoreStackForCalls =
3585 std::max(MoreStackForCalls,
3586 (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
3587 }
3588 }
3589 MaxStack += MoreStackForCalls;
3590 }
3591
3592 // If the stack frame needed is larger than the guaranteed then runtime checks
3593 // and calls to "inc_stack_0" BIF should be inserted in the assembly prologue.
3594 if (MaxStack > Guaranteed) {
3595 MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
3596 MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
3597
3598 for (const auto &LI : PrologueMBB.liveins()) {
3599 stackCheckMBB->addLiveIn(LI);
3600 incStackMBB->addLiveIn(LI);
3601 }
3602
3603 MF.push_front(incStackMBB);
3604 MF.push_front(stackCheckMBB);
3605
3606 unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
3607 unsigned LEAop, CMPop, CALLop;
3608 SPLimitOffset = getHiPELiteral(HiPELiteralsMD, "P_NSP_LIMIT");
3609 if (Is64Bit) {
3610 SPReg = X86::RSP;
3611 PReg = X86::RBP;
3612 LEAop = X86::LEA64r;
3613 CMPop = X86::CMP64rm;
3614 CALLop = X86::CALL64pcrel32;
3615 } else {
3616 SPReg = X86::ESP;
3617 PReg = X86::EBP;
3618 LEAop = X86::LEA32r;
3619 CMPop = X86::CMP32rm;
3620 CALLop = X86::CALLpcrel32;
3621 }
3622
3623 ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3624 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3625 "HiPE prologue scratch register is live-in");
3626
3627 // Create new MBB for StackCheck:
3628 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
3629 false, -MaxStack);
3630 // SPLimitOffset is in a fixed heap location (pointed by BP).
3631 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
3632 PReg, false, SPLimitOffset);
3633 BuildMI(stackCheckMBB, DL, TII.get(X86::JCC_1))
3634 .addMBB(&PrologueMBB)
3636
3637 // Create new MBB for IncStack:
3638 BuildMI(incStackMBB, DL, TII.get(CALLop)).addExternalSymbol("inc_stack_0");
3639 addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
3640 false, -MaxStack);
3641 addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
3642 PReg, false, SPLimitOffset);
3643 BuildMI(incStackMBB, DL, TII.get(X86::JCC_1))
3644 .addMBB(incStackMBB)
3646
3647 stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100});
3648 stackCheckMBB->addSuccessor(incStackMBB, {1, 100});
3649 incStackMBB->addSuccessor(&PrologueMBB, {99, 100});
3650 incStackMBB->addSuccessor(incStackMBB, {1, 100});
3651 }
3652#ifdef EXPENSIVE_CHECKS
3653 MF.verify();
3654#endif
3655}
3656
3657bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
3659 const DebugLoc &DL,
3660 int Offset) const {
3661 if (Offset <= 0)
3662 return false;
3663
3664 if (Offset % SlotSize)
3665 return false;
3666
3667 int NumPops = Offset / SlotSize;
3668 // This is only worth it if we have at most 2 pops.
3669 if (NumPops != 1 && NumPops != 2)
3670 return false;
3671
3672 // Handle only the trivial case where the adjustment directly follows
3673 // a call. This is the most common one, anyway.
3674 if (MBBI == MBB.begin())
3675 return false;
3676 MachineBasicBlock::iterator Prev = std::prev(MBBI);
3677 if (!Prev->isCall() || !Prev->getOperand(1).isRegMask())
3678 return false;
3679
3680 unsigned Regs[2];
3681 unsigned FoundRegs = 0;
3682
3684 const MachineOperand &RegMask = Prev->getOperand(1);
3685
3686 auto &RegClass =
3687 Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass;
3688 // Try to find up to NumPops free registers.
3689 for (auto Candidate : RegClass) {
3690 // Poor man's liveness:
3691 // Since we're immediately after a call, any register that is clobbered
3692 // by the call and not defined by it can be considered dead.
3693 if (!RegMask.clobbersPhysReg(Candidate))
3694 continue;
3695
3696 // Don't clobber reserved registers
3697 if (MRI.isReserved(Candidate))
3698 continue;
3699
3700 bool IsDef = false;
3701 for (const MachineOperand &MO : Prev->implicit_operands()) {
3702 if (MO.isReg() && MO.isDef() &&
3703 TRI->isSuperOrSubRegisterEq(MO.getReg(), Candidate)) {
3704 IsDef = true;
3705 break;
3706 }
3707 }
3708
3709 if (IsDef)
3710 continue;
3711
3712 Regs[FoundRegs++] = Candidate;
3713 if (FoundRegs == (unsigned)NumPops)
3714 break;
3715 }
3716
3717 if (FoundRegs == 0)
3718 return false;
3719
3720 // If we found only one free register, but need two, reuse the same one twice.
3721 while (FoundRegs < (unsigned)NumPops)
3722 Regs[FoundRegs++] = Regs[0];
3723
3724 for (int i = 0; i < NumPops; ++i)
3725 BuildMI(MBB, MBBI, DL, TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r),
3726 Regs[i]);
3727
3728 return true;
3729}
3730
3734 bool reserveCallFrame = hasReservedCallFrame(MF);
3735 unsigned Opcode = I->getOpcode();
3736 bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
3737 DebugLoc DL = I->getDebugLoc(); // copy DebugLoc as I will be erased.
3738 uint64_t Amount = TII.getFrameSize(*I);
3739 uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(*I) : 0;
3740 I = MBB.erase(I);
3741 auto InsertPos = skipDebugInstructionsForward(I, MBB.end());
3742
3743 // Try to avoid emitting dead SP adjustments if the block end is unreachable,
3744 // typically because the function is marked noreturn (abort, throw,
3745 // assert_fail, etc).
3746 if (isDestroy && blockEndIsUnreachable(MBB, I))
3747 return I;
3748
3749 if (!reserveCallFrame) {
3750 // If the stack pointer can be changed after prologue, turn the
3751 // adjcallstackup instruction into a 'sub ESP, <amt>' and the
3752 // adjcallstackdown instruction into 'add ESP, <amt>'
3753
3754 // We need to keep the stack aligned properly. To do this, we round the
3755 // amount of space needed for the outgoing arguments up to the next
3756 // alignment boundary.
3757 Amount = alignTo(Amount, getStackAlign());
3758
3759 const Function &F = MF.getFunction();
3760 bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
3761 bool DwarfCFI = !WindowsCFI && MF.needsFrameMoves();
3762
3763 // If we have any exception handlers in this function, and we adjust
3764 // the SP before calls, we may need to indicate this to the unwinder
3765 // using GNU_ARGS_SIZE. Note that this may be necessary even when
3766 // Amount == 0, because the preceding function may have set a non-0
3767 // GNU_ARGS_SIZE.
3768 // TODO: We don't need to reset this between subsequent functions,
3769 // if it didn't change.
3770 bool HasDwarfEHHandlers = !WindowsCFI && !MF.getLandingPads().empty();
3771
3772 if (HasDwarfEHHandlers && !isDestroy &&
3774 BuildCFI(MBB, InsertPos, DL,
3775 MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
3776
3777 if (Amount == 0)
3778 return I;
3779
3780 // Factor out the amount that gets handled inside the sequence
3781 // (Pushes of argument for frame setup, callee pops for frame destroy)
3782 Amount -= InternalAmt;
3783
3784 // TODO: This is needed only if we require precise CFA.
3785 // If this is a callee-pop calling convention, emit a CFA adjust for
3786 // the amount the callee popped.
3787 if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
3788 BuildCFI(MBB, InsertPos, DL,
3789 MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
3790
3791 // Add Amount to SP to destroy a frame, or subtract to setup.
3792 int64_t StackAdjustment = isDestroy ? Amount : -Amount;
3793
3794 if (StackAdjustment) {
3795 // Merge with any previous or following adjustment instruction. Note: the
3796 // instructions merged with here do not have CFI, so their stack
3797 // adjustments do not feed into CfaAdjustment.
3798 StackAdjustment += mergeSPUpdates(MBB, InsertPos, true);
3799 StackAdjustment += mergeSPUpdates(MBB, InsertPos, false);
3800
3801 if (StackAdjustment) {
3802 if (!(F.hasMinSize() &&
3803 adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
3804 BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
3805 /*InEpilogue=*/false);
3806 }
3807 }
3808
3809 if (DwarfCFI && !hasFP(MF)) {
3810 // If we don't have FP, but need to generate unwind information,
3811 // we need to set the correct CFA offset after the stack adjustment.
3812 // How much we adjust the CFA offset depends on whether we're emitting
3813 // CFI only for EH purposes or for debugging. EH only requires the CFA
3814 // offset to be correct at each call site, while for debugging we want
3815 // it to be more precise.
3816
3817 int64_t CfaAdjustment = -StackAdjustment;
3818 // TODO: When not using precise CFA, we also need to adjust for the
3819 // InternalAmt here.
3820 if (CfaAdjustment) {
3821 BuildCFI(
3822 MBB, InsertPos, DL,
3823 MCCFIInstruction::createAdjustCfaOffset(nullptr, CfaAdjustment));
3824 }
3825 }
3826
3827 return I;
3828 }
3829
3830 if (InternalAmt) {
3833 while (CI != B && !std::prev(CI)->isCall())
3834 --CI;
3835 BuildStackAdjustment(MBB, CI, DL, -InternalAmt, /*InEpilogue=*/false);
3836 }
3837
3838 return I;
3839}
3840
3842 assert(MBB.getParent() && "Block is not attached to a function!");
3843 const MachineFunction &MF = *MBB.getParent();
3844 if (!MBB.isLiveIn(X86::EFLAGS))
3845 return true;
3846
3847 // If stack probes have to loop inline or call, that will clobber EFLAGS.
3848 // FIXME: we could allow cases that will use emitStackProbeInlineGenericBlock.
3850 const X86TargetLowering &TLI = *STI.getTargetLowering();
3851 if (TLI.hasInlineStackProbe(MF) || TLI.hasStackProbeSymbol(MF))
3852 return false;
3853
3855 return !TRI->hasStackRealignment(MF) && !X86FI->hasSwiftAsyncContext();
3856}
3857
3859 assert(MBB.getParent() && "Block is not attached to a function!");
3860
3861 // Win64 has strict requirements in terms of epilogue and we are
3862 // not taking a chance at messing with them.
3863 // I.e., unless this block is already an exit block, we can't use
3864 // it as an epilogue.
3865 if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock())
3866 return false;
3867
3868 // Swift async context epilogue has a BTR instruction that clobbers parts of
3869 // EFLAGS.
3870 const MachineFunction &MF = *MBB.getParent();
3873
3875 return true;
3876
3877 // If we cannot use LEA to adjust SP, we may need to use ADD, which
3878 // clobbers the EFLAGS. Check that we do not need to preserve it,
3879 // otherwise, conservatively assume this is not
3880 // safe to insert the epilogue here.
3882}
3883
3885 // If we may need to emit frameless compact unwind information, give
3886 // up as this is currently broken: PR25614.
3887 bool CompactUnwind =
3889 return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF) ||
3890 !CompactUnwind) &&
3891 // The lowering of segmented stack and HiPE only support entry
3892 // blocks as prologue blocks: PR26107. This limitation may be
3893 // lifted if we fix:
3894 // - adjustForSegmentedStacks
3895 // - adjustForHiPEPrologue
3897 !MF.shouldSplitStack();
3898}
3899
3902 const DebugLoc &DL, bool RestoreSP) const {
3903 assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env");
3904 assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32");
3905 assert(STI.is32Bit() && !Uses64BitFramePtr &&
3906 "restoring EBP/ESI on non-32-bit target");
3907
3908 MachineFunction &MF = *MBB.getParent();
3910 Register BasePtr = TRI->getBaseRegister();
3911 WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo();
3913 MachineFrameInfo &MFI = MF.getFrameInfo();
3914
3915 // FIXME: Don't set FrameSetup flag in catchret case.
3916
3917 int FI = FuncInfo.EHRegNodeFrameIndex;
3918 int EHRegSize = MFI.getObjectSize(FI);
3919
3920 if (RestoreSP) {
3921 // MOV32rm -EHRegSize(%ebp), %esp
3922 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP),
3923 X86::EBP, true, -EHRegSize)
3925 }
3926
3927 Register UsedReg;
3928 int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg).getFixed();
3929 int EndOffset = -EHRegOffset - EHRegSize;
3930 FuncInfo.EHRegNodeEndOffset = EndOffset;
3931
3932 if (UsedReg == FramePtr) {
3933 // ADD $offset, %ebp
3934 unsigned ADDri = getADDriOpcode(false);
3935 BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr)
3937 .addImm(EndOffset)
3939 ->getOperand(3)
3940 .setIsDead();
3941 assert(EndOffset >= 0 &&
3942 "end of registration object above normal EBP position!");
3943 } else if (UsedReg == BasePtr) {
3944 // LEA offset(%ebp), %esi
3945 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr),
3946 FramePtr, false, EndOffset)
3948 // MOV32rm SavedEBPOffset(%esi), %ebp
3949 assert(X86FI->getHasSEHFramePtrSave());
3950 int Offset =
3951 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
3952 .getFixed();
3953 assert(UsedReg == BasePtr);
3954 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr),
3955 UsedReg, true, Offset)
3957 } else {
3958 llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr");
3959 }
3960 return MBBI;
3961}
3962
3964 return TRI->getSlotSize();
3965}
3966
3969 return StackPtr;
3970}
3971
3975 Register FrameRegister = RI->getFrameRegister(MF);
3976 if (getInitialCFARegister(MF) == FrameRegister &&
3978 DwarfFrameBase FrameBase;
3979 FrameBase.Kind = DwarfFrameBase::CFA;
3980 FrameBase.Location.Offset =
3982 return FrameBase;
3983 }
3984
3985 return DwarfFrameBase{DwarfFrameBase::Register, {FrameRegister}};
3986}
3987
3988namespace {
3989// Struct used by orderFrameObjects to help sort the stack objects.
3990struct X86FrameSortingObject {
3991 bool IsValid = false; // true if we care about this Object.
3992 unsigned ObjectIndex = 0; // Index of Object into MFI list.
3993 unsigned ObjectSize = 0; // Size of Object in bytes.
3994 Align ObjectAlignment = Align(1); // Alignment of Object in bytes.
3995 unsigned ObjectNumUses = 0; // Object static number of uses.
3996};
3997
3998// The comparison function we use for std::sort to order our local
3999// stack symbols. The current algorithm is to use an estimated
4000// "density". This takes into consideration the size and number of
4001// uses each object has in order to roughly minimize code size.
4002// So, for example, an object of size 16B that is referenced 5 times
4003// will get higher priority than 4 4B objects referenced 1 time each.
4004// It's not perfect and we may be able to squeeze a few more bytes out of
4005// it (for example : 0(esp) requires fewer bytes, symbols allocated at the
4006// fringe end can have special consideration, given their size is less
4007// important, etc.), but the algorithmic complexity grows too much to be
4008// worth the extra gains we get. This gets us pretty close.
4009// The final order leaves us with objects with highest priority going
4010// at the end of our list.
4011struct X86FrameSortingComparator {
4012 inline bool operator()(const X86FrameSortingObject &A,
4013 const X86FrameSortingObject &B) const {
4014 uint64_t DensityAScaled, DensityBScaled;
4015
4016 // For consistency in our comparison, all invalid objects are placed
4017 // at the end. This also allows us to stop walking when we hit the
4018 // first invalid item after it's all sorted.
4019 if (!A.IsValid)
4020 return false;
4021 if (!B.IsValid)
4022 return true;
4023
4024 // The density is calculated by doing :
4025 // (double)DensityA = A.ObjectNumUses / A.ObjectSize
4026 // (double)DensityB = B.ObjectNumUses / B.ObjectSize
4027 // Since this approach may cause inconsistencies in
4028 // the floating point <, >, == comparisons, depending on the floating
4029 // point model with which the compiler was built, we're going
4030 // to scale both sides by multiplying with
4031 // A.ObjectSize * B.ObjectSize. This ends up factoring away
4032 // the division and, with it, the need for any floating point
4033 // arithmetic.
4034 DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) *
4035 static_cast<uint64_t>(B.ObjectSize);
4036 DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) *
4037 static_cast<uint64_t>(A.ObjectSize);
4038
4039 // If the two densities are equal, prioritize highest alignment
4040 // objects. This allows for similar alignment objects
4041 // to be packed together (given the same density).
4042 // There's room for improvement here, also, since we can pack
4043 // similar alignment (different density) objects next to each
4044 // other to save padding. This will also require further
4045 // complexity/iterations, and the overall gain isn't worth it,
4046 // in general. Something to keep in mind, though.
4047 if (DensityAScaled == DensityBScaled)
4048 return A.ObjectAlignment < B.ObjectAlignment;
4049
4050 return DensityAScaled < DensityBScaled;
4051 }
4052};
4053} // namespace
4054
4055// Order the symbols in the local stack.
4056// We want to place the local stack objects in some sort of sensible order.
4057// The heuristic we use is to try and pack them according to static number
4058// of uses and size of object in order to minimize code size.
4060 const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
4061 const MachineFrameInfo &MFI = MF.getFrameInfo();
4062
4063 // Don't waste time if there's nothing to do.
4064 if (ObjectsToAllocate.empty())
4065 return;
4066
4067 // Create an array of all MFI objects. We won't need all of these
4068 // objects, but we're going to create a full array of them to make
4069 // it easier to index into when we're counting "uses" down below.
4070 // We want to be able to easily/cheaply access an object by simply
4071 // indexing into it, instead of having to search for it every time.
4072 std::vector<X86FrameSortingObject> SortingObjects(MFI.getObjectIndexEnd());
4073
4074 // Walk the objects we care about and mark them as such in our working
4075 // struct.
4076 for (auto &Obj : ObjectsToAllocate) {
4077 SortingObjects[Obj].IsValid = true;
4078 SortingObjects[Obj].ObjectIndex = Obj;
4079 SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlign(Obj);
4080 // Set the size.
4081 int ObjectSize = MFI.getObjectSize(Obj);
4082 if (ObjectSize == 0)
4083 // Variable size. Just use 4.
4084 SortingObjects[Obj].ObjectSize = 4;
4085 else
4086 SortingObjects[Obj].ObjectSize = ObjectSize;
4087 }
4088
4089 // Count the number of uses for each object.
4090 for (auto &MBB : MF) {
4091 for (auto &MI : MBB) {
4092 if (MI.isDebugInstr())
4093 continue;
4094 for (const MachineOperand &MO : MI.operands()) {
4095 // Check to see if it's a local stack symbol.
4096 if (!MO.isFI())
4097 continue;
4098 int Index = MO.getIndex();
4099 // Check to see if it falls within our range, and is tagged
4100 // to require ordering.
4101 if (Index >= 0 && Index < MFI.getObjectIndexEnd() &&
4102 SortingObjects[Index].IsValid)
4103 SortingObjects[Index].ObjectNumUses++;
4104 }
4105 }
4106 }
4107
4108 // Sort the objects using X86FrameSortingAlgorithm (see its comment for
4109 // info).
4110 llvm::stable_sort(SortingObjects, X86FrameSortingComparator());
4111
4112 // Now modify the original list to represent the final order that
4113 // we want. The order will depend on whether we're going to access them
4114 // from the stack pointer or the frame pointer. For SP, the list should
4115 // end up with the END containing objects that we want with smaller offsets.
4116 // For FP, it should be flipped.
4117 int i = 0;
4118 for (auto &Obj : SortingObjects) {
4119 // All invalid items are sorted at the end, so it's safe to stop.
4120 if (!Obj.IsValid)
4121 break;
4122 ObjectsToAllocate[i++] = Obj.ObjectIndex;
4123 }
4124
4125 // Flip it if we're accessing off of the FP.
4126 if (!TRI->hasStackRealignment(MF) && hasFP(MF))
4127 std::reverse(ObjectsToAllocate.begin(), ObjectsToAllocate.end());
4128}
4129
4130unsigned
4132 // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue.
4133 unsigned Offset = 16;
4134 // RBP is immediately pushed.
4135 Offset += SlotSize;
4136 // All callee-saved registers are then pushed.
4137 Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
4138 // Every funclet allocates enough stack space for the largest outgoing call.
4139 Offset += getWinEHFuncletFrameSize(MF);
4140 return Offset;
4141}
4142
4144 MachineFunction &MF, RegScavenger *RS) const {
4145 // Mark the function as not having WinCFI. We will set it back to true in
4146 // emitPrologue if it gets called and emits CFI.
4147 MF.setHasWinCFI(false);
4148
4149 // If we are using Windows x64 CFI, ensure that the stack is always 8 byte
4150 // aligned. The format doesn't support misaligned stack adjustments.
4153
4154 // If this function isn't doing Win64-style C++ EH, we don't need to do
4155 // anything.
4156 if (STI.is64Bit() && MF.hasEHFunclets() &&
4159 adjustFrameForMsvcCxxEh(MF);
4160 }
4161}
4162
4163void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const {
4164 // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset
4165 // relative to RSP after the prologue. Find the offset of the last fixed
4166 // object, so that we can allocate a slot immediately following it. If there
4167 // were no fixed objects, use offset -SlotSize, which is immediately after the
4168 // return address. Fixed objects have negative frame indices.
4169 MachineFrameInfo &MFI = MF.getFrameInfo();
4170 WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
4171 int64_t MinFixedObjOffset = -SlotSize;
4172 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I)
4173 MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I));
4174
4175 for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
4176 for (WinEHHandlerType &H : TBME.HandlerArray) {
4177 int FrameIndex = H.CatchObj.FrameIndex;
4178 if (FrameIndex != INT_MAX) {
4179 // Ensure alignment.
4180 unsigned Align = MFI.getObjectAlign(FrameIndex).value();
4181 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Align;
4182 MinFixedObjOffset -= MFI.getObjectSize(FrameIndex);
4183 MFI.setObjectOffset(FrameIndex, MinFixedObjOffset);
4184 }
4185 }
4186 }
4187
4188 // Ensure alignment.
4189 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8;
4190 int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
4191 int UnwindHelpFI =
4192 MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*IsImmutable=*/false);
4193 EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
4194
4195 // Store -2 into UnwindHelp on function entry. We have to scan forwards past
4196 // other frame setup instructions.
4197 MachineBasicBlock &MBB = MF.front();
4198 auto MBBI = MBB.begin();
4199 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
4200 ++MBBI;
4201
4203 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)),
4204 UnwindHelpFI)
4205 .addImm(-2);
4206}
4207
4209 MachineFunction &MF, RegScavenger *RS) const {
4210 auto *X86FI = MF.getInfo<X86MachineFunctionInfo>();
4211
4212 if (STI.is32Bit() && MF.hasEHFunclets())
4214 // We have emitted prolog and epilog. Don't need stack pointer saving
4215 // instruction any more.
4216 if (MachineInstr *MI = X86FI->getStackPtrSaveMI()) {
4217 MI->eraseFromParent();
4218 X86FI->setStackPtrSaveMI(nullptr);
4219 }
4220}
4221
4223 MachineFunction &MF) const {
4224 // 32-bit functions have to restore stack pointers when control is transferred
4225 // back to the parent function. These blocks are identified as eh pads that
4226 // are not funclet entries.
4227 bool IsSEH = isAsynchronousEHPersonality(
4229 for (MachineBasicBlock &MBB : MF) {
4230 bool NeedsRestore = MBB.isEHPad() && !MBB.isEHFuncletEntry();
4231 if (NeedsRestore)
4233 /*RestoreSP=*/IsSEH);
4234 }
4235}
4236
4237// Compute the alignment gap between current SP after spilling FP/BP and the
4238// next properly aligned stack offset.
4240 const TargetRegisterClass *RC,
4241 unsigned NumSpilledRegs) {
4243 unsigned AllocSize = TRI->getSpillSize(*RC) * NumSpilledRegs;
4244 Align StackAlign = MF.getSubtarget().getFrameLowering()->getStackAlign();
4245 unsigned AlignedSize = alignTo(AllocSize, StackAlign);
4246 return AlignedSize - AllocSize;
4247}
4248
4249void X86FrameLowering::spillFPBPUsingSP(MachineFunction &MF,
4251 Register FP, Register BP,
4252 int SPAdjust) const {
4253 assert(FP.isValid() || BP.isValid());
4254
4255 MachineBasicBlock *MBB = BeforeMI->getParent();
4256 DebugLoc DL = BeforeMI->getDebugLoc();
4257
4258 // Spill FP.
4259 if (FP.isValid()) {
4260 BuildMI(*MBB, BeforeMI, DL,
4262 .addReg(FP);
4263 }
4264
4265 // Spill BP.
4266 if (BP.isValid()) {
4267 BuildMI(*MBB, BeforeMI, DL,
4269 .addReg(BP);
4270 }
4271
4272 // Make sure SP is aligned.
4273 if (SPAdjust)
4274 emitSPUpdate(*MBB, BeforeMI, DL, -SPAdjust, false);
4275
4276 // Emit unwinding information.
4277 if (FP.isValid() && needsDwarfCFI(MF)) {
4278 // Emit .cfi_remember_state to remember old frame.
4279 unsigned CFIIndex =
4281 BuildMI(*MBB, BeforeMI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
4282 .addCFIIndex(CFIIndex);
4283
4284 // Setup new CFA value with DW_CFA_def_cfa_expression:
4285 // DW_OP_breg7+offset, DW_OP_deref, DW_OP_consts 16, DW_OP_plus
4286 SmallString<64> CfaExpr;
4287 uint8_t buffer[16];
4288 int Offset = SPAdjust;
4289 if (BP.isValid())
4290 Offset += TRI->getSpillSize(*TRI->getMinimalPhysRegClass(BP));
4291 // If BeforeMI is a frame setup instruction, we need to adjust the position
4292 // and offset of the new cfi instruction.
4293 if (TII.isFrameSetup(*BeforeMI)) {
4294 Offset += alignTo(TII.getFrameSize(*BeforeMI), getStackAlign());
4295 BeforeMI = std::next(BeforeMI);
4296 }
4298 if (STI.isTarget64BitILP32())
4300 unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackPtr, true);
4301 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfStackPtr));
4302 CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
4303 CfaExpr.push_back(dwarf::DW_OP_deref);
4304 CfaExpr.push_back(dwarf::DW_OP_consts);
4305 CfaExpr.append(buffer, buffer + encodeSLEB128(SlotSize * 2, buffer));
4306 CfaExpr.push_back((uint8_t)dwarf::DW_OP_plus);
4307
4308 SmallString<64> DefCfaExpr;
4309 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
4310 DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
4311 DefCfaExpr.append(CfaExpr.str());
4312 BuildCFI(*MBB, BeforeMI, DL,
4313 MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
4315 }
4316}
4317
4318void X86FrameLowering::restoreFPBPUsingSP(MachineFunction &MF,
4320 Register FP, Register BP,
4321 int SPAdjust) const {
4322 assert(FP.isValid() || BP.isValid());
4323
4324 // Adjust SP so it points to spilled FP or BP.
4325 MachineBasicBlock *MBB = AfterMI->getParent();
4326 MachineBasicBlock::iterator Pos = std::next(AfterMI);
4327 DebugLoc DL = AfterMI->getDebugLoc();
4328 if (SPAdjust)
4329 emitSPUpdate(*MBB, Pos, DL, SPAdjust, false);
4330
4331 // Restore BP.
4332 if (BP.isValid()) {
4333 BuildMI(*MBB, Pos, DL,
4334 TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())), BP);
4335 }
4336
4337 // Restore FP.
4338 if (FP.isValid()) {
4339 BuildMI(*MBB, Pos, DL,
4341
4342 // Emit unwinding information.
4343 if (needsDwarfCFI(MF)) {
4344 // Restore original frame with .cfi_restore_state.
4345 unsigned CFIIndex =
4347 BuildMI(*MBB, Pos, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
4348 .addCFIIndex(CFIIndex);
4349 }
4350 }
4351}
4352
4353void X86FrameLowering::saveAndRestoreFPBPUsingSP(
4355 MachineBasicBlock::iterator AfterMI, bool SpillFP, bool SpillBP) const {
4356 assert(SpillFP || SpillBP);
4357
4358 Register FP, BP;
4359 const TargetRegisterClass *RC;
4360 unsigned NumRegs = 0;
4361
4362 if (SpillFP) {
4363 FP = TRI->getFrameRegister(MF);
4364 if (STI.isTarget64BitILP32())
4366 RC = TRI->getMinimalPhysRegClass(FP);
4367 ++NumRegs;
4368 }
4369 if (SpillBP) {
4370 BP = TRI->getBaseRegister();
4371 if (STI.isTarget64BitILP32())
4372 BP = Register(getX86SubSuperRegister(BP, 64));
4373 RC = TRI->getMinimalPhysRegClass(BP);
4374 ++NumRegs;
4375 }
4376 int SPAdjust = computeFPBPAlignmentGap(MF, RC, NumRegs);
4377
4378 spillFPBPUsingSP(MF, BeforeMI, FP, BP, SPAdjust);
4379 restoreFPBPUsingSP(MF, AfterMI, FP, BP, SPAdjust);
4380}
4381
4382bool X86FrameLowering::skipSpillFPBP(
4384 if (MI->getOpcode() == X86::LCMPXCHG16B_SAVE_RBX) {
4385 // The pseudo instruction LCMPXCHG16B_SAVE_RBX is generated in the form
4386 // SaveRbx = COPY RBX
4387 // SaveRbx = LCMPXCHG16B_SAVE_RBX ..., SaveRbx, implicit-def rbx
4388 // And later LCMPXCHG16B_SAVE_RBX is expanded to restore RBX from SaveRbx.
4389 // We should skip this instruction sequence.
4390 int FI;
4391 unsigned Reg;
4392 while (!(MI->getOpcode() == TargetOpcode::COPY &&
4393 MI->getOperand(1).getReg() == X86::RBX) &&
4394 !((Reg = TII.isStoreToStackSlot(*MI, FI)) && Reg == X86::RBX))
4395 ++MI;
4396 return true;
4397 }
4398 return false;
4399}
4400
4402 const TargetRegisterInfo *TRI, bool &AccessFP,
4403 bool &AccessBP) {
4404 AccessFP = AccessBP = false;
4405 if (FP) {
4406 if (MI.findRegisterUseOperandIdx(FP, TRI, false) != -1 ||
4407 MI.findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
4408 AccessFP = true;
4409 }
4410 if (BP) {
4411 if (MI.findRegisterUseOperandIdx(BP, TRI, false) != -1 ||
4412 MI.findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
4413 AccessBP = true;
4414 }
4415 return AccessFP || AccessBP;
4416}
4417
4418// Invoke instruction has been lowered to normal function call. We try to figure
4419// out if MI comes from Invoke.
4420// Do we have any better method?
4421static bool isInvoke(const MachineInstr &MI, bool InsideEHLabels) {
4422 if (!MI.isCall())
4423 return false;
4424 if (InsideEHLabels)
4425 return true;
4426
4427 const MachineBasicBlock *MBB = MI.getParent();
4428 if (!MBB->hasEHPadSuccessor())
4429 return false;
4430
4431 // Check if there is another call instruction from MI to the end of MBB.
4433 for (++MBBI; MBBI != ME; ++MBBI)
4434 if (MBBI->isCall())
4435 return false;
4436 return true;
4437}
4438
4439/// Given the live range of FP or BP (DefMI, KillMI), check if there is any
4440/// interfered stack access in the range, usually generated by register spill.
4441void X86FrameLowering::checkInterferedAccess(
4443 MachineBasicBlock::reverse_iterator KillMI, bool SpillFP,
4444 bool SpillBP) const {
4445 if (DefMI == KillMI)
4446 return;
4447 if (TRI->hasBasePointer(MF)) {
4448 if (!SpillBP)
4449 return;
4450 } else {
4451 if (!SpillFP)
4452 return;
4453 }
4454
4455 auto MI = KillMI;
4456 while (MI != DefMI) {
4457 if (any_of(MI->operands(),
4458 [](const MachineOperand &MO) { return MO.isFI(); }))
4460 "Interference usage of base pointer/frame "
4461 "pointer.");
4462 MI++;
4463 }
4464}
4465
4466/// If a function uses base pointer and the base pointer is clobbered by inline
4467/// asm, RA doesn't detect this case, and after the inline asm, the base pointer
4468/// contains garbage value.
4469/// For example if a 32b x86 function uses base pointer esi, and esi is
4470/// clobbered by following inline asm
4471/// asm("rep movsb" : "+D"(ptr), "+S"(x), "+c"(c)::"memory");
4472/// We need to save esi before the asm and restore it after the asm.
4473///
4474/// The problem can also occur to frame pointer if there is a function call, and
4475/// the callee uses a different calling convention and clobbers the fp.
4476///
4477/// Because normal frame objects (spill slots) are accessed through fp/bp
4478/// register, so we can't spill fp/bp to normal spill slots.
4479///
4480/// FIXME: There are 2 possible enhancements:
4481/// 1. In many cases there are different physical registers not clobbered by
4482/// inline asm, we can use one of them as base pointer. Or use a virtual
4483/// register as base pointer and let RA allocate a physical register to it.
4484/// 2. If there is no other instructions access stack with fp/bp from the
4485/// inline asm to the epilog, and no cfi requirement for a correct fp, we can
4486/// skip the save and restore operations.
4488 Register FP, BP;
4490 if (TFI.hasFP(MF))
4491 FP = TRI->getFrameRegister(MF);
4492 if (TRI->hasBasePointer(MF))
4493 BP = TRI->getBaseRegister();
4494
4495 // Currently only inline asm and function call can clobbers fp/bp. So we can
4496 // do some quick test and return early.
4497 if (!MF.hasInlineAsm()) {
4499 if (!X86FI->getFPClobberedByCall())
4500 FP = 0;
4501 if (!X86FI->getBPClobberedByCall())
4502 BP = 0;
4503 }
4504 if (!FP && !BP)
4505 return;
4506
4507 for (MachineBasicBlock &MBB : MF) {
4508 bool InsideEHLabels = false;
4509 auto MI = MBB.rbegin(), ME = MBB.rend();
4510 auto TermMI = MBB.getFirstTerminator();
4511 if (TermMI == MBB.begin())
4512 continue;
4513 MI = *(std::prev(TermMI));
4514
4515 while (MI != ME) {
4516 // Skip frame setup/destroy instructions.
4517 // Skip Invoke (call inside try block) instructions.
4518 // Skip instructions handled by target.
4519 if (MI->getFlag(MachineInstr::MIFlag::FrameSetup) ||
4521 isInvoke(*MI, InsideEHLabels) || skipSpillFPBP(MF, MI)) {
4522 ++MI;
4523 continue;
4524 }
4525
4526 if (MI->getOpcode() == TargetOpcode::EH_LABEL) {
4527 InsideEHLabels = !InsideEHLabels;
4528 ++MI;
4529 continue;
4530 }
4531
4532 bool AccessFP, AccessBP;
4533 // Check if fp or bp is used in MI.
4534 if (!isFPBPAccess(*MI, FP, BP, TRI, AccessFP, AccessBP)) {
4535 ++MI;
4536 continue;
4537 }
4538
4539 // Look for the range [DefMI, KillMI] in which fp or bp is defined and
4540 // used.
4541 bool FPLive = false, BPLive = false;
4542 bool SpillFP = false, SpillBP = false;
4543 auto DefMI = MI, KillMI = MI;
4544 do {
4545 SpillFP |= AccessFP;
4546 SpillBP |= AccessBP;
4547
4548 // Maintain FPLive and BPLive.
4549 if (FPLive && MI->findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
4550 FPLive = false;
4551 if (FP && MI->findRegisterUseOperandIdx(FP, TRI, false) != -1)
4552 FPLive = true;
4553 if (BPLive && MI->findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
4554 BPLive = false;
4555 if (BP && MI->findRegisterUseOperandIdx(BP, TRI, false) != -1)
4556 BPLive = true;
4557
4558 DefMI = MI++;
4559 } while ((MI != ME) &&
4560 (FPLive || BPLive ||
4561 isFPBPAccess(*MI, FP, BP, TRI, AccessFP, AccessBP)));
4562
4563 // Don't need to save/restore if FP is accessed through llvm.frameaddress.
4564 if (FPLive && !SpillBP)
4565 continue;
4566
4567 // If the bp is clobbered by a call, we should save and restore outside of
4568 // the frame setup instructions.
4569 if (KillMI->isCall() && DefMI != ME) {
4570 auto FrameSetup = std::next(DefMI);
4571 // Look for frame setup instruction toward the start of the BB.
4572 // If we reach another call instruction, it means no frame setup
4573 // instruction for the current call instruction.
4574 while (FrameSetup != ME && !TII.isFrameSetup(*FrameSetup) &&
4575 !FrameSetup->isCall())
4576 ++FrameSetup;
4577 // If a frame setup instruction is found, we need to find out the
4578 // corresponding frame destroy instruction.
4579 if (FrameSetup != ME && TII.isFrameSetup(*FrameSetup) &&
4580 (TII.getFrameSize(*FrameSetup) ||
4581 TII.getFrameAdjustment(*FrameSetup))) {
4582 while (!TII.isFrameInstr(*KillMI))
4583 --KillMI;
4584 DefMI = FrameSetup;
4585 MI = DefMI;
4586 ++MI;
4587 }
4588 }
4589
4590 checkInterferedAccess(MF, DefMI, KillMI, SpillFP, SpillBP);
4591
4592 // Call target function to spill and restore FP and BP registers.
4593 saveAndRestoreFPBPUsingSP(MF, &(*DefMI), &(*KillMI), SpillFP, SpillBP);
4594 }
4595 }
4596}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool isFuncletReturnInstr(const MachineInstr &MI)
static const uint64_t kSplitStackAvailable
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Given that RA is a live value
uint64_t Size
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
static cl::opt< int > PageSize("imp-null-check-page-size", cl::desc("The page size of the target in bytes"), cl::init(4096), cl::Hidden)
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static bool isTailCallOpcode(unsigned Opc)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define H(x, y, z)
Definition: MD5.cpp:57
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static constexpr Register SPReg
static constexpr Register FPReg
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
static bool is64Bit(const char *name)
static unsigned calculateSetFPREG(uint64_t SPAdjust)
static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary)
GetScratchRegister - Get a temp register for performing work in the segmented stack and the Erlang/Hi...
static unsigned getADDriOpcode(bool IsLP64)
static unsigned getPUSH2Opcode(const X86Subtarget &ST)
static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm)
static unsigned getLEArOpcode(bool IsLP64)
static unsigned getSUBriOpcode(bool IsLP64)
static bool flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB)
Check if the flags need to be preserved before the terminators.
static bool isFPBPAccess(const MachineInstr &MI, Register FP, Register BP, const TargetRegisterInfo *TRI, bool &AccessFP, bool &AccessBP)
static bool isOpcodeRep(unsigned Opcode)
Return true if an opcode is part of the REP group of instructions.
static unsigned getANDriOpcode(bool IsLP64, int64_t Imm)
static bool isEAXLiveIn(MachineBasicBlock &MBB)
static int computeFPBPAlignmentGap(MachineFunction &MF, const TargetRegisterClass *RC, unsigned NumSpilledRegs)
static unsigned getADDrrOpcode(bool IsLP64)
static bool HasNestArgument(const MachineFunction *MF)
static unsigned getPOPOpcode(const X86Subtarget &ST)
static bool isInvoke(const MachineInstr &MI, bool InsideEHLabels)
static unsigned getPOP2Opcode(const X86Subtarget &ST)
static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD, const StringRef LiteralName)
Lookup an ERTS parameter in the !hipe.literals named metadata node.
static bool blockEndIsUnreachable(const MachineBasicBlock &MBB, MachineBasicBlock::const_iterator MBBI)
static unsigned getSUBrrOpcode(bool IsLP64)
static unsigned getPUSHOpcode(const X86Subtarget &ST)
static const unsigned FramePtr
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
reverse_iterator rend() const
Definition: ArrayRef.h:160
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:163
reverse_iterator rbegin() const
Definition: ArrayRef.h:159
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
BitVector & reset()
Definition: BitVector.h:392
BitVector & set()
Definition: BitVector.h:351
iterator_range< const_set_bits_iterator > set_bits() const
Definition: BitVector.h:140
static BranchProbability getOne()
static BranchProbability getZero()
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition: Function.h:277
bool hasPersonalityFn() const
Check whether this function has a personality function.
Definition: Function.h:905
Constant * getPersonalityFn() const
Get the personality function associated with this function.
Definition: Function.cpp:1048
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:353
size_t arg_size() const
Definition: Function.h:901
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
Definition: Function.h:682
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:234
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
A set of physical registers with utility functions to track liveness when walking backward/forward th...
Definition: LivePhysRegs.h:52
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:661
static MCCFIInstruction createDefCfaRegister(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_def_cfa_register modifies a rule for computing CFA.
Definition: MCDwarf.h:582
static MCCFIInstruction createGnuArgsSize(MCSymbol *L, int64_t Size, SMLoc Loc={})
A special wrapper for .cfi_escape that indicates GNU_ARGS_SIZE.
Definition: MCDwarf.h:693
static MCCFIInstruction createRestore(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_restore says that the rule for Register is now the same as it was at the beginning of the functi...
Definition: MCDwarf.h:656
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition: MCDwarf.h:575
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition: MCDwarf.h:617
static MCCFIInstruction createRememberState(MCSymbol *L, SMLoc Loc={})
.cfi_remember_state Save all current rules for all registers.
Definition: MCDwarf.h:676
OpType getOperation() const
Definition: MCDwarf.h:710
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition: MCDwarf.h:590
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition: MCDwarf.h:687
static MCCFIInstruction createAdjustCfaOffset(MCSymbol *L, int64_t Adjustment, SMLoc Loc={})
.cfi_adjust_cfa_offset Same as .cfi_def_cfa_offset, but Offset is a relative value that is added/subt...
Definition: MCDwarf.h:598
static MCCFIInstruction createRestoreState(MCSymbol *L, SMLoc Loc={})
.cfi_restore_state Restore the previously saved state.
Definition: MCDwarf.h:681
const MCObjectFileInfo * getObjectFileInfo() const
Definition: MCContext.h:416
const MCRegisterInfo * getRegisterInfo() const
Definition: MCContext.h:414
void reportError(SMLoc L, const Twine &Msg)
Definition: MCContext.cpp:1072
MCSection * getCompactUnwindSection() const
MCRegAliasIterator enumerates all registers aliasing Reg.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
Metadata node.
Definition: Metadata.h:1069
A single uniqued string.
Definition: Metadata.h:720
StringRef getString() const
Definition: Metadata.cpp:616
Machine Value Type.
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
bool isEHPad() const
Returns true if the block is a landing pad.
reverse_iterator rend()
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< livein_iterator > liveins() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool isEHFuncletEntry() const
Returns true if this is the entry block of an EH funclet.
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
bool isReturnBlock() const
Convenience function that returns true if the block ends in a return instruction.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< iterator > terminators()
iterator_range< succ_iterator > successors()
reverse_iterator rbegin()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
@ LQR_Live
Register is known to be (at least partially) live.
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
bool isCleanupFuncletEntry() const
Returns true if this is the entry block of a cleanup funclet.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool needsSplitStackProlog() const
Return true if this function requires a split stack prolog, even if it uses no stack space.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool adjustsStack() const
Return true if this function adjusts the stack – e.g., when calling another function.
void ensureMaxAlignment(Align Alignment)
Make sure the function is at least Align bytes aligned.
bool hasCalls() const
Return true if the current function has any function calls.
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
void setObjectOffset(int ObjectIdx, int64_t SPOffset)
Set the stack frame offset of the specified object.
uint64_t getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
bool hasOpaqueSPAdjustment() const
Returns true if the function contains opaque dynamic stack adjustments.
void setCVBytesOfCalleeSavedRegisters(unsigned S)
int CreateSpillStackObject(uint64_t Size, Align Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a...
const std::vector< CalleeSavedInfo > & getCalleeSavedInfo() const
Returns a reference to call saved info vector for the current function.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
bool hasCopyImplyingStackAdjustment() const
Returns true if the function contains operations which will lower down to instructions which manipula...
bool hasStackObjects() const
Return true if there are any stack objects in this function.
int CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset, bool IsImmutable=false)
Create a spill slot at a fixed location on the stack.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
void setStackSize(uint64_t Size)
Set the size of the stack.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
void setOffsetAdjustment(int64_t Adj)
Set the correction for frame offsets.
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const std::vector< MCCFIInstruction > & getFrameInstructions() const
Returns a reference to a list of cfi instructions in the function's prologue.
bool hasInlineAsm() const
Returns true if the function contains any inline assembly.
void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair, unsigned SubReg=0)
Create a substitution between one <instr,operand> value to a different, new value.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
bool needsFrameMoves() const
True if this function needs frame moves for debug or exceptions.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
bool callsUnwindInit() const
void push_front(MachineBasicBlock *MBB)
const char * createExternalSymbolName(StringRef Name)
Allocate a string and populate it with the given external symbol name.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
bool verify(Pass *p=nullptr, const char *Banner=nullptr, raw_ostream *OS=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const std::vector< LandingPadInfo > & getLandingPads() const
Return a reference to the landing pad info for the current function.
bool shouldSplitStack() const
Should we be emitting segmented stack stuff for the function.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:578
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:499
unsigned getDebugInstrNum()
Fetch the instruction number of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
@ MOVolatile
The memory access is volatile.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
bool isLiveIn(Register Reg) const
NamedMDNode * getNamedMetadata(StringRef Name) const
Return the first NamedMDNode in the module with the specified name.
Definition: Module.cpp:297
unsigned getCodeViewFlag() const
Returns the CodeView Version by checking module flags.
Definition: Module.cpp:597
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:310
iterator end() const
Definition: ArrayRef.h:360
iterator begin() const
Definition: ArrayRef.h:359
A tuple of MDNodes.
Definition: Metadata.h:1731
MDNode * getOperand(unsigned i) const
Definition: Metadata.cpp:1425
unsigned getNumOperands() const
Definition: Metadata.cpp:1421
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
Represents a location in source code.
Definition: SMLoc.h:23
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:65
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
Definition: SmallString.h:254
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void push_back(const T &Elt)
Definition: SmallVector.h:413
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
int64_t getFixed() const
Returns the fixed component of the stack.
Definition: TypeSize.h:49
static StackOffset getFixed(int64_t Fixed)
Definition: TypeSize.h:42
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
static constexpr size_t npos
Definition: StringRef.h:53
Information about stack frame layout on the target.
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
int getOffsetOfLocalArea() const
getOffsetOfLocalArea - This method returns the offset of the local area from the stack pointer on ent...
Align getStackAlign() const
getStackAlign - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
const Triple & getTargetTriple() const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
SwiftAsyncFramePointerMode SwiftAsyncFramePointer
Control when and how the Swift async frame pointer bit should be set.
bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual Register getFrameRegister(const MachineFunction &MF) const =0
Debug information queries.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetFrameLowering * getFrameLowering() const
bool isOSWindows() const
Tests whether the OS is Windows.
Definition: Triple.h:635
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
Definition: Triple.h:568
Value wrapper in the Metadata hierarchy.
Definition: Metadata.h:450
Value * getValue() const
Definition: Metadata.h:490
bool has128ByteRedZone(const MachineFunction &MF) const
Return true if the function has a redzone (accessible bytes past the frame of the top of stack functi...
void spillFPBP(MachineFunction &MF) const override
If a function uses base pointer and the base pointer is clobbered by inline asm, RA doesn't detect th...
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override
canSimplifyCallFramePseudos - If there is a reserved call frame, the call frame pseudos can be simpli...
bool needsFrameIndexResolution(const MachineFunction &MF) const override
X86FrameLowering(const X86Subtarget &STI, MaybeAlign StackAlignOverride)
const X86RegisterInfo * TRI
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
bool hasFPImpl(const MachineFunction &MF) const override
hasFPImpl - Return true if the specified function should have a dedicated frame pointer register.
MachineBasicBlock::iterator restoreWin32EHStackPointers(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool RestoreSP=false) const
Sets up EBP and optionally ESI based on the incoming EBP value.
int getInitialCFAOffset(const MachineFunction &MF) const override
Return initial CFA offset value i.e.
bool canUseAsPrologue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as a prologue for the target.
bool hasReservedCallFrame(const MachineFunction &MF) const override
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
void emitStackProbe(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog, std::optional< MachineFunction::DebugInstrOperandPair > InstrNum=std::nullopt) const
Emit target stack probe code.
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool IsPrologue) const
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
StackOffset getFrameIndexReferenceSP(const MachineFunction &MF, int FI, Register &SPReg, int Adjustment) const
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
bool enableShrinkWrapping(const MachineFunction &MF) const override
Returns true if the target will correctly handle shrink wrapping.
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - This method should return the base register and offset used to reference a f...
void inlineStackProbe(MachineFunction &MF, MachineBasicBlock &PrologMBB) const override
Replace a StackProbe inline-stub with the actual probe code inline.
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
const X86InstrInfo & TII
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &DL, int64_t NumBytes, bool InEpilogue) const
Emit a series of instructions to increment / decrement the stack pointer by a constant value.
bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as an epilogue for the target.
bool Is64Bit
Is64Bit implies that x86_64 instructions are available.
Register getInitialCFARegister(const MachineFunction &MF) const override
Return initial CFA register value i.e.
bool Uses64BitFramePtr
True if the 64-bit frame or stack pointer should be used.
unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const override
void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const override
Adjust the prologue to have the function use segmented stacks.
DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const override
Return the frame base information to be encoded in the DWARF subprogram debug info.
void emitCalleeSavedFrameMovesFullCFA(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const override
Emits Dwarf Info specifying offsets of callee saved registers and frame pointer.
int getWin64EHFrameIndexRef(const MachineFunction &MF, int FI, Register &SPReg) const
bool canUseLEAForSPInEpilogue(const MachineFunction &MF) const
Check that LEA can be used on SP in an epilogue sequence for MF.
bool stackProbeFunctionModifiesSP() const override
Does the stack probe function call return with a modified stack pointer?
void orderFrameObjects(const MachineFunction &MF, SmallVectorImpl< int > &ObjectsToAllocate) const override
Order the symbols in the local stack.
void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const MCCFIInstruction &CFIInst, MachineInstr::MIFlag Flag=MachineInstr::NoFlags) const
Wraps up getting a CFI index and building a MachineInstr for it.
int mergeSPUpdates(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, bool doMergeWithPrevious) const
Check the instruction before/after the passed instruction.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
void processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameIndicesReplaced - This method is called immediately before MO_FrameIndex op...
StackOffset getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, Register &FrameReg, bool IgnoreSPUpdates) const override
Same as getFrameIndexReference, except that the stack pointer (as opposed to the frame pointer) will ...
void restoreWinEHStackPointersInParent(MachineFunction &MF) const
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
void adjustForHiPEPrologue(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const override
Erlang programs may need a special prologue to handle the stack size they might need at runtime.
const X86Subtarget & STI
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void buildClearRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, DebugLoc &DL, bool AllowSideEffects=true) const override
int64_t getFrameAdjustment(const MachineInstr &I) const
Returns the stack pointer adjustment that happens inside the frame setup..destroy sequence (e....
Definition: X86InstrInfo.h:215
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
bool isCandidateForPush2Pop2(Register Reg) const
void setRestoreBasePointer(const MachineFunction *MF)
DenseMap< int, unsigned > & getWinEHXMMSlotInfo()
MachineInstr * getStackPtrSaveMI() const
AMXProgModelEnum getAMXProgModel() const
void addCandidateForPush2Pop2(Register Reg)
void setStackPtrSaveMI(MachineInstr *MI)
void setCalleeSavedFrameSize(unsigned bytes)
bool hasBasePointer(const MachineFunction &MF) const
Register getFrameRegister(const MachineFunction &MF) const override
unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) const
findDeadCallerSavedReg - Return a caller-saved register that isn't live when it reaches the "return" ...
Register getStackRegister() const
unsigned getSlotSize() const
Register getFramePtr() const
Returns physical register used as frame pointer.
Register getBaseRegister() const
bool isOSWindows() const
Definition: X86Subtarget.h:322
const X86TargetLowering * getTargetLowering() const override
Definition: X86Subtarget.h:118
bool isTargetDragonFly() const
Definition: X86Subtarget.h:282
bool isTargetWindowsMSVC() const
Definition: X86Subtarget.h:300
bool isTarget64BitILP32() const
Is this x86_64 with the ILP32 programming model (x32 ABI)?
Definition: X86Subtarget.h:173
bool isTargetDarwin() const
Definition: X86Subtarget.h:280
bool isTargetWin64() const
Definition: X86Subtarget.h:324
bool isTarget64BitLP64() const
Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
Definition: X86Subtarget.h:178
bool swiftAsyncContextIsDynamicallySet() const
Return whether FrameLowering should always set the "extended frame present" bit in FP,...
Definition: X86Subtarget.h:386
bool isTargetWindowsCoreCLR() const
Definition: X86Subtarget.h:304
const X86InstrInfo * getInstrInfo() const override
Definition: X86Subtarget.h:122
bool isCallingConvWin64(CallingConv::ID CC) const
Definition: X86Subtarget.h:337
bool isTargetFreeBSD() const
Definition: X86Subtarget.h:281
bool isTargetNaCl64() const
Definition: X86Subtarget.h:296
bool isTargetWin32() const
Definition: X86Subtarget.h:326
bool useIndirectThunkCalls() const
Definition: X86Subtarget.h:218
bool isTargetLinux() const
Definition: X86Subtarget.h:290
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
StringRef getStackProbeSymbolName(const MachineFunction &MF) const override
Returns the name of the symbol used to emit stack probes or the empty string if not applicable.
bool hasStackProbeSymbol(const MachineFunction &MF) const override
Returns true if stack probing through a function call is requested.
unsigned getStackProbeSize(const MachineFunction &MF) const
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint16_t StackAdjustment(const RuntimeFunction &RF)
StackAdjustment - calculated stack adjustment in words.
Definition: ARMWinEH.h:199
@ HiPE
Used by the High-Performance Erlang Compiler (HiPE).
Definition: CallingConv.h:53
@ X86_INTR
x86 hardware interrupt context.
Definition: CallingConv.h:173
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ X86_FastCall
'fast' analog of X86_StdCall.
Definition: CallingConv.h:103
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
@ MO_GOTPCREL
MO_GOTPCREL - On a symbol operand this indicates that the immediate is offset to the GOT entry for th...
Definition: X86BaseInfo.h:387
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
void stable_sort(R &&Range)
Definition: STLExtras.h:2037
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
MCRegister getX86SubSuperRegister(MCRegister Reg, unsigned Size, bool High=false)
@ DwarfCFI
DWARF-like instruction based exceptions.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is congruent to Skew mod Align.
Definition: MathExtras.h:555
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
static const MachineInstrBuilder & addRegOffset(const MachineInstrBuilder &MIB, unsigned Reg, bool isKill, int Offset)
addRegOffset - This function is used to add a memory reference of the form [Reg + Offset],...
@ Always
Always set the bit.
@ DeploymentBased
Determine whether to set the bit statically or dynamically based on the deployment target.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
IterT skipDebugInstructionsBackward(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It until it points to a non-debug instruction or to Begin and return the resulting iterator.
unsigned getUndefRegState(bool B)
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
bool isAsynchronousEHPersonality(EHPersonality Pers)
Returns true if this personality function catches asynchronous exceptions.
unsigned encodeSLEB128(int64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a SLEB128 value to an output stream.
Definition: LEB128.h:23
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given predicate occurs in a range.
Definition: STLExtras.h:1945
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1766
void computeAndAddLiveIns(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB)
Convenience function combining computeLiveIns() and addLiveIns().
unsigned encodeULEB128(uint64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a ULEB128 value to an output stream.
Definition: LEB128.h:80
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-ins for a set of MBBs until the computation converges.
Definition: LivePhysRegs.h:215
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Pair of physical register and lane mask.
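This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.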
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
union llvm::TargetFrameLowering::DwarfFrameBase::@248 Location
enum llvm::TargetFrameLowering::DwarfFrameBase::FrameBaseKind Kind
SmallVector< WinEHTryBlockMapEntry, 4 > TryBlockMap
Definition: WinEHFuncInfo.h:97
SmallVector< WinEHHandlerType, 1 > HandlerArray
Definition: WinEHFuncInfo.h:76