//===-- X86FrameLowering.cpp - X86 Frame Information ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

13#include "X86FrameLowering.h"
15#include "X86InstrBuilder.h"
16#include "X86InstrInfo.h"
18#include "X86Subtarget.h"
19#include "X86TargetMachine.h"
20#include "llvm/ADT/Statistic.h"
28#include "llvm/IR/DataLayout.h"
30#include "llvm/IR/Function.h"
31#include "llvm/IR/Module.h"
32#include "llvm/MC/MCAsmInfo.h"
34#include "llvm/MC/MCSymbol.h"
35#include "llvm/Support/LEB128.h"
37#include <cstdlib>
38
#define DEBUG_TYPE "x86-fl"

STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
STATISTIC(NumFrameExtraProbe,
          "Number of extra stack probes generated in prologue");
STATISTIC(NumFunctionUsingPush2Pop2, "Number of functions using push2/pop2");

using namespace llvm;

X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
                                   MaybeAlign StackAlignOverride)
    : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(),
                          STI.is64Bit() ? -8 : -4),
      STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {
  // Cache a bunch of frame-related predicates for this subtarget.
  SlotSize = TRI->getSlotSize();
  Is64Bit = STI.is64Bit();
  IsLP64 = STI.isTarget64BitLP64();
  // Standard x86_64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit.
  Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
  StackPtr = TRI->getStackRegister();
}

bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo().hasVarSizedObjects() &&
         !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() &&
         !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall();
}

/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
/// call frame pseudos can be simplified. Having an FP, as in the default
/// implementation, is not sufficient here since we can't always use it.
/// Use a more nuanced condition.
bool X86FrameLowering::canSimplifyCallFramePseudos(
    const MachineFunction &MF) const {
  return hasReservedCallFrame(MF) ||
         MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
         (hasFP(MF) && !TRI->hasStackRealignment(MF)) ||
         TRI->hasBasePointer(MF);
}

// needsFrameIndexResolution - Do we need to perform FI resolution for
// this function. Normally, this is required only when the function
// has any stack objects. However, FI resolution actually has another job,
// not apparent from the title - it resolves call frame setup/destroy
// pseudos that were not simplified earlier.
// So, this is required for x86 functions that have push sequences even
// when there are no stack objects.
bool X86FrameLowering::needsFrameIndexResolution(
    const MachineFunction &MF) const {
  return MF.getFrameInfo().hasStackObjects() ||
         MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
}

/// hasFPImpl - Return true if the specified function should have a dedicated
/// frame pointer register. This is true if the function has variable sized
/// allocas or if frame pointer elimination is disabled.
bool X86FrameLowering::hasFPImpl(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
          TRI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
          MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() ||
          MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
          MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
          MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
          MFI.hasStackMap() || MFI.hasPatchPoint() ||
          (isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment()));
}

static unsigned getSUBriOpcode(bool IsLP64) {
  return IsLP64 ? X86::SUB64ri32 : X86::SUB32ri;
}

static unsigned getADDriOpcode(bool IsLP64) {
  return IsLP64 ? X86::ADD64ri32 : X86::ADD32ri;
}

static unsigned getSUBrrOpcode(bool IsLP64) {
  return IsLP64 ? X86::SUB64rr : X86::SUB32rr;
}

static unsigned getADDrrOpcode(bool IsLP64) {
  return IsLP64 ? X86::ADD64rr : X86::ADD32rr;
}

static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
  return IsLP64 ? X86::AND64ri32 : X86::AND32ri;
}

static unsigned getLEArOpcode(bool IsLP64) {
  return IsLP64 ? X86::LEA64r : X86::LEA32r;
}

static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm) {
  if (Use64BitReg) {
    if (isUInt<32>(Imm))
      return X86::MOV32ri64;
    if (isInt<32>(Imm))
      return X86::MOV64ri32;
    return X86::MOV64ri;
  }
  return X86::MOV32ri;
}
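
// Illustrative examples (not part of the original source): for a 64-bit
// register the smallest sufficient encoding is chosen, e.g.
//   getMOVriOpcode(true, 0x12345678)  -> X86::MOV32ri64 (imm32 zero-extends)
//   getMOVriOpcode(true, -8)          -> X86::MOV64ri32 (imm32 sign-extends)
//   getMOVriOpcode(true, 1LL << 32)   -> X86::MOV64ri   (movabsq)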

// Push-Pop Acceleration (PPX) hint is used to indicate that the POP reads the
// value written by the PUSH from the stack. The processor tracks these marked
// instructions internally and fast-forwards register data between matching
// PUSH and POP instructions, without going through memory or through the
// training loop of the Fast Store Forwarding Predictor (FSFP). Instead, a more
// efficient memory-renaming optimization can be used.
//
// The PPX hint is purely a performance hint. Instructions with this hint have
// the same functional semantics as those without. PPX hints set by the
// compiler that violate the balancing rule may turn off the PPX optimization,
// but they will not affect program semantics.
//
// Hence, PPX is used for balanced spill/reloads (Exceptions and setjmp/longjmp
// are not considered).
//
// PUSH2 and POP2 are instructions for (respectively) pushing/popping 2
// GPRs at a time to/from the stack.
static unsigned getPUSHOpcode(const X86Subtarget &ST) {
  return ST.is64Bit() ? (ST.hasPPX() ? X86::PUSHP64r : X86::PUSH64r)
                      : X86::PUSH32r;
}
static unsigned getPOPOpcode(const X86Subtarget &ST) {
  return ST.is64Bit() ? (ST.hasPPX() ? X86::POPP64r : X86::POP64r)
                      : X86::POP32r;
}
static unsigned getPUSH2Opcode(const X86Subtarget &ST) {
  return ST.hasPPX() ? X86::PUSH2P : X86::PUSH2;
}
static unsigned getPOP2Opcode(const X86Subtarget &ST) {
  return ST.hasPPX() ? X86::POP2P : X86::POP2;
}
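
// Illustrative sketch (an assumption about the emitted assembly, not taken
// from the original source): on a subtarget with both PPX and push2/pop2, a
// balanced callee-saved spill/reload pair may come out roughly as
//   push2p %r15, %r14      ; one instruction spills two GPRs, PPX-hinted
//   ...
//   pop2p  %r14, %r15      ; matching reload, eligible for memory renaming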

static bool isEAXLiveIn(MachineBasicBlock &MBB) {
  for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) {
    MCRegister Reg = RegMask.PhysReg;

    if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
        Reg == X86::AH || Reg == X86::AL)
      return true;
  }

  return false;
}

/// Check if the flags need to be preserved before the terminators.
/// This is the case if EFLAGS is live-in to the region composed of the
/// terminators, or live-out of that region without being defined by a
/// terminator.
static bool
flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) {
  for (const MachineInstr &MI : MBB.terminators()) {
    bool BreakNext = false;
    for (const MachineOperand &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (Reg != X86::EFLAGS)
        continue;

      // This terminator needs an EFLAGS value that is not defined by a
      // previous terminator:
      // EFLAGS is live-in of the region composed by the terminators.
      if (!MO.isDef())
        return true;
      // This terminator defines the eflags, i.e., we don't need to preserve
      // it. However, we still need to check this specific terminator does not
      // read a live-in value.
      BreakNext = true;
    }
    // We found a definition of the eflags, no need to preserve them.
    if (BreakNext)
      return false;
  }

  // None of the terminators use or define the eflags.
  // Check if they are live-out, that would imply we need to preserve them.
  for (const MachineBasicBlock *Succ : MBB.successors())
    if (Succ->isLiveIn(X86::EFLAGS))
      return true;

  return false;
}
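
// Illustrative case where this returns true: a conditional branch terminator
// reads EFLAGS that no earlier terminator defines, e.g. (MIR sketch)
//   JCC_1 %bb.1, 4, implicit $eflags   ; uses EFLAGS, live-in to the region
//   JMP_1 %bb.2
// so an SP adjustment inserted before the terminators must use LEA, not
// ADD/SUB, to avoid clobbering the flags.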

/// emitSPUpdate - Emit a series of instructions to increment / decrement the
/// stack pointer by a constant value.
void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator &MBBI,
                                    const DebugLoc &DL, int64_t NumBytes,
                                    bool InEpilogue) const {
  bool isSub = NumBytes < 0;
  uint64_t Offset = isSub ? -NumBytes : NumBytes;
  MachineInstr::MIFlag Flag =
      isSub ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy;

  uint64_t Chunk = (1LL << 31) - 1;

  MachineFunction &MF = *MBB.getParent();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);

  // It's ok to not take into account large chunks when probing, as the
  // allocation is split in smaller chunks anyway.
  if (EmitInlineStackProbe && !InEpilogue) {

    // This pseudo-instruction is going to be expanded, potentially using a
    // loop, by inlineStackProbe().
    BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING)).addImm(Offset);
    return;
  } else if (Offset > Chunk) {
    // Rather than emit a long series of instructions for large offsets,
    // load the offset into a register and do one sub/add.
    unsigned Reg = 0;
    unsigned Rax = (unsigned)(Uses64BitFramePtr ? X86::RAX : X86::EAX);

    if (isSub && !isEAXLiveIn(MBB))
      Reg = Rax;
    else
      Reg = getX86SubSuperRegister(TRI->findDeadCallerSavedReg(MBB, MBBI),
                                   Uses64BitFramePtr ? 64 : 32);

    unsigned AddSubRROpc = isSub ? getSUBrrOpcode(Uses64BitFramePtr)
                                 : getADDrrOpcode(Uses64BitFramePtr);
    if (Reg) {
      BuildMI(MBB, MBBI, DL,
              TII.get(getMOVriOpcode(Uses64BitFramePtr, Offset)), Reg)
          .addImm(Offset)
          .setMIFlag(Flag);
      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr)
                             .addReg(StackPtr)
                             .addReg(Reg);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
      return;
    } else if (Offset > 8 * Chunk) {
      // If we would need more than 8 add or sub instructions (a >16GB stack
      // frame), it's worth spilling RAX to materialize this immediate.
      //   pushq %rax
      //   movabsq +-$Offset+-SlotSize, %rax
      //   addq %rsp, %rax
      //   xchg %rax, (%rsp)
      //   movq (%rsp), %rsp
      assert(Uses64BitFramePtr && "can't have 32-bit 16GB stack frame");
      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
          .addReg(Rax, RegState::Kill)
          .setMIFlag(Flag);
      // Subtract is not commutative, so negate the offset and always use add.
      // Subtract 8 less and add 8 more to account for the PUSH we just did.
      if (isSub)
        Offset = -(Offset - SlotSize);
      else
        Offset = Offset + SlotSize;
      BuildMI(MBB, MBBI, DL,
              TII.get(getMOVriOpcode(Uses64BitFramePtr, Offset)), Rax)
          .addImm(Offset)
          .setMIFlag(Flag);
      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax)
                             .addReg(Rax)
                             .addReg(StackPtr);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
      // Exchange the new SP in RAX with the top of the stack.
      addRegOffset(
          BuildMI(MBB, MBBI, DL, TII.get(X86::XCHG64rm), Rax).addReg(Rax),
          StackPtr, false, 0);
      // Load new SP from the top of the stack into RSP.
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), StackPtr),
                   StackPtr, false, 0);
      return;
    }
  }

  while (Offset) {
    uint64_t ThisVal = std::min(Offset, Chunk);
    if (ThisVal == SlotSize) {
      // Use push / pop for slot sized adjustments as a size optimization. We
      // need to find a dead register when using pop.
      unsigned Reg = isSub ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
                           : TRI->findDeadCallerSavedReg(MBB, MBBI);
      if (Reg) {
        unsigned Opc = isSub ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
                             : (Is64Bit ? X86::POP64r : X86::POP32r);
        BuildMI(MBB, MBBI, DL, TII.get(Opc))
            .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub))
            .setMIFlag(Flag);
        Offset -= ThisVal;
        continue;
      }
    }

    BuildStackAdjustment(MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue)
        .setMIFlag(Flag);

    Offset -= ThisVal;
  }
}
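
// For example (sketch, assuming no inline probing is requested):
//   emitSPUpdate(..., NumBytes = -8)    -> pushq %rax  (slot-sized, RAX dead)
//   emitSPUpdate(..., NumBytes = -4096) -> subq $4096, %rsp
// Offsets above 2^31-1 are split into chunks or materialized in a register,
// as handled above.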

MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, int64_t Offset, bool InEpilogue) const {
  assert(Offset != 0 && "zero offset stack adjustment requested");

  // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue
  // is tricky.
  bool UseLEA;
  if (!InEpilogue) {
    // Check if inserting the prologue at the beginning
    // of MBB would require to use LEA operations.
    // We need to use LEA operations if EFLAGS is live in, because
    // it means an instruction will read it before it gets defined.
    UseLEA = STI.useLeaForSP() || MBB.isLiveIn(X86::EFLAGS);
  } else {
    // If we can use LEA for SP but we shouldn't, check that none
    // of the terminators uses the eflags. Otherwise we will insert
    // an ADD that will redefine the eflags and break the condition.
    // Alternatively, we could move the ADD, but this may not be possible
    // and is an optimization anyway.
    UseLEA = canUseLEAForSPInEpilogue(*MBB.getParent());
    if (UseLEA && !STI.useLeaForSP())
      UseLEA = flagsNeedToBePreservedBeforeTheTerminators(MBB);
    // If that assert breaks, that means we do not do the right thing
    // in canUseAsEpilogue.
    assert((UseLEA || !flagsNeedToBePreservedBeforeTheTerminators(MBB)) &&
           "We shouldn't have allowed this insertion point");
  }

  MachineInstrBuilder MI;
  if (UseLEA) {
    MI = addRegOffset(BuildMI(MBB, MBBI, DL,
                              TII.get(getLEArOpcode(Uses64BitFramePtr)),
                              StackPtr),
                      StackPtr, false, Offset);
  } else {
    bool IsSub = Offset < 0;
    uint64_t AbsOffset = IsSub ? -Offset : Offset;
    const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr)
                               : getADDriOpcode(Uses64BitFramePtr);
    MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
             .addReg(StackPtr)
             .addImm(AbsOffset);
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  }
  return MI;
}

int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator &MBBI,
                                     bool doMergeWithPrevious) const {
  if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
      (!doMergeWithPrevious && MBBI == MBB.end()))
    return 0;

  MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;

  // It is assumed that ADD/SUB/LEA instruction is succeeded by one CFI
  // instruction, and that there are no DBG_VALUE or other instructions between
  // ADD/SUB/LEA and its corresponding CFI instruction.
  /* TODO: Add support for the case where there are multiple CFI instructions
    below the ADD/SUB/LEA, e.g.:
    ...
    add
    cfi_def_cfa_offset
    cfi_offset
    ...
  */
  if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
    PI = std::prev(PI);

  unsigned Opc = PI->getOpcode();
  int Offset = 0;

  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) &&
      PI->getOperand(0).getReg() == StackPtr) {
    assert(PI->getOperand(1).getReg() == StackPtr);
    Offset = PI->getOperand(2).getImm();
  } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
             PI->getOperand(0).getReg() == StackPtr &&
             PI->getOperand(1).getReg() == StackPtr &&
             PI->getOperand(2).getImm() == 1 &&
             PI->getOperand(3).getReg() == X86::NoRegister &&
             PI->getOperand(5).getReg() == X86::NoRegister) {
    // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
    Offset = PI->getOperand(4).getImm();
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) &&
             PI->getOperand(0).getReg() == StackPtr) {
    assert(PI->getOperand(1).getReg() == StackPtr);
    Offset = -PI->getOperand(2).getImm();
  } else
    return 0;

  PI = MBB.erase(PI);
  if (PI != MBB.end() && PI->isCFIInstruction()) {
    auto CIs = MBB.getParent()->getFrameInstructions();
    MCCFIInstruction CI = CIs[PI->getOperand(0).getCFIIndex()];
    if (CI.getOperation() == MCCFIInstruction::OpDefCfaOffset ||
        CI.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
      PI = MBB.erase(PI);
  }
  if (!doMergeWithPrevious)
    MBBI = skipDebugInstructionsForward(PI, MBB.end());

  return Offset;
}
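
// For example (illustrative): if the instruction before MBBI is
//   $rsp = SUB64ri32 $rsp, 16
// then mergeSPUpdates(MBB, MBBI, /*doMergeWithPrevious=*/true) erases the SUB
// (and its single trailing CFA-offset CFI, if present) and returns -16, so
// the caller can fold the adjustment into its own stack update.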

void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                const DebugLoc &DL,
                                const MCCFIInstruction &CFIInst,
                                MachineInstr::MIFlag Flag) const {
  MachineFunction &MF = *MBB.getParent();
  unsigned CFIIndex = MF.addFrameInst(CFIInst);

  if (CFIInst.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
    MF.getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true);

  BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex)
      .setMIFlag(Flag);
}

/// Emits Dwarf Info specifying offsets of callee saved registers and
/// frame pointer. This is called only when basic block sections are enabled.
void X86FrameLowering::emitCalleeSavedFrameMovesFullCFA(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  MachineFunction &MF = *MBB.getParent();
  if (!hasFP(MF)) {
    emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
    return;
  }
  const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
  const Register FramePtr = TRI->getFrameRegister(MF);
  const Register MachineFramePtr =
      STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(FramePtr, 64))
                               : FramePtr;
  unsigned DwarfReg = MRI->getDwarfRegNum(MachineFramePtr, true);
  // Offset = space for return address + size of the frame pointer itself.
  int64_t Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 8 : 4);
  BuildCFI(MBB, MBBI, DebugLoc{},
           MCCFIInstruction::createOffset(nullptr, DwarfReg, -Offset));
  emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
}

void X86FrameLowering::emitCalleeSavedFrameMoves(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, bool IsPrologue) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

  // Calculate offsets.
  for (const CalleeSavedInfo &I : CSI) {
    int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
    Register Reg = I.getReg();
    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);

    if (IsPrologue) {
      if (X86FI->getStackPtrSaveMI()) {
        // +2*SlotSize because there is return address and ebp at the bottom
        // of the stack.
        // | retaddr |
        // | ebp     |
        // |         |<--ebp
        Offset += 2 * SlotSize;
        SmallString<64> CfaExpr;
        CfaExpr.push_back(dwarf::DW_CFA_expression);
        uint8_t buffer[16];
        CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
        CfaExpr.push_back(2);
        Register FramePtr = TRI->getFrameRegister(MF);
        const Register MachineFramePtr =
            STI.isTarget64BitILP32()
                ? Register(getX86SubSuperRegister(FramePtr, 64))
                : FramePtr;
        unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
        CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
        CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
        BuildCFI(MBB, MBBI, DL,
                 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
                 MachineInstr::FrameSetup);
      } else {
        BuildCFI(MBB, MBBI, DL,
                 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
      }
    } else {
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createRestore(nullptr, DwarfReg));
    }
  }
  if (auto *MI = X86FI->getStackPtrSaveMI()) {
    int FI = MI->getOperand(1).getIndex();
    int64_t Offset = MFI.getObjectOffset(FI) + 2 * SlotSize;
    SmallString<64> CfaExpr;
    Register FramePtr = TRI->getFrameRegister(MF);
    const Register MachineFramePtr =
        STI.isTarget64BitILP32()
            ? Register(getX86SubSuperRegister(FramePtr, 64))
            : FramePtr;
    unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
    CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
    uint8_t buffer[16];
    CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
    CfaExpr.push_back(dwarf::DW_OP_deref);

    SmallString<64> DefCfaExpr;
    DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
    DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
    DefCfaExpr.append(CfaExpr.str());
    // DW_CFA_def_cfa_expression: DW_OP_breg5 offset, DW_OP_deref
    BuildCFI(MBB, MBBI, DL,
             MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
             MachineInstr::FrameSetup);
  }
}

void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
                                            MachineBasicBlock &MBB) const {
  const MachineFunction &MF = *MBB.getParent();

  // Insertion point.
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();

  // Fake a debug loc.
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();

  // Zero out FP stack if referenced. Do this outside of the loop below so that
  // it's done only once.
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  for (MCRegister Reg : RegsToZero.set_bits()) {
    if (!X86::RFP80RegClass.contains(Reg))
      continue;

    unsigned NumFPRegs = ST.is64Bit() ? 8 : 7;
    for (unsigned i = 0; i != NumFPRegs; ++i)
      BuildMI(MBB, MBBI, DL, TII.get(X86::LD_F0));

    for (unsigned i = 0; i != NumFPRegs; ++i)
      BuildMI(MBB, MBBI, DL, TII.get(X86::ST_FPrr)).addReg(X86::ST0);
    break;
  }

  // For GPRs, we only care to clear out the 32-bit register.
  BitVector GPRsToZero(TRI->getNumRegs());
  for (MCRegister Reg : RegsToZero.set_bits())
    if (TRI->isGeneralPurposeRegister(MF, Reg)) {
      GPRsToZero.set(getX86SubSuperRegister(Reg, 32));
      RegsToZero.reset(Reg);
    }

  // Zero out the GPRs first.
  for (MCRegister Reg : GPRsToZero.set_bits())
    TII.buildClearRegister(Reg, MBB, MBBI, DL);

  // Zero out the remaining registers.
  for (MCRegister Reg : RegsToZero.set_bits())
    TII.buildClearRegister(Reg, MBB, MBBI, DL);
}

void X86FrameLowering::emitStackProbe(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
    std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  if (STI.isTargetWindowsCoreCLR()) {
    if (InProlog) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING))
          .addImm(0 /* no explicit stack size */);
    } else {
      emitStackProbeInline(MF, MBB, MBBI, DL, false);
    }
  } else {
    emitStackProbeCall(MF, MBB, MBBI, DL, InProlog, InstrNum);
  }
}

bool X86FrameLowering::stackProbeFunctionModifiesSP() const {
  return STI.isOSWindows() && !STI.isTargetWin64();
}

void X86FrameLowering::inlineStackProbe(MachineFunction &MF,
                                        MachineBasicBlock &PrologMBB) const {
  auto Where = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
    return MI.getOpcode() == X86::STACKALLOC_W_PROBING;
  });
  if (Where != PrologMBB.end()) {
    DebugLoc DL = PrologMBB.findDebugLoc(Where);
    emitStackProbeInline(MF, PrologMBB, Where, DL, true);
    Where->eraseFromParent();
  }
}

void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
                                            MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MBBI,
                                            const DebugLoc &DL,
                                            bool InProlog) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  if (STI.isTargetWindowsCoreCLR() && STI.is64Bit())
    emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog);
  else
    emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog);
}

void X86FrameLowering::emitStackProbeInlineGeneric(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
  MachineInstr &AllocWithProbe = *MBBI;
  uint64_t Offset = AllocWithProbe.getOperand(0).getImm();

  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) &&
         "different expansion expected for CoreCLR 64 bit");

  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  uint64_t ProbeChunk = StackProbeSize * 8;

  uint64_t MaxAlign =
      TRI->hasStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0;

  // Synthesize a loop or unroll it, depending on the number of iterations.
  // BuildStackAlignAND ensures that at most MaxAlign % StackProbeSize bytes
  // are left unprobed between the unaligned rsp and the realigned rsp.
  if (Offset > ProbeChunk) {
    emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset,
                                    MaxAlign % StackProbeSize);
  } else {
    emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset,
                                     MaxAlign % StackProbeSize);
  }
}

void X86FrameLowering::emitStackProbeInlineGenericBlock(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
    uint64_t AlignOffset) const {

  const bool NeedsDwarfCFI = needsDwarfCFI(MF);
  const bool HasFP = hasFP(MF);
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);

  uint64_t CurrentOffset = 0;

  assert(AlignOffset < StackProbeSize);

  // If the offset is so small it fits within a page, there's nothing to do.
  if (StackProbeSize < Offset + AlignOffset) {

    uint64_t StackAdjustment = StackProbeSize - AlignOffset;
    BuildStackAdjustment(MBB, MBBI, DL, -StackAdjustment, /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
    if (!HasFP && NeedsDwarfCFI) {
      BuildCFI(
          MBB, MBBI, DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
    }

    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                     .setMIFlag(MachineInstr::FrameSetup),
                 StackPtr, false, 0)
        .addImm(0)
        .setMIFlag(MachineInstr::FrameSetup);
    NumFrameExtraProbe++;
    CurrentOffset = StackProbeSize - AlignOffset;
  }

  // For the next N - 1 pages, just probe. I tried to take advantage of
  // natural probes but it implies much more logic and there were very few
  // interesting natural probes to interleave.
  while (CurrentOffset + StackProbeSize < Offset) {
    BuildStackAdjustment(MBB, MBBI, DL, -StackProbeSize, /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);

    if (!HasFP && NeedsDwarfCFI) {
      BuildCFI(
          MBB, MBBI, DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, StackProbeSize));
    }
    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                     .setMIFlag(MachineInstr::FrameSetup),
                 StackPtr, false, 0)
        .addImm(0)
        .setMIFlag(MachineInstr::FrameSetup);
    NumFrameExtraProbe++;
    CurrentOffset += StackProbeSize;
  }

  // No need to probe the tail, it is smaller than a Page.
  uint64_t ChunkSize = Offset - CurrentOffset;
  if (ChunkSize == SlotSize) {
    // Use push for slot sized adjustments as a size optimization,
    // like emitSPUpdate does when not probing.
    unsigned Reg = Is64Bit ? X86::RAX : X86::EAX;
    unsigned Opc = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
    BuildMI(MBB, MBBI, DL, TII.get(Opc))
        .addReg(Reg, RegState::Undef)
        .setMIFlag(MachineInstr::FrameSetup);
  } else {
    BuildStackAdjustment(MBB, MBBI, DL, -ChunkSize, /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
  }
  // No need to adjust Dwarf CFA offset here, the last position of the stack
  // has been defined.
}
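
// Sketch of the unrolled expansion for Offset = 3 * StackProbeSize (assuming
// a 4096-byte probe size and AlignOffset == 0):
//   subq $4096, %rsp
//   movl $0, (%rsp)        ; touch the page
//   subq $4096, %rsp
//   movl $0, (%rsp)
//   subq $4096, %rsp       ; tail allocation, smaller than a page, no probe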

void X86FrameLowering::emitStackProbeInlineGenericLoop(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
    uint64_t AlignOffset) const {
  assert(Offset && "null offset");

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Inline stack probe loop will clobber live EFLAGS.");

  const bool NeedsDwarfCFI = needsDwarfCFI(MF);
  const bool HasFP = hasFP(MF);
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);

  if (AlignOffset) {
    if (AlignOffset < StackProbeSize) {
      // Perform a first smaller allocation followed by a probe.
      BuildStackAdjustment(MBB, MBBI, DL, -AlignOffset, /*InEpilogue=*/false)
          .setMIFlag(MachineInstr::FrameSetup);

      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                       .setMIFlag(MachineInstr::FrameSetup),
                   StackPtr, false, 0)
          .addImm(0)
          .setMIFlag(MachineInstr::FrameSetup);
      NumFrameExtraProbe++;
      Offset -= AlignOffset;
    }
  }

  // Synthesize a loop
  NumFrameLoopProbe++;
  const BasicBlock *LLVM_BB = MBB.getBasicBlock();

  MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator MBBIter = ++MBB.getIterator();
  MF.insert(MBBIter, testMBB);
  MF.insert(MBBIter, tailMBB);

  Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
                              : Is64Bit         ? X86::R11D
                                                : X86::EAX;

  // save loop bound
  {
    const uint64_t BoundOffset = alignDown(Offset, StackProbeSize);

    // Can we calculate the loop bound using SUB with a 32-bit immediate?
    // Note that the immediate gets sign-extended when used with a 64-bit
    // register, so in that case we only have 31 bits to work with.
    bool canUseSub =
        Uses64BitFramePtr ? isUInt<31>(BoundOffset) : isUInt<32>(BoundOffset);

    if (canUseSub) {
      const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);

      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
          .addReg(StackPtr)
          .setMIFlag(MachineInstr::FrameSetup);
      BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
          .addReg(FinalStackProbed)
          .addImm(BoundOffset)
          .setMIFlag(MachineInstr::FrameSetup);
    } else if (Uses64BitFramePtr) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), FinalStackProbed)
          .addImm(-BoundOffset)
          .setMIFlag(MachineInstr::FrameSetup);
      BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), FinalStackProbed)
          .addReg(FinalStackProbed)
          .addReg(StackPtr)
          .setMIFlag(MachineInstr::FrameSetup);
    } else {
      // We're being asked to probe a stack frame that's 4 GiB or larger,
      // but our stack pointer is only 32 bits. This might be unreachable
      // code, so don't complain now; just trap if it's reached at runtime.
      BuildMI(MBB, MBBI, DL, TII.get(X86::TRAP));
    }

    // while in the loop, use loop-invariant reg for CFI,
    // instead of the stack pointer, which changes during the loop
    if (!HasFP && NeedsDwarfCFI) {
      // x32 uses the same DWARF register numbers as x86-64,
      // so there isn't a register number for r11d, we must use r11 instead
      const Register DwarfFinalStackProbed =
          STI.isTarget64BitILP32()
              ? Register(getX86SubSuperRegister(FinalStackProbed, 64))
              : FinalStackProbed;

      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createDefCfaRegister(
                   nullptr, TRI->getDwarfRegNum(DwarfFinalStackProbed, true)));
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createAdjustCfaOffset(nullptr, BoundOffset));
    }
  }

  // allocate a page
  BuildStackAdjustment(*testMBB, testMBB->end(), DL, -StackProbeSize,
                       /*InEpilogue=*/false)
      .setMIFlag(MachineInstr::FrameSetup);

  // touch the page
  addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc))
                   .setMIFlag(MachineInstr::FrameSetup),
               StackPtr, false, 0)
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);

  // cmp with stack pointer bound
  BuildMI(testMBB, DL, TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
      .addReg(StackPtr)
      .addReg(FinalStackProbed)
      .setMIFlag(MachineInstr::FrameSetup);

  // jump
  BuildMI(testMBB, DL, TII.get(X86::JCC_1))
      .addMBB(testMBB)
      .addImm(X86::COND_NE)
      .setMIFlag(MachineInstr::FrameSetup);
  testMBB->addSuccessor(testMBB);
  testMBB->addSuccessor(tailMBB);

  // BB management
  tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end());
  tailMBB->transferSuccessorsAndUpdatePHIs(&MBB);
  MBB.addSuccessor(testMBB);

  // handle tail
  const uint64_t TailOffset = Offset % StackProbeSize;
  MachineBasicBlock::iterator TailMBBIter = tailMBB->begin();
  if (TailOffset) {
    BuildStackAdjustment(*tailMBB, TailMBBIter, DL, -TailOffset,
                         /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // after the loop, switch back to stack pointer for CFI
  if (!HasFP && NeedsDwarfCFI) {
    // x32 uses the same DWARF register numbers as x86-64,
    // so there isn't a register number for esp, we must use rsp instead
    const Register DwarfStackPtr =
        STI.isTarget64BitILP32()
            ? Register(getX86SubSuperRegister(StackPtr, 64))
            : Register(StackPtr);

    BuildCFI(*tailMBB, TailMBBIter, DL,
             MCCFIInstruction::createDefCfaRegister(
                 nullptr, TRI->getDwarfRegNum(DwarfStackPtr, true)));
  }

  // Update Live In information
  fullyRecomputeLiveIns({tailMBB, testMBB});
}
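
// Sketch of the emitted loop (64-bit, r11 as the loop-invariant bound, 4096-
// byte probe size assumed):
//   movq %rsp, %r11
//   subq $BoundOffset, %r11    ; BoundOffset = alignDown(Offset, ProbeSize)
// testMBB:
//   subq $4096, %rsp           ; allocate a page
//   movl $0, (%rsp)            ; touch it
//   cmpq %r11, %rsp
//   jne  testMBB
// tailMBB:
//   subq $TailOffset, %rsp     ; remainder smaller than a page, not probed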

void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
  assert(STI.is64Bit() && "different expansion needed for 32 bit");
  assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");
  const BasicBlock *LLVM_BB = MBB.getBasicBlock();

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Inline stack probe loop will clobber live EFLAGS.");

  // RAX contains the number of bytes of desired stack adjustment.
  // The handling here assumes this value has already been updated so as to
  // maintain stack alignment.
  //
  // We need to exit with RSP modified by this amount and execute suitable
  // page touches to notify the OS that we're growing the stack responsibly.
  // All stack probing must be done without modifying RSP.
  //
  // MBB:
  //    SizeReg = RAX;
  //    ZeroReg = 0
  //    CopyReg = RSP
  //    Flags, TestReg = CopyReg - SizeReg
  //    FinalReg = !Flags.Ovf ? TestReg : ZeroReg
  //    LimitReg = gs magic thread env access
  //    if FinalReg >= LimitReg goto ContinueMBB
  // RoundBB:
  //    RoundReg = page address of FinalReg
  // LoopMBB:
  //    LoopReg = PHI(LimitReg,ProbeReg)
  //    ProbeReg = LoopReg - PageSize
  //    [ProbeReg] = 0
  //    if (ProbeReg > RoundReg) goto LoopMBB
  // ContinueMBB:
  //    RSP = RSP - RAX
  //    [rest of original MBB]

  // Set up the new basic blocks
  MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator MBBIter = std::next(MBB.getIterator());
  MF.insert(MBBIter, RoundMBB);
  MF.insert(MBBIter, LoopMBB);
  MF.insert(MBBIter, ContinueMBB);

  // Split MBB and move the tail portion down to ContinueMBB.
  MachineBasicBlock::iterator BeforeMBBI = std::prev(MBBI);
  ContinueMBB->splice(ContinueMBB->begin(), &MBB, MBBI, MBB.end());
  ContinueMBB->transferSuccessorsAndUpdatePHIs(&MBB);

  // Some useful constants
  const int64_t ThreadEnvironmentStackLimit = 0x10;
  const int64_t PageSize = 0x1000;
  const int64_t PageMask = ~(PageSize - 1);

  // Registers we need. For the normal case we use virtual
  // registers. For the prolog expansion we use RAX, RCX and RDX.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetRegisterClass *RegClass = &X86::GR64RegClass;
  const Register
      SizeReg = InProlog ? X86::RAX : MRI.createVirtualRegister(RegClass),
      ZeroReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      CopyReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      TestReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      FinalReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      RoundedReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      LimitReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      JoinReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      ProbeReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass);

  // SP-relative offsets where we can save RCX and RDX.
  int64_t RCXShadowSlot = 0;
  int64_t RDXShadowSlot = 0;

  // If inlining in the prolog, save RCX and RDX.
  if (InProlog) {
    // Compute the offsets. We need to account for things already
    // pushed onto the stack at this point: return address, frame
    // pointer (if used), and callee saves.
    X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
    const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();
    const bool HasFP = hasFP(MF);

    // Check if we need to spill RCX and/or RDX.
    // Here we assume that no earlier prologue instruction changes RCX and/or
    // RDX, so checking the block live-ins is enough.
    const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX);
    const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX);
    int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
    // Assign the initial slot to both registers, then change RDX's slot if
    // both need to be spilled.
    if (IsRCXLiveIn)
      RCXShadowSlot = InitSlot;
    if (IsRDXLiveIn)
      RDXShadowSlot = InitSlot;
    if (IsRDXLiveIn && IsRCXLiveIn)
      RDXShadowSlot += 8;
    // Emit the saves if needed.
    if (IsRCXLiveIn)
      addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
                   RCXShadowSlot)
          .addReg(X86::RCX);
    if (IsRDXLiveIn)
      addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
                   RDXShadowSlot)
          .addReg(X86::RDX);
  } else {
    // Not in the prolog. Copy RAX to a virtual reg.
    BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX);
  }

  // Add code to MBB to check for overflow and set the new target stack pointer
  // to zero if so.
  BuildMI(&MBB, DL, TII.get(X86::XOR64rr), ZeroReg)
      .addReg(ZeroReg, RegState::Undef)
      .addReg(ZeroReg, RegState::Undef);
  BuildMI(&MBB, DL, TII.get(X86::MOV64rr), CopyReg).addReg(X86::RSP);
  BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg)
      .addReg(CopyReg)
      .addReg(SizeReg);
  BuildMI(&MBB, DL, TII.get(X86::CMOV64rr), FinalReg)
      .addReg(TestReg)
      .addReg(ZeroReg)
      .addImm(X86::COND_B);

  // FinalReg now holds final stack pointer value, or zero if
  // allocation would overflow. Compare against the current stack
  // limit from the thread environment block. Note this limit is the
  // lowest touched page on the stack, not the point at which the OS
  // will cause an overflow exception, so this is just an optimization
  // to avoid unnecessarily touching pages that are below the current
  // SP but already committed to the stack by the OS.
  BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg)
      .addReg(0)
      .addImm(1)
      .addReg(0)
      .addImm(ThreadEnvironmentStackLimit)
      .addReg(X86::GS);
  BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg);
  // Jump if the desired stack pointer is at or above the stack limit.
  BuildMI(&MBB, DL, TII.get(X86::JCC_1))
      .addMBB(ContinueMBB)
      .addImm(X86::COND_AE);

  // Add code to roundMBB to round the final stack pointer to a page boundary.
  if (InProlog)
    RoundMBB->addLiveIn(FinalReg);
  BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg)
      .addReg(FinalReg)
      .addImm(PageMask);
  BuildMI(RoundMBB, DL, TII.get(X86::JMP_1)).addMBB(LoopMBB);

  // LimitReg now holds the current stack limit, RoundedReg page-rounded
  // final RSP value. Add code to loopMBB to decrement LimitReg page-by-page
  // and probe until we reach RoundedReg.
  if (!InProlog) {
    BuildMI(LoopMBB, DL, TII.get(X86::PHI), JoinReg)
        .addReg(LimitReg)
        .addMBB(RoundMBB)
        .addReg(ProbeReg)
        .addMBB(LoopMBB);
  }

  if (InProlog)
    LoopMBB->addLiveIn(JoinReg);
  addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg,
               false, -PageSize);

  // Probe by storing a byte onto the stack.
  BuildMI(LoopMBB, DL, TII.get(X86::MOV8mi))
      .addReg(ProbeReg)
      .addImm(1)
      .addReg(0)
      .addImm(0)
      .addReg(0)
      .addImm(0);

  if (InProlog)
    LoopMBB->addLiveIn(RoundedReg);
  BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
      .addReg(RoundedReg)
      .addReg(ProbeReg);
  BuildMI(LoopMBB, DL, TII.get(X86::JCC_1))
      .addMBB(LoopMBB)
      .addImm(X86::COND_B);

  MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI();

  // If in prolog, restore RDX and RCX.
  if (InProlog) {
    if (RCXShadowSlot) // It means we spilled RCX in the prologue.
      addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
                           TII.get(X86::MOV64rm), X86::RCX),
                   X86::RSP, false, RCXShadowSlot);
    if (RDXShadowSlot) // It means we spilled RDX in the prologue.
      addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
                           TII.get(X86::MOV64rm), X86::RDX),
                   X86::RSP, false, RDXShadowSlot);
  }

  // Now that the probing is done, add code to continueMBB to update
  // the stack pointer for real.
  BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
      .addReg(X86::RSP)
      .addReg(SizeReg);

  // Add the control flow edges we need.
  MBB.addSuccessor(ContinueMBB);
  MBB.addSuccessor(RoundMBB);
  RoundMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(ContinueMBB);
  LoopMBB->addSuccessor(LoopMBB);

  if (InProlog) {
    LivePhysRegs LiveRegs;
    computeAndAddLiveIns(LiveRegs, *ContinueMBB);
  }

  // Mark all the instructions added to the prolog as frame setup.
  if (InProlog) {
    for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) {
      BeforeMBBI->setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI : *RoundMBB) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI : *LoopMBB) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI :
         llvm::make_range(ContinueMBB->begin(), ContinueMBBI)) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
  }
}

void X86FrameLowering::emitStackProbeCall(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
    std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
  bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;

  // FIXME: Add indirect thunk support and remove this.
  if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls())
    report_fatal_error("Emitting stack probe calls on 64-bit with the large "
                       "code model and indirect thunks not yet implemented.");

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Stack probe calls will clobber live EFLAGS.");

  unsigned CallOp;
  if (Is64Bit)
    CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
  else
    CallOp = X86::CALLpcrel32;

  StringRef Symbol = STI.getTargetLowering()->getStackProbeSymbolName(MF);

  MachineInstrBuilder CI;
  MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI);

  // All current stack probes take AX and SP as input, clobber flags, and
  // preserve all registers. x86_64 probes leave RSP unmodified.
  if (Is64Bit && IsLargeCodeModel) {
    // For the large code model, we have to call through a register. Use R11,
    // as it is scratch in all supported calling conventions.
    BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
        .addExternalSymbol(MF.createExternalSymbolName(Symbol));
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
  } else {
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp))
             .addExternalSymbol(MF.createExternalSymbolName(Symbol));
  }

  unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX;
  unsigned SP = Uses64BitFramePtr ? X86::RSP : X86::ESP;
  CI.addReg(AX, RegState::Implicit)
      .addReg(SP, RegState::Implicit)
      .addReg(AX, RegState::Define | RegState::Implicit)
      .addReg(SP, RegState::Define | RegState::Implicit)
      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);

  MachineInstr *ModInst = CI;
  if (STI.isTargetWin64() || !STI.isOSWindows()) {
    // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
    // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
    // themselves. They also do not clobber %rax, so we can reuse it when
    // adjusting %rsp.
    // All other platforms do not specify a particular ABI for the stack probe
    // function, so we arbitrarily define it to not adjust %esp/%rsp itself.
    ModInst =
        BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Uses64BitFramePtr)), SP)
            .addReg(SP)
            .addReg(AX);
  }

  // DebugInfo variable locations -- if there's an instruction number for the
  // allocation (i.e., DYN_ALLOC_*), substitute it for the instruction that
  // modifies SP.
  if (InstrNum) {
    if (STI.isTargetWin64() || !STI.isOSWindows()) {
      // Label destination operand of the subtract.
      MF.makeDebugValueSubstitution(*InstrNum,
                                    {ModInst->getDebugInstrNum(), 0});
    } else {
      // Label the call. The operand number is the penultimate operand, zero
      // based.
      unsigned SPDefOperand = ModInst->getNumOperands() - 2;
      MF.makeDebugValueSubstitution(
          *InstrNum, {ModInst->getDebugInstrNum(), SPDefOperand});
    }
  }

  if (InProlog) {
    // Apply the frame setup flag to all inserted instrs.
    for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI)
      ExpansionMBBI->setFlag(MachineInstr::FrameSetup);
  }
}

static unsigned calculateSetFPREG(uint64_t SPAdjust) {
  // Win64 ABI has a less restrictive limitation of 240; 128 works equally well
  // and might require smaller successive adjustments.
  const uint64_t Win64MaxSEHOffset = 128;
  uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset);
  // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode.
  return SEHFrameOffset & -16;
}
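
// Worked examples: calculateSetFPREG(40) == 32 (40 rounded down to 16-byte
// alignment) and calculateSetFPREG(300) == 128 (clamped to the 128-byte
// maximum first, which is already 16-byte aligned).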

// If we're forcing a stack realignment we can't rely on just the frame
// info, we need to know the ABI stack alignment as well in case we
// have a call out. Otherwise just make sure we have some alignment - we'll
// go with the minimum SlotSize.
uint64_t
X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment.
  Align StackAlign = getStackAlign();
  bool HasRealign = MF.getFunction().hasFnAttribute("stackrealign");
  if (HasRealign) {
    if (MFI.hasCalls())
      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
    else if (MaxAlign < SlotSize)
      MaxAlign = Align(SlotSize);
  }

  if (!Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR) {
    if (HasRealign)
      MaxAlign = (MaxAlign > 16) ? MaxAlign : Align(16);
    else
      MaxAlign = Align(16);
  }
  return MaxAlign.value();
}
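
// For example (illustrative): a function with the "stackrealign" attribute
// that makes calls gets at least the ABI stack alignment, and a 32-bit
// x86-interrupt handler is always raised to at least 16-byte alignment.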

void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI,
                                          const DebugLoc &DL, unsigned Reg,
                                          uint64_t MaxAlign) const {
  uint64_t Val = -MaxAlign;
  unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);

  MachineFunction &MF = *MBB.getParent();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);

  // We want to make sure that (in worst case) less than StackProbeSize bytes
  // are not probed after the AND. This assumption is used in
  // emitStackProbeInlineGeneric.
  if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) {
    {
      NumFrameLoopProbe++;
      MachineBasicBlock *entryMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *headMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *bodyMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *footMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());

      MachineFunction::iterator MBBIter = MBB.getIterator();
      MF.insert(MBBIter, entryMBB);
      MF.insert(MBBIter, headMBB);
      MF.insert(MBBIter, bodyMBB);
      MF.insert(MBBIter, footMBB);
      const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
      Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
                                  : Is64Bit         ? X86::R11D
                                                    : X86::EAX;

      // Setup entry block
      {

        entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI);
        BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);
        MachineInstr *MI =
            BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed)
                .addReg(FinalStackProbed)
                .addImm(Val)
                .setMIFlag(MachineInstr::FrameSetup);

        // The EFLAGS implicit def is dead.
        MI->getOperand(3).setIsDead();

        BuildMI(entryMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);
        BuildMI(entryMBB, DL, TII.get(X86::JCC_1))
            .addMBB(&MBB)
            .addImm(X86::COND_E)
            .setMIFlag(MachineInstr::FrameSetup);
        entryMBB->addSuccessor(headMBB);
        entryMBB->addSuccessor(&MBB);
      }

      // Loop entry block

      {
        const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
        BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr)
            .addReg(StackPtr)
            .addImm(StackProbeSize)
            .setMIFlag(MachineInstr::FrameSetup);

        BuildMI(headMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(StackPtr)
            .addReg(FinalStackProbed)
            .setMIFlag(MachineInstr::FrameSetup);

        // jump to the footer if StackPtr < FinalStackProbed
        BuildMI(headMBB, DL, TII.get(X86::JCC_1))
            .addMBB(footMBB)
            .addImm(X86::COND_B)
            .setMIFlag(MachineInstr::FrameSetup);

        headMBB->addSuccessor(bodyMBB);
        headMBB->addSuccessor(footMBB);
      }

      // setup loop body
      {
        addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc))
                         .setMIFlag(MachineInstr::FrameSetup),
                     StackPtr, false, 0)
            .addImm(0)
            .setMIFlag(MachineInstr::FrameSetup);

        const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
        BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr)
            .addReg(StackPtr)
            .addImm(StackProbeSize)
            .setMIFlag(MachineInstr::FrameSetup);

        // cmp with stack pointer bound
        BuildMI(bodyMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);

        // jump back while FinalStackProbed < StackPtr
        BuildMI(bodyMBB, DL, TII.get(X86::JCC_1))
            .addMBB(bodyMBB)
            .addImm(X86::COND_B)
            .setMIFlag(MachineInstr::FrameSetup);
        bodyMBB->addSuccessor(bodyMBB);
        bodyMBB->addSuccessor(footMBB);
      }

      // setup loop footer
      {
        BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr)
            .addReg(FinalStackProbed)
            .setMIFlag(MachineInstr::FrameSetup);
        addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc))
                         .setMIFlag(MachineInstr::FrameSetup),
                     StackPtr, false, 0)
            .addImm(0)
            .setMIFlag(MachineInstr::FrameSetup);
        footMBB->addSuccessor(&MBB);
      }

      fullyRecomputeLiveIns({footMBB, bodyMBB, headMBB, &MBB});
    }
  } else {
    MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
                           .addReg(Reg)
                           .addImm(Val)
                           .setMIFlag(MachineInstr::FrameSetup);

    // The EFLAGS implicit def is dead.
    MI->getOperand(3).setIsDead();
  }
}

bool X86FrameLowering::has128ByteRedZone(const MachineFunction &MF) const {
  // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be
  // clobbered by any interrupt handler.
  assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
         "MF used frame lowering for wrong subtarget");
  const Function &Fn = MF.getFunction();
  const bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv());
  return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone);
}
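
// For example, a SysV x86-64 leaf function with, say, 96 bytes of locals can
// address them at negative offsets from %rsp without adjusting it, because
// signal and interrupt handlers must not touch the 128 bytes below the stack
// pointer.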

/// Return true if we need to use the restricted Windows x64 prologue and
/// epilogue code patterns that can be described with WinCFI (.seh_*
/// directives).
bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const {
  return MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
}

bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
  return !isWin64Prologue(MF) && MF.needsFrameMoves();
}

/// Return true if an opcode is part of the REP group of instructions.
static bool isOpcodeRep(unsigned Opcode) {
  switch (Opcode) {
  case X86::REPNE_PREFIX:
  case X86::REP_MOVSB_32:
  case X86::REP_MOVSB_64:
  case X86::REP_MOVSD_32:
  case X86::REP_MOVSD_64:
  case X86::REP_MOVSQ_32:
  case X86::REP_MOVSQ_64:
  case X86::REP_MOVSW_32:
  case X86::REP_MOVSW_64:
  case X86::REP_PREFIX:
  case X86::REP_STOSB_32:
  case X86::REP_STOSB_64:
  case X86::REP_STOSD_32:
  case X86::REP_STOSD_64:
  case X86::REP_STOSQ_32:
  case X86::REP_STOSQ_64:
  case X86::REP_STOSW_32:
  case X86::REP_STOSW_64:
    return true;
  default:
    break;
  }
  return false;
}

/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjusts the stack pointer. Adjust the stack pointer to
/// allocate space for local variables. Also emit labels used by the exception
/// handler to generate the exception handling frames.

/*
  Here's a gist of what gets emitted:

  ; Establish frame pointer, if needed
  [if needs FP]
      push  %rbp
      .cfi_def_cfa_offset 16
      .cfi_offset %rbp, -16
      .seh_pushreg %rbp
      mov  %rsp, %rbp
      .cfi_def_cfa_register %rbp

  ; Spill general-purpose registers
  [for all callee-saved GPRs]
      pushq %<reg>
      [if not needs FP]
         .cfi_def_cfa_offset (offset from RETADDR)
      .seh_pushreg %<reg>

  ; If the required stack alignment > default stack alignment
  ; rsp needs to be re-aligned.  This creates a "re-alignment gap"
  ; of unknown size in the stack frame.
  [if stack needs re-alignment]
      and  $MASK, %rsp

  ; Allocate space for locals
  [if target is Windows and allocated space > 4096 bytes]
      ; Windows needs special care for allocations larger
      ; than one page.
      mov $NNN, %rax
      call ___chkstk_ms/___chkstk
      sub  %rax, %rsp
  [else]
      sub  $NNN, %rsp

  [if needs FP]
      .seh_stackalloc (size of XMM spill slots)
      .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
  [else]
      .seh_stackalloc NNN

  ; Spill XMMs
  ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved,
  ; they may get spilled on any platform, if the current function
  ; calls @llvm.eh.unwind.init
  [if needs FP]
      [for all callee-saved XMM registers]
          movaps  %<xmm reg>, -MMM(%rbp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
          ; i.e. the offset relative to (%rbp - SEHFrameOffset)
  [else]
      [for all callee-saved XMM registers]
          movaps  %<xmm reg>, KKK(%rsp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, KKK

  .seh_endprologue

  [if needs base pointer]
      mov  %rsp, %rbx
      [if needs to restore base pointer]
          mov %rsp, -MMM(%rbp)

  ; Emit CFI info
  [if needs FP]
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rbp)
  [else]
      .cfi_def_cfa_offset (offset from RETADDR)
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rsp)

  Notes:
  - .seh directives are emitted only for Windows 64 ABI
  - .cv_fpo directives are emitted on win32 when emitting CodeView
  - .cfi directives are emitted for all other ABIs
  - for 32-bit code, substitute %e?? registers for %r??
*/
1556
1558 MachineBasicBlock &MBB) const {
1559 assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
1560 "MF used frame lowering for wrong subtarget");
1562 MachineFrameInfo &MFI = MF.getFrameInfo();
1563 const Function &Fn = MF.getFunction();
1565 uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
1566 uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate.
1567 bool IsFunclet = MBB.isEHFuncletEntry();
1569 if (Fn.hasPersonalityFn())
1570 Personality = classifyEHPersonality(Fn.getPersonalityFn());
1571 bool FnHasClrFunclet =
1572 MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR;
1573 bool IsClrFunclet = IsFunclet && FnHasClrFunclet;
1574 bool HasFP = hasFP(MF);
1575 bool IsWin64Prologue = isWin64Prologue(MF);
1576 bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry();
1577 // FIXME: Emit FPO data for EH funclets.
1578 bool NeedsWinFPO = !IsFunclet && STI.isTargetWin32() &&
1580 bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO;
1581 bool NeedsDwarfCFI = needsDwarfCFI(MF);
1583 const Register MachineFramePtr =
1585 : FramePtr;
1586 Register BasePtr = TRI->getBaseRegister();
1587 bool HasWinCFI = false;
1588
1589 // Debug location must be unknown since the first debug location is used
1590 // to determine the end of the prologue.
1591 DebugLoc DL;
1592 Register ArgBaseReg;
1593
1594 // Emit extra prolog for argument stack slot reference.
1595 if (auto *MI = X86FI->getStackPtrSaveMI()) {
1596 // MI is lea instruction that created in X86ArgumentStackSlotPass.
1597 // Creat extra prolog for stack realignment.
1598 ArgBaseReg = MI->getOperand(0).getReg();
1599 // leal 4(%esp), %basereg
1600 // .cfi_def_cfa %basereg, 0
1601 // andl $-128, %esp
1602 // pushl -4(%basereg)
1603 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::LEA64r : X86::LEA32r),
1604 ArgBaseReg)
1606 .addImm(1)
1607 .addUse(X86::NoRegister)
1609 .addUse(X86::NoRegister)
1611 if (NeedsDwarfCFI) {
1612 // .cfi_def_cfa %basereg, 0
1613 unsigned DwarfStackPtr = TRI->getDwarfRegNum(ArgBaseReg, true);
1614 BuildCFI(MBB, MBBI, DL,
1615 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, 0),
1617 }
1618 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1619 int64_t Offset = -(int64_t)SlotSize;
1620 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm))
1621 .addReg(ArgBaseReg)
1622 .addImm(1)
1623 .addReg(X86::NoRegister)
1624 .addImm(Offset)
1625 .addReg(X86::NoRegister)
1627 }
1628
1629 // Space reserved for stack-based arguments when making a (ABI-guaranteed)
1630 // tail call.
1631 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
1632 if (TailCallArgReserveSize && IsWin64Prologue)
1633 report_fatal_error("Can't handle guaranteed tail call under win64 yet");
1634
1635 const bool EmitStackProbeCall =
1637 unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
1638
1639 if (HasFP && X86FI->hasSwiftAsyncContext()) {
1643 // The special symbol below is absolute and has a *value* suitable to be
1644 // combined with the frame pointer directly.
1645 BuildMI(MBB, MBBI, DL, TII.get(X86::OR64rm), MachineFramePtr)
1646 .addUse(MachineFramePtr)
1647 .addUse(X86::RIP)
1648 .addImm(1)
1649 .addUse(X86::NoRegister)
1650 .addExternalSymbol("swift_async_extendedFramePointerFlags",
1652 .addUse(X86::NoRegister);
1653 break;
1654 }
1655 [[fallthrough]];
1656
1658 assert(
1659 !IsWin64Prologue &&
1660 "win64 prologue does not set the bit 60 in the saved frame pointer");
1661 BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8), MachineFramePtr)
1662 .addUse(MachineFramePtr)
1663 .addImm(60)
1665 break;
1666
1668 break;
1669 }
1670 }
1671
1672 // Re-align the stack on 64-bit if the x86-interrupt calling convention is
1673 // used and an error code was pushed, since the x86-64 ABI requires a 16-byte
1674 // stack alignment.
1676 Fn.arg_size() == 2) {
1677 StackSize += 8;
1678 MFI.setStackSize(StackSize);
1679
1680 // Update the stack pointer by pushing a register. This is the instruction
1681 // emitted that would be end up being emitted by a call to `emitSPUpdate`.
1682 // Hard-coding the update to a push avoids emitting a second
1683 // `STACKALLOC_W_PROBING` instruction in the save block: We know that stack
1684 // probing isn't needed anyways for an 8-byte update.
1685 // Pushing a register leaves us in a similar situation to a regular
1686 // function call where we know that the address at (rsp-8) is writeable.
1687 // That way we avoid any off-by-ones with stack probing for additional
1688 // stack pointer updates later on.
1689 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1690 .addReg(X86::RAX, RegState::Undef)
1692 }
1693
1694 // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
1695 // function, and use up to 128 bytes of stack space, don't have a frame
1696 // pointer, calls, or dynamic alloca then we do not need to adjust the
1697 // stack pointer (we fit in the Red Zone). We also check that we don't
1698 // push and pop from the stack.
1699 if (has128ByteRedZone(MF) && !TRI->hasStackRealignment(MF) &&
1700 !MFI.hasVarSizedObjects() && // No dynamic alloca.
1701 !MFI.adjustsStack() && // No calls.
1702 !EmitStackProbeCall && // No stack probes.
1703 !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
1704 !MF.shouldSplitStack()) { // Regular stack
1705 uint64_t MinSize =
1707 if (HasFP)
1708 MinSize += SlotSize;
1709 X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0);
1710 StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
1711 MFI.setStackSize(StackSize);
1712 }
1713
1714 // Insert stack pointer adjustment for later moving of return addr. Only
1715 // applies to tail call optimized functions where the callee argument stack
1716 // size is bigger than the callers.
1717 if (TailCallArgReserveSize != 0) {
1718 BuildStackAdjustment(MBB, MBBI, DL, -(int)TailCallArgReserveSize,
1719 /*InEpilogue=*/false)
1721 }
1722
1723 // Mapping for machine moves:
1724 //
1725 // DST: VirtualFP AND
1726 // SRC: VirtualFP => DW_CFA_def_cfa_offset
1727 // ELSE => DW_CFA_def_cfa
1728 //
1729 // SRC: VirtualFP AND
1730 // DST: Register => DW_CFA_def_cfa_register
1731 //
1732 // ELSE
1733 // OFFSET < 0 => DW_CFA_offset_extended_sf
1734 // REG < 64 => DW_CFA_offset + Reg
1735 // ELSE => DW_CFA_offset_extended
1736
1737 uint64_t NumBytes = 0;
1738 int stackGrowth = -SlotSize;
1739
1740 // Find the funclet establisher parameter
1741 Register Establisher = X86::NoRegister;
1742 if (IsClrFunclet)
1743 Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX;
1744 else if (IsFunclet)
1745 Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX;
1746
1747 if (IsWin64Prologue && IsFunclet && !IsClrFunclet) {
1748 // Immediately spill establisher into the home slot.
1749 // The runtime cares about this.
1750 // MOV64mr %rdx, 16(%rsp)
1751 unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
1752 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16)
1753 .addReg(Establisher)
1755 MBB.addLiveIn(Establisher);
1756 }
1757
1758 if (HasFP) {
1759 assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved");
1760
1761 // Calculate required stack adjustment.
1762 uint64_t FrameSize = StackSize - SlotSize;
1763 NumBytes =
1764 FrameSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1765
1766 // Callee-saved registers are pushed on stack before the stack is realigned.
1767 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
1768 NumBytes = alignTo(NumBytes, MaxAlign);
1769
1770 // Save EBP/RBP into the appropriate stack slot.
1771 BuildMI(MBB, MBBI, DL,
1772 TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
1773 .addReg(MachineFramePtr, RegState::Kill)
1774 .setMIFlag(MachineInstr::FrameSetup);
1775
1776 if (NeedsDwarfCFI && !ArgBaseReg.isValid()) {
1777 // Mark the place where EBP/RBP was saved.
1778 // Define the current CFA rule to use the provided offset.
1779 assert(StackSize);
1780 BuildCFI(MBB, MBBI, DL,
1781 MCCFIInstruction::cfiDefCfaOffset(
1782 nullptr, -2 * stackGrowth + (int)TailCallArgReserveSize),
1783 MachineInstr::FrameSetup);
1784
1785 // Change the rule for the FramePtr to be an "offset" rule.
1786 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1787 BuildCFI(MBB, MBBI, DL,
1788 MCCFIInstruction::createOffset(nullptr, DwarfFramePtr,
1789 2 * stackGrowth -
1790 (int)TailCallArgReserveSize),
1791 MachineInstr::FrameSetup);
1792 }
1793
1794 if (NeedsWinCFI) {
1795 HasWinCFI = true;
1796 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1797 .addImm(FramePtr)
1798 .setMIFlag(MachineInstr::FrameSetup);
1799 }
1800
1801 if (!IsFunclet) {
1802 if (X86FI->hasSwiftAsyncContext()) {
1803 assert(!IsWin64Prologue &&
1804 "win64 prologue does not store async context right below rbp");
1805 const auto &Attrs = MF.getFunction().getAttributes();
1806
1807 // Before we update the live frame pointer we have to ensure there's a
1808 // valid (or null) asynchronous context in its slot just before FP in
1809 // the frame record, so store it now.
1810 if (Attrs.hasAttrSomewhere(Attribute::SwiftAsync)) {
1811 // We have an initial context in r14, store it just before the frame
1812 // pointer.
1813 MBB.addLiveIn(X86::R14);
1814 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1815 .addReg(X86::R14)
1816 .setMIFlag(MachineInstr::FrameSetup);
1817 } else {
1818 // No initial context, store null so that there's no pointer that
1819 // could be misused.
1820 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i32))
1821 .addImm(0)
1822 .setMIFlag(MachineInstr::FrameSetup);
1823 }
1824
1825 if (NeedsWinCFI) {
1826 HasWinCFI = true;
1827 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1828 .addImm(X86::R14)
1829 .setMIFlag(MachineInstr::FrameSetup);
1830 }
1831
1832 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr)
1833 .addUse(X86::RSP)
1834 .addImm(1)
1835 .addUse(X86::NoRegister)
1836 .addImm(8)
1837 .addUse(X86::NoRegister)
1838 .setMIFlag(MachineInstr::FrameSetup);
1839 BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri32), X86::RSP)
1840 .addUse(X86::RSP)
1841 .addImm(8)
1842 .setMIFlag(MachineInstr::FrameSetup);
1843 }
1844
1845 if (!IsWin64Prologue && !IsFunclet) {
1846 // Update EBP with the new base value.
1847 if (!X86FI->hasSwiftAsyncContext())
1848 BuildMI(MBB, MBBI, DL,
1849 TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
1850 FramePtr)
1851 .addReg(StackPtr)
1852 .setMIFlag(MachineInstr::FrameSetup);
1853
1854 if (NeedsDwarfCFI) {
1855 if (ArgBaseReg.isValid()) {
1856 SmallString<64> CfaExpr;
1857 CfaExpr.push_back(dwarf::DW_CFA_expression);
1858 uint8_t buffer[16];
1859 unsigned DwarfReg = TRI->getDwarfRegNum(MachineFramePtr, true);
1860 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
1861 CfaExpr.push_back(2);
1862 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
1863 CfaExpr.push_back(0);
1864 // DW_CFA_expression: reg5 DW_OP_breg5 +0
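// Illustrative byte sequence for RBP (DWARF reg 6) on x86-64: the escape
// emits 0x10 0x06 0x02 0x76 0x00, i.e. DW_CFA_expression, ULEB128 reg 6,
// a 2-byte expression block, DW_OP_breg6, SLEB128 offset 0.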
1865 BuildCFI(MBB, MBBI, DL,
1866 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
1867 MachineInstr::FrameSetup);
1868 } else {
1869 // Mark effective beginning of when frame pointer becomes valid.
1870 // Define the current CFA to use the EBP/RBP register.
1871 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1872 BuildCFI(
1873 MBB, MBBI, DL,
1874 MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
1875 MachineInstr::FrameSetup);
1876 }
1877 }
1878
1879 if (NeedsWinFPO) {
1880 // .cv_fpo_setframe $FramePtr
1881 HasWinCFI = true;
1882 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
1883 .addImm(FramePtr)
1884 .addImm(0)
1885 .setMIFlag(MachineInstr::FrameSetup);
1886 }
1887 }
1888 }
1889 } else {
1890 assert(!IsFunclet && "funclets without FPs not yet implemented");
1891 NumBytes =
1892 StackSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1893 }
1894
1895 // Update the offset adjustment, which is mainly used by codeview to translate
1896 // from ESP to VFRAME relative local variable offsets.
1897 if (!IsFunclet) {
1898 if (HasFP && TRI->hasStackRealignment(MF))
1899 MFI.setOffsetAdjustment(-NumBytes);
1900 else
1901 MFI.setOffsetAdjustment(-StackSize);
1902 }
1903
1904 // For EH funclets, only allocate enough space for outgoing calls. Save the
1905 // NumBytes value that we would've used for the parent frame.
1906 unsigned ParentFrameNumBytes = NumBytes;
1907 if (IsFunclet)
1908 NumBytes = getWinEHFuncletFrameSize(MF);
1909
1910 // Skip the callee-saved push instructions.
1911 bool PushedRegs = false;
1912 int StackOffset = 2 * stackGrowth;
1913 MachineBasicBlock::const_iterator LastCSPush = MBBI;
1914 auto IsCSPush = [&](const MachineBasicBlock::iterator &MBBI) {
1915 if (MBBI == MBB.end() || !MBBI->getFlag(MachineInstr::FrameSetup))
1916 return false;
1917 unsigned Opc = MBBI->getOpcode();
1918 return Opc == X86::PUSH32r || Opc == X86::PUSH64r || Opc == X86::PUSHP64r ||
1919 Opc == X86::PUSH2 || Opc == X86::PUSH2P;
1920 };
1921
1922 while (IsCSPush(MBBI)) {
1923 PushedRegs = true;
1924 Register Reg = MBBI->getOperand(0).getReg();
1925 LastCSPush = MBBI;
1926 ++MBBI;
1927 unsigned Opc = LastCSPush->getOpcode();
1928
1929 if (!HasFP && NeedsDwarfCFI) {
1930 // Mark callee-saved push instruction.
1931 // Define the current CFA rule to use the provided offset.
1932 assert(StackSize);
1933 // Compared to push, push2 introduces more stack offset (one more
1934 // register).
1935 if (Opc == X86::PUSH2 || Opc == X86::PUSH2P)
1936 StackOffset += stackGrowth;
1937 BuildCFI(MBB, MBBI, DL,
1938 MCCFIInstruction::cfiDefCfaOffset(nullptr, -StackOffset),
1939 MachineInstr::FrameSetup);
1940 StackOffset += stackGrowth;
1941 }
1942
1943 if (NeedsWinCFI) {
1944 HasWinCFI = true;
1945 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1946 .addImm(Reg)
1947 .setMIFlag(MachineInstr::FrameSetup);
1948 if (Opc == X86::PUSH2 || Opc == X86::PUSH2P)
1949 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1950 .addImm(LastCSPush->getOperand(1).getReg())
1951 .setMIFlag(MachineInstr::FrameSetup);
1952 }
1953 }
1954
1955 // Realign stack after we pushed callee-saved registers (so that we'll be
1956 // able to calculate their offsets from the frame pointer).
1957 // Don't do this for Win64; it needs to realign the stack after the prologue.
1958 if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF) &&
1959 !ArgBaseReg.isValid()) {
1960 assert(HasFP && "There should be a frame pointer if stack is realigned.");
1961 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1962
1963 if (NeedsWinCFI) {
1964 HasWinCFI = true;
1965 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlign))
1966 .addImm(MaxAlign)
1967 .setMIFlag(MachineInstr::FrameSetup);
1968 }
1969 }
1970
1971 // If there is a SUB32ri of ESP immediately before this instruction, merge
1972 // the two. This can be the case when tail call elimination is enabled and
1973 // the callee has more arguments than the caller.
1974 NumBytes -= mergeSPUpdates(MBB, MBBI, true);
1975
1976 // Adjust stack pointer: ESP -= numbytes.
1977
1978 // Windows and cygwin/mingw require a prologue helper routine when allocating
1979 // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
1980 // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
1981 // stack and adjust the stack pointer in one go. The 64-bit version of
1982 // __chkstk is only responsible for probing the stack. The 64-bit prologue is
1983 // responsible for adjusting the stack pointer. Touching the stack at 4K
1984 // increments is necessary to ensure that the guard pages used by the OS
1985 // virtual memory manager are allocated in correct sequence.
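// Sketch of the resulting x86-64 Windows sequence for an (illustrative)
// 0x5000-byte frame:
//   mov $0x5000, %rax
//   call __chkstk          # probes each 4K page, leaves RSP untouched
//   sub %rax, %rsp         # the prologue itself performs the allocation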
1986 uint64_t AlignedNumBytes = NumBytes;
1987 if (IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF))
1988 AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign);
1989 if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) {
1990 assert(!X86FI->getUsesRedZone() &&
1991 "The Red Zone is not accounted for in stack probes");
1992
1993 // Check whether EAX is livein for this block.
1994 bool isEAXAlive = isEAXLiveIn(MBB);
1995
1996 if (isEAXAlive) {
1997 if (Is64Bit) {
1998 // Save RAX
1999 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
2000 .addReg(X86::RAX, RegState::Kill)
2001 .setMIFlag(MachineInstr::FrameSetup);
2002 } else {
2003 // Save EAX
2004 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
2005 .addReg(X86::EAX, RegState::Kill)
2006 .setMIFlag(MachineInstr::FrameSetup);
2007 }
2008 }
2009
2010 if (Is64Bit) {
2011 // Handle the 64-bit Windows ABI case where we need to call __chkstk.
2012 // Function prologue is responsible for adjusting the stack pointer.
2013 int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes;
2014 BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Alloc)), X86::RAX)
2015 .addImm(Alloc)
2016 .setMIFlag(MachineInstr::FrameSetup);
2017 } else {
2018 // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
2019 // We'll also use 4 already allocated bytes for EAX.
2020 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
2021 .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
2022 .setMIFlag(MachineInstr::FrameSetup);
2023 }
2024
2025 // Call __chkstk, __chkstk_ms, or __alloca.
2026 emitStackProbe(MF, MBB, MBBI, DL, true);
2027
2028 if (isEAXAlive) {
2029 // Restore RAX/EAX
2030 MachineInstr *MI;
2031 if (Is64Bit)
2032 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV64rm), X86::RAX),
2033 StackPtr, false, NumBytes - 8);
2034 else
2035 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX),
2036 StackPtr, false, NumBytes - 4);
2037 MI->setFlag(MachineInstr::FrameSetup);
2038 MBB.insert(MBBI, MI);
2039 }
2040 } else if (NumBytes) {
2041 emitSPUpdate(MBB, MBBI, DL, -(int64_t)NumBytes, /*InEpilogue=*/false);
2042 }
2043
2044 if (NeedsWinCFI && NumBytes) {
2045 HasWinCFI = true;
2046 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
2047 .addImm(NumBytes)
2048 .setMIFlag(MachineInstr::FrameSetup);
2049 }
2050
2051 int SEHFrameOffset = 0;
2052 unsigned SPOrEstablisher;
2053 if (IsFunclet) {
2054 if (IsClrFunclet) {
2055 // The establisher parameter passed to a CLR funclet is actually a pointer
2056 // to the (mostly empty) frame of its nearest enclosing funclet; we have
2057 // to find the root function establisher frame by loading the PSPSym from
2058 // the intermediate frame.
2059 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2060 MachinePointerInfo NoInfo;
2061 MBB.addLiveIn(Establisher);
2062 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher),
2063 Establisher, false, PSPSlotOffset)
2064 .addMemOperand(MF.getMachineMemOperand(
2065 NoInfo, MachineMemOperand::MOLoad, SlotSize, Align(SlotSize)));
2067 // Save the root establisher back into the current funclet's (mostly
2068 // empty) frame, in case a sub-funclet or the GC needs it.
2069 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr,
2070 false, PSPSlotOffset)
2071 .addReg(Establisher)
2072 .addMemOperand(MF.getMachineMemOperand(
2073 NoInfo,
2074 MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
2075 SlotSize, Align(SlotSize)));
2076 }
2077 SPOrEstablisher = Establisher;
2078 } else {
2079 SPOrEstablisher = StackPtr;
2080 }
2081
2082 if (IsWin64Prologue && HasFP) {
2083 // Set RBP to a small fixed offset from RSP. In the funclet case, we base
2084 // this calculation on the incoming establisher, which holds the value of
2085 // RSP from the parent frame at the end of the prologue.
2086 SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes);
2087 if (SEHFrameOffset)
2088 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
2089 SPOrEstablisher, false, SEHFrameOffset);
2090 else
2091 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr)
2092 .addReg(SPOrEstablisher);
2093
2094 // If this is not a funclet, emit the CFI describing our frame pointer.
2095 if (NeedsWinCFI && !IsFunclet) {
2096 assert(!NeedsWinFPO && "this setframe incompatible with FPO data");
2097 HasWinCFI = true;
2098 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
2099 .addImm(FramePtr)
2100 .addImm(SEHFrameOffset)
2101 .setMIFlag(MachineInstr::FrameSetup);
2102 if (isAsynchronousEHPersonality(Personality))
2103 MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset;
2104 }
2105 } else if (IsFunclet && STI.is32Bit()) {
2106 // Reset EBP / ESI to something good for funclets.
2107 MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL);
2108 // If we're a catch funclet, we can be returned to via catchret. Save ESP
2109 // into the registration node so that the runtime will restore it for us.
2110 if (!MBB.isCleanupFuncletEntry()) {
2111 assert(Personality == EHPersonality::MSVC_CXX);
2112 Register FrameReg;
2113 int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex;
2114 int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg).getFixed();
2115 // ESP is the first field, so no extra displacement is needed.
2116 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg,
2117 false, EHRegOffset)
2118 .addReg(X86::ESP);
2119 }
2120 }
2121
2122 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
2123 const MachineInstr &FrameInstr = *MBBI;
2124 ++MBBI;
2125
2126 if (NeedsWinCFI) {
2127 int FI;
2128 if (Register Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
2129 if (X86::FR64RegClass.contains(Reg)) {
2130 int Offset;
2131 Register IgnoredFrameReg;
2132 if (IsWin64Prologue && IsFunclet)
2133 Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg);
2134 else
2135 Offset =
2136 getFrameIndexReference(MF, FI, IgnoredFrameReg).getFixed() +
2137 SEHFrameOffset;
2138
2139 HasWinCFI = true;
2140 assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data");
2141 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
2142 .addImm(Reg)
2143 .addImm(Offset)
2144 .setMIFlag(MachineInstr::FrameSetup);
2145 }
2146 }
2147 }
2148 }
2149
2150 if (NeedsWinCFI && HasWinCFI)
2151 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
2152 .setMIFlag(MachineInstr::FrameSetup);
2153
2154 if (FnHasClrFunclet && !IsFunclet) {
2155 // Save the so-called Initial-SP (i.e. the value of the stack pointer
2156 // immediately after the prolog) into the PSPSlot so that funclets
2157 // and the GC can recover it.
2158 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2159 auto PSPInfo = MachinePointerInfo::getFixedStack(
2160 MF, MF.getWinEHFuncInfo()->PSPSymFrameIdx);
2161 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, false,
2162 PSPSlotOffset)
2163 .addReg(StackPtr)
2164 .addMemOperand(MF.getMachineMemOperand(
2165 PSPInfo, MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
2166 SlotSize, Align(SlotSize)));
2167 }
2168
2169 // Realign stack after we spilled callee-saved registers (so that we'll be
2170 // able to calculate their offsets from the frame pointer).
2171 // Win64 requires aligning the stack after the prologue.
2172 if (IsWin64Prologue && TRI->hasStackRealignment(MF)) {
2173 assert(HasFP && "There should be a frame pointer if stack is realigned.");
2174 BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign);
2175 }
2176
2177 // We already dealt with stack realignment and funclets above.
2178 if (IsFunclet && STI.is32Bit())
2179 return;
2180
2181 // If we need a base pointer, set it up here. It's whatever the value
2182 // of the stack pointer is at this point. Any variable size objects
2183 // will be allocated after this, so we can still use the base pointer
2184 // to reference locals.
2185 if (TRI->hasBasePointer(MF)) {
2186 // Update the base pointer with the current stack pointer.
2187 unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
2188 BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
2189 .addReg(SPOrEstablisher)
2190 .setMIFlag(MachineInstr::FrameSetup);
2191 if (X86FI->getRestoreBasePointer()) {
2192 // Stash value of base pointer. Saving RSP instead of EBP shortens
2193 // dependence chain. Used by SjLj EH.
2194 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2195 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), FramePtr, true,
2196 X86FI->getRestoreBasePointerOffset())
2197 .addReg(SPOrEstablisher)
2198 .setMIFlag(MachineInstr::FrameSetup);
2199 }
2200
2201 if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) {
2202 // Stash the value of the frame pointer relative to the base pointer for
2203 // Win32 EH. This supports Win32 EH, which does the inverse of the above:
2204 // it recovers the frame pointer from the base pointer rather than the
2205 // other way around.
2206 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2207 Register UsedReg;
2208 int Offset =
2209 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
2210 .getFixed();
2211 assert(UsedReg == BasePtr);
2212 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset)
2213 .addReg(FramePtr)
2214 .setMIFlag(MachineInstr::FrameSetup);
2215 }
2216 }
2217 if (ArgBaseReg.isValid()) {
2218 // Save argument base pointer.
2219 auto *MI = X86FI->getStackPtrSaveMI();
2220 int FI = MI->getOperand(1).getIndex();
2221 unsigned MOVmr = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
2222 // movl %basereg, offset(%ebp)
2223 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), FI)
2224 .addReg(ArgBaseReg)
2225 .setMIFlag(MachineInstr::FrameSetup);
2226 }
2227
2228 if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
2229 // Mark end of stack pointer adjustment.
2230 if (!HasFP && NumBytes) {
2231 // Define the current CFA rule to use the provided offset.
2232 assert(StackSize);
2233 BuildCFI(
2234 MBB, MBBI, DL,
2235 MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize - stackGrowth),
2236 MachineInstr::FrameSetup);
2237 }
2238
2239 // Emit DWARF info specifying the offsets of the callee-saved registers.
2240 emitCalleeSavedFrameMoves(MBB, MBBI, DL, true);
2241 }
2242
2243 // X86 Interrupt handling function cannot assume anything about the direction
2244 // flag (DF in EFLAGS register). Clear this flag by creating "cld" instruction
2245 // in each prologue of interrupt handler function.
2246 //
2247 // Create "cld" instruction only in these cases:
2248 // 1. The interrupt handling function uses any of the "rep" instructions.
2249 // 2. Interrupt handling function calls another function.
2250 // 3. If there are any inline asm blocks, as we do not know what they do
2251 //
2252 // TODO: We should also emit cld if we detect the use of std, but as of now,
2253 // the compiler does not even emit that instruction or even define it, so in
2254 // practice, this would only happen with inline asm, which we cover anyway.
2255 if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR) {
2256 bool NeedsCLD = false;
2257
2258 for (const MachineBasicBlock &B : MF) {
2259 for (const MachineInstr &MI : B) {
2260 if (MI.isCall()) {
2261 NeedsCLD = true;
2262 break;
2263 }
2264
2265 if (isOpcodeRep(MI.getOpcode())) {
2266 NeedsCLD = true;
2267 break;
2268 }
2269
2270 if (MI.isInlineAsm()) {
2271 // TODO: Parse asm for rep instructions or call sites?
2272 // For now, let's play it safe and emit a cld instruction
2273 // just in case.
2274 NeedsCLD = true;
2275 break;
2276 }
2277 }
2278 }
2279
2280 if (NeedsCLD) {
2281 BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
2282 .setMIFlag(MachineInstr::FrameSetup);
2283 }
2284 }
2285
2286 // At this point we know if the function has WinCFI or not.
2287 MF.setHasWinCFI(HasWinCFI);
2288}
2289
2290 bool X86FrameLowering::canUseLEAForSPInEpilogue(
2291 const MachineFunction &MF) const {
2292 // We can't use LEA instructions for adjusting the stack pointer if we don't
2293 // have a frame pointer in the Win64 ABI. Only ADD instructions may be used
2294 // to deallocate the stack.
2295 // This means that we can use LEA for SP in two situations:
2296 // 1. We *aren't* using the Win64 ABI which means we are free to use LEA.
2297 // 2. We *have* a frame pointer which means we are permitted to use LEA.
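// For example, 'lea -16(%rbp), %rsp' is only a valid epilogue when a frame
// pointer exists; a frameless Win64 epilogue must use 'add $N, %rsp' instead.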
2298 return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF);
2299}
2300
2301 static bool isFuncletReturnInstr(MachineInstr &MI) {
2302 switch (MI.getOpcode()) {
2303 case X86::CATCHRET:
2304 case X86::CLEANUPRET:
2305 return true;
2306 default:
2307 return false;
2308 }
2309 llvm_unreachable("impossible");
2310}
2311
2312// CLR funclets use a special "Previous Stack Pointer Symbol" slot on the
2313// stack. It holds a pointer to the bottom of the root function frame. The
2314// establisher frame pointer passed to a nested funclet may point to the
2315// (mostly empty) frame of its parent funclet, but it will need to find
2316// the frame of the root function to access locals. To facilitate this,
2317// every funclet copies the pointer to the bottom of the root function
2318// frame into a PSPSym slot in its own (mostly empty) stack frame. Using the
2319// same offset for the PSPSym in the root function frame that's used in the
2320// funclets' frames allows each funclet to dynamically accept any ancestor
2321// frame as its establisher argument (the runtime doesn't guarantee the
2322// immediate parent for some reason lost to history), and also allows the GC,
2323// which uses the PSPSym for some bookkeeping, to find it in any funclet's
2324// frame with only a single offset reported for the entire method.
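// Illustratively, a funclet handed any ancestor's establisher (RCX for CLR
// funclets) can recover the root frame with one fixed-offset load,
//   mov PSPSlotOffset(%rcx), %rcx
// which is the MOV64rm emitted for CLR funclets in emitPrologue above.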
2325unsigned
2326X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const {
2327 const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo();
2328 Register SPReg;
2329 int Offset = getFrameIndexReferencePreferSP(MF, Info.PSPSymFrameIdx, SPReg,
2330 /*IgnoreSPUpdates*/ true)
2331 .getFixed();
2332 assert(Offset >= 0 && SPReg == TRI->getStackRegister());
2333 return static_cast<unsigned>(Offset);
2334}
2335
2336unsigned
2337 X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
2338 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2339 // This is the size of the pushed CSRs.
2340 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2341 // This is the size of callee saved XMMs.
2342 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2343 unsigned XMMSize =
2344 WinEHXMMSlotInfo.size() * TRI->getSpillSize(X86::VR128RegClass);
2345 // This is the amount of stack a funclet needs to allocate.
2346 unsigned UsedSize;
2347 EHPersonality Personality =
2348 classifyEHPersonality(MF.getFunction().getPersonalityFn());
2349 if (Personality == EHPersonality::CoreCLR) {
2350 // CLR funclets need to hold enough space to include the PSPSym, at the
2351 // same offset from the stack pointer (immediately after the prolog) as it
2352 // resides at in the main function.
2353 UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize;
2354 } else {
2355 // Other funclets just need enough stack for outgoing call arguments.
2356 UsedSize = MF.getFrameInfo().getMaxCallFrameSize();
2357 }
2358 // RBP is not included in the callee saved register block. After pushing RBP,
2359 // everything is 16 byte aligned. Everything we allocate before an outgoing
2360 // call must also be 16 byte aligned.
2361 unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlign());
2362 // Subtract out the size of the callee saved registers. This is how much stack
2363 // each funclet will allocate.
2364 return FrameSizeMinusRBP + XMMSize - CSSize;
2365}
2366
2367static bool isTailCallOpcode(unsigned Opc) {
2368 return Opc == X86::TCRETURNri || Opc == X86::TCRETURNdi ||
2369 Opc == X86::TCRETURNmi || Opc == X86::TCRETURNri64 ||
2370 Opc == X86::TCRETURNdi64 || Opc == X86::TCRETURNmi64;
2371}
2372
2373 void X86FrameLowering::emitEpilogue(MachineFunction &MF,
2374 MachineBasicBlock &MBB) const {
2375 const MachineFrameInfo &MFI = MF.getFrameInfo();
2376 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2377 MachineBasicBlock::iterator Terminator = MBB.getFirstTerminator();
2378 MachineBasicBlock::iterator MBBI = Terminator;
2379 DebugLoc DL;
2380 if (MBBI != MBB.end())
2381 DL = MBBI->getDebugLoc();
2382 // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
2383 const bool Is64BitILP32 = STI.isTarget64BitILP32();
2384 Register FramePtr = TRI->getFrameRegister(MF);
2385 Register MachineFramePtr =
2386 Is64BitILP32 ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr;
2387
2388 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2389 bool NeedsWin64CFI =
2390 IsWin64Prologue && MF.getFunction().needsUnwindTableEntry();
2391 bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(*MBBI);
2392
2393 // Get the number of bytes to allocate from the FrameInfo.
2394 uint64_t StackSize = MFI.getStackSize();
2395 uint64_t MaxAlign = calculateMaxStackAlign(MF);
2396 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2397 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
2398 bool HasFP = hasFP(MF);
2399 uint64_t NumBytes = 0;
2400
2401 bool NeedsDwarfCFI = (!MF.getTarget().getTargetTriple().isOSDarwin() &&
2402 !MF.getTarget().getTargetTriple().isOSWindows()) &&
2403 MF.needsFrameMoves();
2404
2405 Register ArgBaseReg;
2406 if (auto *MI = X86FI->getStackPtrSaveMI()) {
2407 unsigned Opc = X86::LEA32r;
2408 Register StackReg = X86::ESP;
2409 ArgBaseReg = MI->getOperand(0).getReg();
2410 if (STI.is64Bit()) {
2411 Opc = X86::LEA64r;
2412 StackReg = X86::RSP;
2413 }
2414 // leal -4(%basereg), %esp
2415 // .cfi_def_cfa %esp, 4
2416 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackReg)
2417 .addUse(ArgBaseReg)
2418 .addImm(1)
2419 .addUse(X86::NoRegister)
2420 .addImm(-(int64_t)SlotSize)
2421 .addUse(X86::NoRegister)
2422 .setMIFlag(MachineInstr::FrameDestroy);
2423 if (NeedsDwarfCFI) {
2424 unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackReg, true);
2425 BuildCFI(MBB, MBBI, DL,
2426 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2427 MachineInstr::FrameDestroy);
2428 --MBBI;
2429 }
2430 --MBBI;
2431 }
2432
2433 if (IsFunclet) {
2434 assert(HasFP && "EH funclets without FP not yet implemented");
2435 NumBytes = getWinEHFuncletFrameSize(MF);
2436 } else if (HasFP) {
2437 // Calculate required stack adjustment.
2438 uint64_t FrameSize = StackSize - SlotSize;
2439 NumBytes = FrameSize - CSSize - TailCallArgReserveSize;
2440
2441 // Callee-saved registers were pushed on stack before the stack was
2442 // realigned.
2443 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
2444 NumBytes = alignTo(FrameSize, MaxAlign);
2445 } else {
2446 NumBytes = StackSize - CSSize - TailCallArgReserveSize;
2447 }
2448 uint64_t SEHStackAllocAmt = NumBytes;
2449
2450 // AfterPop is the position to insert .cfi_restore.
2451 MachineBasicBlock::iterator AfterPop = MBBI;
2452 if (HasFP) {
2453 if (X86FI->hasSwiftAsyncContext()) {
2454 // Discard the context.
2455 int Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
2456 emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/ true);
2457 }
2458 // Pop EBP.
2459 BuildMI(MBB, MBBI, DL,
2460 TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())),
2461 MachineFramePtr)
2462 .setMIFlag(MachineInstr::FrameDestroy);
2463
2464 // We need to reset FP to its untagged state on return. Bit 60 is currently
2465 // used to show the presence of an extended frame.
2466 if (X86FI->hasSwiftAsyncContext()) {
2467 BuildMI(MBB, MBBI, DL, TII.get(X86::BTR64ri8), MachineFramePtr)
2468 .addUse(MachineFramePtr)
2469 .addImm(60)
2470 .setMIFlag(MachineInstr::FrameDestroy);
2471 }
2472
2473 if (NeedsDwarfCFI) {
2474 if (!ArgBaseReg.isValid()) {
2475 unsigned DwarfStackPtr =
2476 TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
2477 BuildCFI(MBB, MBBI, DL,
2478 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2479 MachineInstr::FrameDestroy);
2480 }
2481 if (!MBB.succ_empty() && !MBB.isReturnBlock()) {
2482 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
2483 BuildCFI(MBB, AfterPop, DL,
2484 MCCFIInstruction::createRestore(nullptr, DwarfFramePtr),
2485 MachineInstr::FrameDestroy);
2486 --MBBI;
2487 --AfterPop;
2488 }
2489 --MBBI;
2490 }
2491 }
2492
2493 MachineBasicBlock::iterator FirstCSPop = MBBI;
2494 // Skip the callee-saved pop instructions.
2495 while (MBBI != MBB.begin()) {
2496 MachineBasicBlock::iterator PI = std::prev(MBBI);
2497 unsigned Opc = PI->getOpcode();
2498
2499 if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
2500 if (!PI->getFlag(MachineInstr::FrameDestroy) ||
2501 (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::BTR64ri8 &&
2502 Opc != X86::ADD64ri32 && Opc != X86::POPP64r && Opc != X86::POP2 &&
2503 Opc != X86::POP2P && Opc != X86::LEA64r))
2504 break;
2505 FirstCSPop = PI;
2506 }
2507
2508 --MBBI;
2509 }
2510 if (ArgBaseReg.isValid()) {
2511 // Restore argument base pointer.
2512 auto *MI = X86FI->getStackPtrSaveMI();
2513 int FI = MI->getOperand(1).getIndex();
2514 unsigned MOVrm = Is64Bit ? X86::MOV64rm : X86::MOV32rm;
2515 // movl offset(%ebp), %basereg
2516 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVrm), ArgBaseReg), FI)
2517 .setMIFlag(MachineInstr::FrameDestroy);
2518 }
2519 MBBI = FirstCSPop;
2520
2521 if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET)
2522 emitCatchRetReturnValue(MBB, FirstCSPop, &*Terminator);
2523
2524 if (MBBI != MBB.end())
2525 DL = MBBI->getDebugLoc();
2526 // If there is an ADD32ri or SUB32ri of ESP immediately before this
2527 // instruction, merge the two instructions.
2528 if (NumBytes || MFI.hasVarSizedObjects())
2529 NumBytes += mergeSPUpdates(MBB, MBBI, true);
2530
2531 // If dynamic alloca is used, then reset esp to point to the last callee-saved
2532 // slot before popping them off! The same applies when the stack was
2533 // realigned. Don't do this if this was a funclet epilogue, since the funclets
2534 // will not do realignment or dynamic stack allocation.
2535 if (((TRI->hasStackRealignment(MF)) || MFI.hasVarSizedObjects()) &&
2536 !IsFunclet) {
2537 if (TRI->hasStackRealignment(MF))
2538 MBBI = FirstCSPop;
2539 unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
2540 uint64_t LEAAmount =
2541 IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
2542
2543 if (X86FI->hasSwiftAsyncContext())
2544 LEAAmount -= 16;
2545
2546 // There are only two legal forms of epilogue:
2547 // - add SEHAllocationSize, %rsp
2548 // - lea SEHAllocationSize(%FramePtr), %rsp
2549 //
2550 // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence.
2551 // However, we may use this sequence if we have a frame pointer because the
2552 // effects of the prologue can safely be undone.
2553 if (LEAAmount != 0) {
2554 unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
2555 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), FramePtr,
2556 false, LEAAmount);
2557 --MBBI;
2558 } else {
2559 unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
2560 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr).addReg(FramePtr);
2561 --MBBI;
2562 }
2563 } else if (NumBytes) {
2564 // Adjust stack pointer back: ESP += numbytes.
2565 emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true);
2566 if (!HasFP && NeedsDwarfCFI) {
2567 // Define the current CFA rule to use the provided offset.
2568 BuildCFI(MBB, MBBI, DL,
2569 MCCFIInstruction::cfiDefCfaOffset(
2570 nullptr, CSSize + TailCallArgReserveSize + SlotSize),
2571 MachineInstr::FrameDestroy);
2572 }
2573 --MBBI;
2574 }
2575
2576 // Windows unwinder will not invoke function's exception handler if IP is
2577 // either in prologue or in epilogue. This behavior causes a problem when a
2578 // call immediately precedes an epilogue, because the return address points
2579 // into the epilogue. To cope with that, we insert an epilogue marker here,
2580 // then replace it with a 'nop' if it ends up immediately after a CALL in the
2581 // final emitted code.
2582 if (NeedsWin64CFI && MF.hasWinCFI())
2583 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
2584
2585 if (!HasFP && NeedsDwarfCFI) {
2586 MBBI = FirstCSPop;
2587 int64_t Offset = -(int64_t)CSSize - SlotSize;
2588 // Mark callee-saved pop instruction.
2589 // Define the current CFA rule to use the provided offset.
2590 while (MBBI != MBB.end()) {
2591 MachineBasicBlock::iterator PI = MBBI;
2592 unsigned Opc = PI->getOpcode();
2593 ++MBBI;
2594 if (Opc == X86::POP32r || Opc == X86::POP64r || Opc == X86::POPP64r ||
2595 Opc == X86::POP2 || Opc == X86::POP2P) {
2596 Offset += SlotSize;
2597 // Compared to pop, pop2 introduces more stack offset (one more
2598 // register).
2599 if (Opc == X86::POP2 || Opc == X86::POP2P)
2600 Offset += SlotSize;
2601 BuildCFI(MBB, MBBI, DL,
2602 MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset),
2603 MachineInstr::FrameDestroy);
2604 }
2605 }
2606 }
2607
2608 // Emit DWARF info specifying the restores of the callee-saved registers.
2609 // For an epilogue with the return inside, or any other block without
2610 // successors, no need to generate .cfi_restore for callee-saved registers.
2611 if (NeedsDwarfCFI && !MBB.succ_empty())
2612 emitCalleeSavedFrameMoves(MBB, AfterPop, DL, false);
2613
2614 if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
2615 // Add the return addr area delta back since we are not tail calling.
2616 int Offset = -1 * X86FI->getTCReturnAddrDelta();
2617 assert(Offset >= 0 && "TCDelta should never be positive");
2618 if (Offset) {
2619 // Check for possible merge with preceding ADD instruction.
2620 Offset += mergeSPUpdates(MBB, Terminator, true);
2621 emitSPUpdate(MBB, Terminator, DL, Offset, /*InEpilogue=*/true);
2622 }
2623 }
2624
2625 // Emit tilerelease for AMX kernel.
2626 if (X86FI->getAMXProgModel() == AMXProgModelEnum::ManagedRA)
2627 BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
2628}
2629
2630 StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,
2631 int FI,
2632 Register &FrameReg) const {
2633 const MachineFrameInfo &MFI = MF.getFrameInfo();
2634
2635 bool IsFixed = MFI.isFixedObjectIndex(FI);
2636 // We can't calculate offset from frame pointer if the stack is realigned,
2637 // so enforce usage of stack/base pointer. The base pointer is used when we
2638 // have dynamic allocas in addition to dynamic realignment.
2639 if (TRI->hasBasePointer(MF))
2640 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister();
2641 else if (TRI->hasStackRealignment(MF))
2642 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister();
2643 else
2644 FrameReg = TRI->getFrameRegister(MF);
2645
2646 // Offset will hold the offset from the stack pointer at function entry to the
2647 // object.
2648 // We need to factor in additional offsets applied during the prologue to the
2649 // frame, base, and stack pointer depending on which is used.
2650 int64_t Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
2651 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2652 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2653 uint64_t StackSize = MFI.getStackSize();
2654 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2655 int64_t FPDelta = 0;
2656
2657 // In an x86 interrupt, remove the offset we added to account for the return
2658 // address from any stack object allocated in the caller's frame. Interrupts
2659 // do not have a standard return address. Fixed objects in the current frame,
2660 // such as SSE register spills, should not get this treatment.
2661 if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR &&
2662 Offset >= 0) {
2663 Offset += getOffsetOfLocalArea();
2664 }
2665
2666 if (IsWin64Prologue) {
2667 assert(!MFI.hasCalls() || (StackSize % 16) == 8);
2668
2669 // Calculate required stack adjustment.
2670 uint64_t FrameSize = StackSize - SlotSize;
2671 // If required, include space for extra hidden slot for stashing base
2672 // pointer.
2673 if (X86FI->getRestoreBasePointer())
2674 FrameSize += SlotSize;
2675 uint64_t NumBytes = FrameSize - CSSize;
2676
2677 uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
2678 if (FI && FI == X86FI->getFAIndex())
2679 return StackOffset::getFixed(-SEHFrameOffset);
2680
2681 // FPDelta is the offset from the "traditional" FP location of the old base
2682 // pointer followed by return address and the location required by the
2683 // restricted Win64 prologue.
2684 // Add FPDelta to all offsets below that go through the frame pointer.
2685 FPDelta = FrameSize - SEHFrameOffset;
2686 assert((!MFI.hasCalls() || (FPDelta % 16) == 0) &&
2687 "FPDelta isn't aligned per the Win64 ABI!");
2688 }
2689
2690 if (FrameReg == TRI->getFramePtr()) {
2691 // Skip saved EBP/RBP
2692 Offset += SlotSize;
2693
2694 // Account for restricted Windows prologue.
2695 Offset += FPDelta;
2696
2697 // Skip the RETADDR move area
2698 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2699 if (TailCallReturnAddrDelta < 0)
2700 Offset -= TailCallReturnAddrDelta;
2701
2703 }
2704
2705 // FrameReg is either the stack pointer or a base pointer. But the base is
2706 // located at the end of the statically known StackSize so the distinction
2707 // doesn't really matter.
2708 if (TRI->hasStackRealignment(MF) || TRI->hasBasePointer(MF))
2709 assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
2710 return StackOffset::getFixed(Offset + StackSize);
2711}
2712
2713 int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, int FI,
2714 Register &FrameReg) const {
2715 const MachineFrameInfo &MFI = MF.getFrameInfo();
2716 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2717 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2718 const auto it = WinEHXMMSlotInfo.find(FI);
2719
2720 if (it == WinEHXMMSlotInfo.end())
2721 return getFrameIndexReference(MF, FI, FrameReg).getFixed();
2722
2723 FrameReg = TRI->getStackRegister();
2724 return alignDown(MFI.getMaxCallFrameSize(), getStackAlign().value()) +
2725 it->second;
2726}
2727
2728 StackOffset
2729 X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF, int FI,
2730 Register &FrameReg,
2731 int Adjustment) const {
2732 const MachineFrameInfo &MFI = MF.getFrameInfo();
2733 FrameReg = TRI->getStackRegister();
2734 return StackOffset::getFixed(MFI.getObjectOffset(FI) -
2735 getOffsetOfLocalArea() + Adjustment);
2736}
2737
2738 StackOffset
2739 X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
2740 int FI, Register &FrameReg,
2741 bool IgnoreSPUpdates) const {
2742
2743 const MachineFrameInfo &MFI = MF.getFrameInfo();
2744 // Does not include any dynamic realign.
2745 const uint64_t StackSize = MFI.getStackSize();
2746 // LLVM arranges the stack as follows:
2747 // ...
2748 // ARG2
2749 // ARG1
2750 // RETADDR
2751 // PUSH RBP <-- RBP points here
2752 // PUSH CSRs
2753 // ~~~~~~~ <-- possible stack realignment (non-win64)
2754 // ...
2755 // STACK OBJECTS
2756 // ... <-- RSP after prologue points here
2757 // ~~~~~~~ <-- possible stack realignment (win64)
2758 //
2759 // if (hasVarSizedObjects()):
2760 // ... <-- "base pointer" (ESI/RBX) points here
2761 // DYNAMIC ALLOCAS
2762 // ... <-- RSP points here
2763 //
2764 // Case 1: In the simple case of no stack realignment and no dynamic
2765 // allocas, both "fixed" stack objects (arguments and CSRs) are addressable
2766 // with fixed offsets from RSP.
2767 //
2768 // Case 2: In the case of stack realignment with no dynamic allocas, fixed
2769 // stack objects are addressed with RBP and regular stack objects with RSP.
2770 //
2771 // Case 3: In the case of dynamic allocas and stack realignment, RSP is used
2772 // to address stack arguments for outgoing calls and nothing else. The "base
2773 // pointer" points to local variables, and RBP points to fixed objects.
2774 //
2775 // In cases 2 and 3, we can only answer for non-fixed stack objects, and the
2776 // answer we give is relative to the SP after the prologue, and not the
2777 // SP in the middle of the function.
2778
2779 if (MFI.isFixedObjectIndex(FI) && TRI->hasStackRealignment(MF) &&
2780 !STI.isTargetWin64())
2781 return getFrameIndexReference(MF, FI, FrameReg);
2782
2783 // If !hasReservedCallFrame the function might have SP adjustment in the
2784 // body. So, even though the offset is statically known, it depends on where
2785 // we are in the function.
2786 if (!IgnoreSPUpdates && !hasReservedCallFrame(MF))
2787 return getFrameIndexReference(MF, FI, FrameReg);
2788
2789 // We don't handle tail calls, and shouldn't be seeing them either.
2790 assert(MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta() >= 0 &&
2791 "we don't handle this case!");
2792
2793 // This is how the math works out:
2794 //
2795 // %rsp grows (i.e. gets lower) left to right. Each box below is
2796 // one word (eight bytes). Obj0 is the stack slot we're trying to
2797 // get to.
2798 //
2799 // ----------------------------------
2800 // | BP | Obj0 | Obj1 | ... | ObjN |
2801 // ----------------------------------
2802 // ^ ^ ^ ^
2803 // A B C E
2804 //
2805 // A is the incoming stack pointer.
2806 // (B - A) is the local area offset (-8 for x86-64) [1]
2807 // (C - A) is the Offset returned by MFI.getObjectOffset for Obj0 [2]
2808 //
2809 // |(E - B)| is the StackSize (absolute value, positive). For a
2810 // stack that grows down, this works out to be (B - E). [3]
2811 //
2812 // E is also the value of %rsp after stack has been set up, and we
2813 // want (C - E) -- the value we can add to %rsp to get to Obj0. Now
2814 // (C - E) == (C - A) - (B - A) + (B - E)
2815 // { Using [1], [2] and [3] above }
2816 // == getObjectOffset - LocalAreaOffset + StackSize
2817
2818 return getFrameIndexReferenceSP(MF, FI, FrameReg, StackSize);
2819}
2820
2821 bool X86FrameLowering::assignCalleeSavedSpillSlots(
2822 MachineFunction &MF, const TargetRegisterInfo *TRI,
2823 std::vector<CalleeSavedInfo> &CSI) const {
2824 MachineFrameInfo &MFI = MF.getFrameInfo();
2825 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2826
2827 unsigned CalleeSavedFrameSize = 0;
2828 unsigned XMMCalleeSavedFrameSize = 0;
2829 auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2830 int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
2831
2832 int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2833
2834 if (TailCallReturnAddrDelta < 0) {
2835 // create RETURNADDR area
2836 // arg
2837 // arg
2838 // RETADDR
2839 // { ...
2840 // RETADDR area
2841 // ...
2842 // }
2843 // [EBP]
2844 MFI.CreateFixedObject(-TailCallReturnAddrDelta,
2845 TailCallReturnAddrDelta - SlotSize, true);
2846 }
2847
2848 // Spill the BasePtr if it's used.
2849 if (this->TRI->hasBasePointer(MF)) {
2850 // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
2851 if (MF.hasEHFunclets()) {
2852 int FI = MFI.CreateSpillStackObject(SlotSize, Align(SlotSize));
2853 X86FI->setHasSEHFramePtrSave(true);
2854 X86FI->setSEHFramePtrSaveIndex(FI);
2855 }
2856 }
2857
2858 if (hasFP(MF)) {
2859 // emitPrologue always spills frame register the first thing.
2860 SpillSlotOffset -= SlotSize;
2861 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2862
2863 // The async context lives directly before the frame pointer, and we
2864 // allocate a second slot to preserve stack alignment.
2865 if (X86FI->hasSwiftAsyncContext()) {
2866 SpillSlotOffset -= SlotSize;
2867 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2868 SpillSlotOffset -= SlotSize;
2869 }
2870
2871 // Since emitPrologue and emitEpilogue will handle spilling and restoring of
2872 // the frame register, we can delete it from CSI list and not have to worry
2873 // about avoiding it later.
2874 Register FPReg = TRI->getFrameRegister(MF);
2875 for (unsigned i = 0; i < CSI.size(); ++i) {
2876 if (TRI->regsOverlap(CSI[i].getReg(), FPReg)) {
2877 CSI.erase(CSI.begin() + i);
2878 break;
2879 }
2880 }
2881 }
2882
2883 // Strategy:
2884 // 1. Use push2 when
2885 // a) number of CSR > 1 if no need padding
2886 // b) number of CSR > 2 if need padding
2887 // 2. When the number of CSR push is odd
2888 // a. Start to use push2 from the 1st push if stack is 16B aligned.
2889 // b. Start to use push2 from the 2nd push if stack is not 16B aligned.
2890 // 3. When the number of CSR push is even, start to use push2 from the 1st
2891 // push and make the stack 16B aligned before the push
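// For instance (counts illustrative): three eligible GPR CSRs with a
// 16B-aligned first slot yield one PUSH2 pair plus one plain PUSH; four CSRs
// starting misaligned get a padding slot first so both pairs can use PUSH2.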
2892 unsigned NumRegsForPush2 = 0;
2893 if (STI.hasPush2Pop2()) {
2894 unsigned NumCSGPR = llvm::count_if(CSI, [](const CalleeSavedInfo &I) {
2895 return X86::GR64RegClass.contains(I.getReg());
2896 });
2897 bool NeedPadding = (SpillSlotOffset % 16 != 0) && (NumCSGPR % 2 == 0);
2898 bool UsePush2Pop2 = NeedPadding ? NumCSGPR > 2 : NumCSGPR > 1;
2899 X86FI->setPadForPush2Pop2(NeedPadding && UsePush2Pop2);
2900 NumRegsForPush2 = UsePush2Pop2 ? alignDown(NumCSGPR, 2) : 0;
2901 if (X86FI->padForPush2Pop2()) {
2902 SpillSlotOffset -= SlotSize;
2903 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2904 }
2905 }
2906
2907 // Assign slots for GPRs. It increases frame size.
2908 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2909 Register Reg = I.getReg();
2910
2911 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
2912 continue;
2913
2914 // A CSR is a candidate for push2/pop2 when its slot offset is 16B aligned
2915 // or the number of candidates collected so far is odd (completing a pair).
2916 if (X86FI->getNumCandidatesForPush2Pop2() < NumRegsForPush2 &&
2917 (SpillSlotOffset % 16 == 0 ||
2918 X86FI->getNumCandidatesForPush2Pop2() % 2))
2919 X86FI->addCandidateForPush2Pop2(Reg);
2920
2921 SpillSlotOffset -= SlotSize;
2922 CalleeSavedFrameSize += SlotSize;
2923
2924 int SlotIndex = MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2925 I.setFrameIdx(SlotIndex);
2926 }
2927
2928 // Adjust the offset of spill slot as we know the accurate callee saved frame
2929 // size.
2930 if (X86FI->getRestoreBasePointer()) {
2931 SpillSlotOffset -= SlotSize;
2932 CalleeSavedFrameSize += SlotSize;
2933
2934 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2935 // TODO: saving the slot index is better?
2936 X86FI->setRestoreBasePointer(CalleeSavedFrameSize);
2937 }
2938 assert(X86FI->getNumCandidatesForPush2Pop2() % 2 == 0 &&
2939 "Expect even candidates for push2/pop2");
2940 if (X86FI->getNumCandidatesForPush2Pop2())
2941 ++NumFunctionUsingPush2Pop2;
2942 X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
2943 MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
2944
2945 // Assign slots for XMMs.
2946 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2947 Register Reg = I.getReg();
2948 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
2949 continue;
2950
2951 // If this is k-register make sure we lookup via the largest legal type.
2952 MVT VT = MVT::Other;
2953 if (X86::VK16RegClass.contains(Reg))
2954 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
2955
2956 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
2957 unsigned Size = TRI->getSpillSize(*RC);
2958 Align Alignment = TRI->getSpillAlign(*RC);
2959 // ensure alignment
2960 assert(SpillSlotOffset < 0 && "SpillSlotOffset should always < 0 on X86");
2961 SpillSlotOffset = -alignTo(-SpillSlotOffset, Alignment);
2962
2963 // spill into slot
2964 SpillSlotOffset -= Size;
2965 int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
2966 I.setFrameIdx(SlotIndex);
2967 MFI.ensureMaxAlignment(Alignment);
2968
2969 // Save the start offset and size of XMM in stack frame for funclets.
2970 if (X86::VR128RegClass.contains(Reg)) {
2971 WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize;
2972 XMMCalleeSavedFrameSize += Size;
2973 }
2974 }
2975
2976 return true;
2977}
2978
2979 bool X86FrameLowering::spillCalleeSavedRegisters(
2980 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2981 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2982 DebugLoc DL = MBB.findDebugLoc(MI);
2983
2984 // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI
2985 // for us, and there are no XMM CSRs on Win32.
2986 if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows())
2987 return true;
2988
2989 // Push GPRs. It increases frame size.
2990 const MachineFunction &MF = *MBB.getParent();
2991 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2992 if (X86FI->padForPush2Pop2())
2993 emitSPUpdate(MBB, MI, DL, -(int64_t)SlotSize, /*InEpilogue=*/false);
2994
2995 // Update LiveIn of the basic block and decide whether we can add a kill flag
2996 // to the use.
2997 auto UpdateLiveInCheckCanKill = [&](Register Reg) {
2998 const MachineRegisterInfo &MRI = MF.getRegInfo();
2999 // Do not set a kill flag on values that are also marked as live-in. This
3000 // happens with the @llvm.returnaddress intrinsic and with arguments
3001 // passed in callee saved registers.
3002 // Omitting the kill flags is conservatively correct even if the live-in
3003 // is not used after all.
3004 if (MRI.isLiveIn(Reg))
3005 return false;
3006 MBB.addLiveIn(Reg);
3007 // Check if any subregister is live-in
3008 for (MCRegAliasIterator AReg(Reg, TRI, false); AReg.isValid(); ++AReg)
3009 if (MRI.isLiveIn(*AReg))
3010 return false;
3011 return true;
3012 };
3013 auto UpdateLiveInGetKillRegState = [&](Register Reg) {
3014 return getKillRegState(UpdateLiveInCheckCanKill(Reg));
3015 };
3016
3017 for (auto RI = CSI.rbegin(), RE = CSI.rend(); RI != RE; ++RI) {
3018 Register Reg = RI->getReg();
3019 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
3020 continue;
3021
3022 if (X86FI->isCandidateForPush2Pop2(Reg)) {
3023 Register Reg2 = (++RI)->getReg();
3024 BuildMI(MBB, MI, DL, TII.get(getPUSH2Opcode(STI)))
3025 .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
3026 .addReg(Reg2, UpdateLiveInGetKillRegState(Reg2))
3027 .setMIFlag(MachineInstr::FrameSetup);
3028 } else {
3029 BuildMI(MBB, MI, DL, TII.get(getPUSHOpcode(STI)))
3030 .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
3031 .setMIFlag(MachineInstr::FrameSetup);
3032 }
3033 }
3034
3035 if (X86FI->getRestoreBasePointer()) {
3036 unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
3037 Register BaseReg = this->TRI->getBaseRegister();
3038 BuildMI(MBB, MI, DL, TII.get(Opc))
3039 .addReg(BaseReg, getKillRegState(true))
3040 .setMIFlag(MachineInstr::FrameSetup);
3041 }
3042
3043 // Make XMM regs spilled. X86 does not have ability of push/pop XMM.
3044 // It can be done by spilling XMMs to stack frame.
3045 for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
3046 Register Reg = I.getReg();
3047 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3048 continue;
3049
3050 // If this is k-register make sure we lookup via the largest legal type.
3051 MVT VT = MVT::Other;
3052 if (X86::VK16RegClass.contains(Reg))
3053 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
3054
3055 // Add the callee-saved register as live-in. It's killed at the spill.
3056 MBB.addLiveIn(Reg);
3057 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
3058
3059 TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, TRI,
3060 Register());
3061 --MI;
3062 MI->setFlag(MachineInstr::FrameSetup);
3063 ++MI;
3064 }
3065
3066 return true;
3067}
3068
3069 void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB,
3070 MachineBasicBlock::iterator MBBI,
3071 MachineInstr *CatchRet) const {
3072 // SEH shouldn't use catchret.
3073 assert(!isAsynchronousEHPersonality(classifyEHPersonality(
3074 MBB.getParent()->getFunction().getPersonalityFn())) &&
3075 "SEH should not use CATCHRET");
3076 const DebugLoc &DL = CatchRet->getDebugLoc();
3077 MachineBasicBlock *CatchRetTarget = CatchRet->getOperand(0).getMBB();
3078
3079 // Fill EAX/RAX with the address of the target block.
3080 if (STI.is64Bit()) {
3081 // LEA64r CatchRetTarget(%rip), %rax
3082 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), X86::RAX)
3083 .addReg(X86::RIP)
3084 .addImm(0)
3085 .addReg(0)
3086 .addMBB(CatchRetTarget)
3087 .addReg(0);
3088 } else {
3089 // MOV32ri $CatchRetTarget, %eax
3090 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
3091 .addMBB(CatchRetTarget);
3092 }
3093
3094 // Record that we've taken the address of CatchRetTarget and no longer just
3095 // reference it in a terminator.
3096 CatchRetTarget->setMachineBlockAddressTaken();
3097}
3098
3099 bool X86FrameLowering::restoreCalleeSavedRegisters(
3100 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
3101 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
3102 if (CSI.empty())
3103 return false;
3104
3105 if (MI != MBB.end() && isFuncletReturnInstr(*MI) && STI.isOSWindows()) {
3106 // Don't restore CSRs in 32-bit EH funclets. Matches
3107 // spillCalleeSavedRegisters.
3108 if (STI.is32Bit())
3109 return true;
3110 // Don't restore CSRs before an SEH catchret. SEH except blocks do not form
3111 // funclets. emitEpilogue transforms these to normal jumps.
3112 if (MI->getOpcode() == X86::CATCHRET) {
3113 const Function &F = MBB.getParent()->getFunction();
3114 bool IsSEH = isAsynchronousEHPersonality(
3115 classifyEHPersonality(F.getPersonalityFn()));
3116 if (IsSEH)
3117 return true;
3118 }
3119 }
3120
3121 DebugLoc DL = MBB.findDebugLoc(MI);
3122
3123 // Reload XMMs from stack frame.
3124 for (const CalleeSavedInfo &I : CSI) {
3125 Register Reg = I.getReg();
3126 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3127 continue;
3128
3129 // If this is k-register make sure we lookup via the largest legal type.
3130 MVT VT = MVT::Other;
3131 if (X86::VK16RegClass.contains(Reg))
3132 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
3133
3134 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
3135 TII.loadRegFromStackSlot(MBB, MI, Reg, I.getFrameIdx(), RC, TRI,
3136 Register());
3137 }
3138
3139 // Clear the stack slot for spill base pointer register.
3140 MachineFunction &MF = *MBB.getParent();
3141 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3142 if (X86FI->getRestoreBasePointer()) {
3143 unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
3144 Register BaseReg = this->TRI->getBaseRegister();
3145 BuildMI(MBB, MI, DL, TII.get(Opc), BaseReg)
3146 .setMIFlag(MachineInstr::FrameDestroy);
3147 }
3148
3149 // POP GPRs.
3150 for (auto I = CSI.begin(), E = CSI.end(); I != E; ++I) {
3151 Register Reg = I->getReg();
3152 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
3153 continue;
3154
3155 if (X86FI->isCandidateForPush2Pop2(Reg))
3156 BuildMI(MBB, MI, DL, TII.get(getPOP2Opcode(STI)), Reg)
3157 .addReg((++I)->getReg(), RegState::Define)
3158 .setMIFlag(MachineInstr::FrameDestroy);
3159 else
3160 BuildMI(MBB, MI, DL, TII.get(getPOPOpcode(STI)), Reg)
3161 .setMIFlag(MachineInstr::FrameDestroy);
3162 }
3163 if (X86FI->padForPush2Pop2())
3164 emitSPUpdate(MBB, MI, DL, SlotSize, /*InEpilogue=*/true);
3165
3166 return true;
3167}
3168
3169 void X86FrameLowering::determineCalleeSaves(MachineFunction &MF,
3170 BitVector &SavedRegs,
3171 RegScavenger *RS) const {
3172 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
3173
3174 // Spill the BasePtr if it's used.
3175 if (TRI->hasBasePointer(MF)) {
3176 Register BasePtr = TRI->getBaseRegister();
3177 if (STI.isTarget64BitILP32())
3178 BasePtr = getX86SubSuperRegister(BasePtr, 64);
3179 SavedRegs.set(BasePtr);
3180 }
3181}
3182
3183static bool HasNestArgument(const MachineFunction *MF) {
3184 const Function &F = MF->getFunction();
3185 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
3186 I++) {
3187 if (I->hasNestAttr() && !I->use_empty())
3188 return true;
3189 }
3190 return false;
3191}
3192
3193/// GetScratchRegister - Get a temp register for performing work in the
3194/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
3195/// and the properties of the function either one or two registers will be
3196/// needed. Set primary to true for the first register, false for the second.
3197static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64,
3198 const MachineFunction &MF, bool Primary) {
3199 CallingConv::ID CallingConvention = MF.getFunction().getCallingConv();
3200
3201 // Erlang stuff.
3202 if (CallingConvention == CallingConv::HiPE) {
3203 if (Is64Bit)
3204 return Primary ? X86::R14 : X86::R13;
3205 else
3206 return Primary ? X86::EBX : X86::EDI;
3207 }
3208
3209 if (Is64Bit) {
3210 if (IsLP64)
3211 return Primary ? X86::R11 : X86::R12;
3212 else
3213 return Primary ? X86::R11D : X86::R12D;
3214 }
3215
3216 bool IsNested = HasNestArgument(&MF);
3217
3218 if (CallingConvention == CallingConv::X86_FastCall ||
3219 CallingConvention == CallingConv::Fast ||
3220 CallingConvention == CallingConv::Tail) {
3221 if (IsNested)
3222 report_fatal_error("Segmented stacks does not support fastcall with "
3223 "nested function.");
3224 return Primary ? X86::EAX : X86::ECX;
3225 }
3226 if (IsNested)
3227 return Primary ? X86::EDX : X86::EAX;
3228 return Primary ? X86::ECX : X86::EAX;
3229}
3230
3231// The stack limit in the TCB is set to this many bytes above the actual stack
3232 // limit.
3233 static const uint64_t kSplitStackAvailable = 256;
3234
3235 void X86FrameLowering::adjustForSegmentedStacks(
3236 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3237 MachineFrameInfo &MFI = MF.getFrameInfo();
3238 uint64_t StackSize;
3239 unsigned TlsReg, TlsOffset;
3240 DebugLoc DL;
3241
3242 // To support shrink-wrapping we would need to insert the new blocks
3243 // at the right place and update the branches to PrologueMBB.
3244 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3245
3246 unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3247 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3248 "Scratch register is live-in");
3249
3250 if (MF.getFunction().isVarArg())
3251 report_fatal_error("Segmented stacks do not support vararg functions.");
3252 if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
3253 !STI.isTargetWin64() && !STI.isTargetFreeBSD() &&
3254 !STI.isTargetDragonFly())
3255 report_fatal_error("Segmented stacks not supported on this platform.");
3256
3257 // Eventually StackSize will be calculated by a link-time pass, which will
3258 // also decide whether checking code needs to be injected into this particular
3259 // prologue.
3260 StackSize = MFI.getStackSize();
3261
3262 if (!MFI.needsSplitStackProlog())
3263 return;
3264
3265 MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
3266 MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
3267 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3268 bool IsNested = false;
3269
3270 // We need to know if the function has a nest argument only in 64 bit mode.
3271 if (Is64Bit)
3272 IsNested = HasNestArgument(&MF);
3273
3274 // The MOV R10, RAX needs to be in a different block, since the RET we emit in
3275 // allocMBB needs to be the last (terminating) instruction.
3276
3277 for (const auto &LI : PrologueMBB.liveins()) {
3278 allocMBB->addLiveIn(LI);
3279 checkMBB->addLiveIn(LI);
3280 }
3281
3282 if (IsNested)
3283 allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
3284
3285 MF.push_front(allocMBB);
3286 MF.push_front(checkMBB);
3287
3288 // When the frame size is less than 256 we just compare the stack
3289 // boundary directly to the value of the stack pointer, per gcc.
3290 bool CompareStackPointer = StackSize < kSplitStackAvailable;
3291
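// On x86-64 Linux, for example, the emitted check is (scratch register
// illustrative):
//   lea -StackSize(%rsp), %r11
//   cmp %fs:0x70, %r11     # stacklet limit from the TCB
//   jge body               # enough room on the current stacklet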
3292 // Read the limit of the current stacklet from the stack_guard location.
3293 if (Is64Bit) {
3294 if (STI.isTargetLinux()) {
3295 TlsReg = X86::FS;
3296 TlsOffset = IsLP64 ? 0x70 : 0x40;
3297 } else if (STI.isTargetDarwin()) {
3298 TlsReg = X86::GS;
3299 TlsOffset = 0x60 + 90 * 8; // See pthread_machdep.h. Steal TLS slot 90.
3300 } else if (STI.isTargetWin64()) {
3301 TlsReg = X86::GS;
3302 TlsOffset = 0x28; // pvArbitrary, reserved for application use
3303 } else if (STI.isTargetFreeBSD()) {
3304 TlsReg = X86::FS;
3305 TlsOffset = 0x18;
3306 } else if (STI.isTargetDragonFly()) {
3307 TlsReg = X86::FS;
3308 TlsOffset = 0x20; // use tls_tcb.tcb_segstack
3309 } else {
3310 report_fatal_error("Segmented stacks not supported on this platform.");
3311 }
3312
3313 if (CompareStackPointer)
3314 ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
3315 else
3316 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r),
3317 ScratchReg)
3318 .addReg(X86::RSP)
3319 .addImm(1)
3320 .addReg(0)
3321 .addImm(-StackSize)
3322 .addReg(0);
3323
3324 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm))
3325 .addReg(ScratchReg)
3326 .addReg(0)
3327 .addImm(1)
3328 .addReg(0)
3329 .addImm(TlsOffset)
3330 .addReg(TlsReg);
3331 } else {
3332 if (STI.isTargetLinux()) {
3333 TlsReg = X86::GS;
3334 TlsOffset = 0x30;
3335 } else if (STI.isTargetDarwin()) {
3336 TlsReg = X86::GS;
3337 TlsOffset = 0x48 + 90 * 4;
3338 } else if (STI.isTargetWin32()) {
3339 TlsReg = X86::FS;
3340 TlsOffset = 0x14; // pvArbitrary, reserved for application use
3341 } else if (STI.isTargetDragonFly()) {
3342 TlsReg = X86::FS;
3343 TlsOffset = 0x10; // use tls_tcb.tcb_segstack
3344 } else if (STI.isTargetFreeBSD()) {
3345 report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
3346 } else {
3347 report_fatal_error("Segmented stacks not supported on this platform.");
3348 }
3349
3350 if (CompareStackPointer)
3351 ScratchReg = X86::ESP;
3352 else
3353 BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg)
3354 .addReg(X86::ESP)
3355 .addImm(1)
3356 .addReg(0)
3357 .addImm(-StackSize)
3358 .addReg(0);
3359
3362 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3363 .addReg(ScratchReg)
3364 .addReg(0)
3365 .addImm(0)
3366 .addReg(0)
3367 .addImm(TlsOffset)
3368 .addReg(TlsReg);
3369 } else if (STI.isTargetDarwin()) {
3370
3371 // TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
3372 unsigned ScratchReg2;
3373 bool SaveScratch2;
3374 if (CompareStackPointer) {
3375 // The primary scratch register is available for holding the TLS offset.
3376 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3377 SaveScratch2 = false;
3378 } else {
3379 // Need to use a second register to hold the TLS offset
3380 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
3381
3382 // Unfortunately, with fastcc the second scratch register may hold an
3383 // argument.
3384 SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
3385 }
3386
3387 // If Scratch2 is live-in then it needs to be saved.
3388 assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
3389 "Scratch register is live-in and not saved");
3390
3391 if (SaveScratch2)
3392 BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
3393 .addReg(ScratchReg2, RegState::Kill);
3394
3395 BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
3396 .addImm(TlsOffset);
3397 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3398 .addReg(ScratchReg)
3399 .addReg(ScratchReg2)
3400 .addImm(1)
3401 .addReg(0)
3402 .addImm(0)
3403 .addReg(TlsReg);
3404
3405 if (SaveScratch2)
3406 BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
3407 }
3408 }
3409
3410 // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
3411 // It jumps to normal execution of the function body.
3412 BuildMI(checkMBB, DL, TII.get(X86::JCC_1))
3413 .addMBB(&PrologueMBB)
3414 .addImm(X86::COND_GE);
3415
3416 // On 32-bit we first push the argument size and then the frame size. On
3417 // 64-bit, we pass the stack frame size in r10 and the argument size in r11.
3418 if (Is64Bit) {
3419 // Functions with nested arguments use R10, so it needs to be saved across
3420 // the call to _morestack
3421
3422 const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
3423 const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
3424 const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
3425 const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
3426
3427 if (IsNested)
3428 BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
3429
3430 BuildMI(allocMBB, DL, TII.get(getMOVriOpcode(IsLP64, StackSize)), Reg10)
3431 .addImm(StackSize);
3432 BuildMI(allocMBB, DL,
3433 TII.get(getMOVriOpcode(IsLP64, X86FI->getArgumentStackSize())),
3434 Reg11)
3435 .addImm(X86FI->getArgumentStackSize());
3436 } else {
3437 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i))
3438 .addImm(X86FI->getArgumentStackSize());
3439 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i)).addImm(StackSize);
3440 }
3441
3442 // __morestack is in libgcc
3443 if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
3444 // Under the large code model, we cannot assume that __morestack lives
3445 // within 2^31 bytes of the call site, so we cannot use pc-relative
3446 // addressing. We cannot perform the call via a temporary register,
3447 // as the rax register may be used to store the static chain, and all
3448 // other suitable registers may be either callee-save or used for
3449 // parameter passing. We cannot use the stack at this point either
3450 // because __morestack manipulates the stack directly.
3451 //
3452 // To avoid these issues, perform an indirect call via a read-only memory
3453 // location containing the address.
3454 //
3455 // This solution is not perfect, as it assumes that the .rodata section
3456 // is laid out within 2^31 bytes of each function body, but this seems
3457 // to be sufficient for JIT.
3458 // FIXME: Add retpoline support and remove the error here.
3459 if (STI.useIndirectThunkCalls())
3460 report_fatal_error("Emitting morestack calls on 64-bit with the large "
3461 "code model and thunks not yet implemented.");
3462 BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
3463 .addReg(X86::RIP)
3464 .addImm(0)
3465 .addReg(0)
3466 .addExternalSymbol("__morestack_addr")
3467 .addReg(0);
3468 } else {
3469 if (Is64Bit)
3470 BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
3471 .addExternalSymbol("__morestack");
3472 else
3473 BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
3474 .addExternalSymbol("__morestack");
3475 }
3476
3477 if (IsNested)
3478 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
3479 else
3480 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
3481
3482 allocMBB->addSuccessor(&PrologueMBB);
3483
3484 checkMBB->addSuccessor(allocMBB, BranchProbability::getZero());
3485 checkMBB->addSuccessor(&PrologueMBB, BranchProbability::getOne());
3486
3487#ifdef EXPENSIVE_CHECKS
3488 MF.verify();
3489#endif
3490}
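
The control flow assembled above reduces to one pointer comparison per function entry. A minimal standalone model of that test (names hypothetical; on x86-64 Linux the stacklet limit would come from the %fs:0x70 TLS slot selected above):

#include <cstddef>
#include <cstdint>

// checkMBB falls through to allocMBB (the __morestack path) only when
// SP - StackSize drops below the stacklet limit; otherwise the COND_GE
// jump skips straight to the function body.
bool needsMoreStack(uintptr_t SP, uintptr_t StackletLimit, size_t StackSize) {
  return SP - StackSize < StackletLimit;
}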
3491
3492/// Lookup an ERTS parameter in the !hipe.literals named metadata node.
3493/// HiPE provides Erlang Runtime System-internal parameters, such as PCB offsets
3494/// to fields it needs, through a named metadata node "hipe.literals" containing
3495/// name-value pairs.
3496static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD,
3497 const StringRef LiteralName) {
3498 for (int i = 0, e = HiPELiteralsMD->getNumOperands(); i != e; ++i) {
3499 MDNode *Node = HiPELiteralsMD->getOperand(i);
3500 if (Node->getNumOperands() != 2)
3501 continue;
3502 MDString *NodeName = dyn_cast<MDString>(Node->getOperand(0));
3503 ValueAsMetadata *NodeVal = dyn_cast<ValueAsMetadata>(Node->getOperand(1));
3504 if (!NodeName || !NodeVal)
3505 continue;
3506 ConstantInt *ValConst = dyn_cast_or_null<ConstantInt>(NodeVal->getValue());
3507 if (ValConst && NodeName->getString() == LiteralName) {
3508 return ValConst->getZExtValue();
3509 }
3510 }
3511
3512 report_fatal_error("HiPE literal " + LiteralName +
3513 " required but not provided");
3514}
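
For illustration, a front end could provide the metadata this lookup consumes along the following lines (a sketch; the literal name and value below are made up):

#include "llvm/IR/Constants.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"

static void addHiPELiteral(llvm::Module &M, llvm::StringRef Name, int Value) {
  llvm::LLVMContext &Ctx = M.getContext();
  // Each operand of !hipe.literals is a (name, value) pair, matching the
  // MDString/ValueAsMetadata shape that getHiPELiteral scans for.
  llvm::Metadata *Ops[] = {
      llvm::MDString::get(Ctx, Name),
      llvm::ConstantAsMetadata::get(
          llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Value))};
  M.getOrInsertNamedMetadata("hipe.literals")
      ->addOperand(llvm::MDNode::get(Ctx, Ops));
}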
3515
3516// Return true if there are no non-ehpad successors to MBB and there are no
3517// non-meta instructions between MBBI and MBB.end().
3518static bool blockEndIsUnreachable(const MachineBasicBlock &MBB,
3519 MachineBasicBlock::const_iterator MBBI) {
3520 return llvm::all_of(
3521 MBB.successors(),
3522 [](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) &&
3523 std::all_of(MBBI, MBB.end(), [](const MachineInstr &MI) {
3524 return MI.isMetaInstruction();
3525 });
3526}
3527
3528/// Erlang programs may need a special prologue to handle the stack size they
3529/// might need at runtime. That is because Erlang/OTP does not implement a C
3530/// stack but uses a custom hybrid stack/heap architecture.
3531/// (for more information see Eric Stenman's Ph.D. thesis:
3532/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
3533///
3534/// CheckStack:
3535/// temp0 = sp - MaxStack
3536/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3537/// OldStart:
3538/// ...
3539/// IncStack:
3540/// call inc_stack # doubles the stack space
3541/// temp0 = sp - MaxStack
3542/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3543void X86FrameLowering::adjustForHiPEPrologue(
3544 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3545 MachineFrameInfo &MFI = MF.getFrameInfo();
3546 DebugLoc DL;
3547
3548 // To support shrink-wrapping we would need to insert the new blocks
3549 // at the right place and update the branches to PrologueMBB.
3550 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3551
3552 // HiPE-specific values
3553 NamedMDNode *HiPELiteralsMD =
3554 MF.getFunction().getParent()->getNamedMetadata("hipe.literals");
3555 if (!HiPELiteralsMD)
3556 report_fatal_error(
3557 "Can't generate HiPE prologue without runtime parameters");
3558 const unsigned HipeLeafWords = getHiPELiteral(
3559 HiPELiteralsMD, Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS");
3560 const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
3561 const unsigned Guaranteed = HipeLeafWords * SlotSize;
3562 unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs
3563 ? MF.getFunction().arg_size() - CCRegisteredArgs
3564 : 0;
3565 unsigned MaxStack = MFI.getStackSize() + CallerStkArity * SlotSize + SlotSize;
3566
3567 assert(STI.isTargetLinux() &&
3568 "HiPE prologue is only supported on Linux operating systems.");
3569
3570 // Compute the largest caller's frame that is needed to fit the callees'
3571 // frames. This 'MaxStack' is computed from:
3572 //
3573 // a) the fixed frame size, which is the space needed for all spilled temps,
3574 // b) outgoing on-stack parameter areas, and
3575 // c) the minimum stack space this function needs to make available for the
3576 // functions it calls (a tunable ABI property).
3577 if (MFI.hasCalls()) {
3578 unsigned MoreStackForCalls = 0;
3579
3580 for (auto &MBB : MF) {
3581 for (auto &MI : MBB) {
3582 if (!MI.isCall())
3583 continue;
3584
3585 // Get callee operand.
3586 const MachineOperand &MO = MI.getOperand(0);
3587
3588 // Only take account of global function calls (no closures etc.).
3589 if (!MO.isGlobal())
3590 continue;
3591
3592 const Function *F = dyn_cast<Function>(MO.getGlobal());
3593 if (!F)
3594 continue;
3595
3596 // Do not update 'MaxStack' for primitive and built-in functions
3597 // (encoded with names either starting with "erlang."/"bif_" or not
3598 // having a ".", such as a simple <Module>.<Function>.<Arity>, or an
3599 // "_", such as the BIF "suspend_0") as they are executed on another
3600 // stack.
3601 if (F->getName().contains("erlang.") || F->getName().contains("bif_") ||
3602 F->getName().find_first_of("._") == StringRef::npos)
3603 continue;
3604
3605 unsigned CalleeStkArity = F->arg_size() > CCRegisteredArgs
3606 ? F->arg_size() - CCRegisteredArgs
3607 : 0;
3608 if (HipeLeafWords - 1 > CalleeStkArity)
3609 MoreStackForCalls =
3610 std::max(MoreStackForCalls,
3611 (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
3612 }
3613 }
3614 MaxStack += MoreStackForCalls;
3615 }
3616
3617 // If the stack frame needed is larger than the guaranteed amount, runtime
3618 // checks and calls to the "inc_stack_0" BIF should be inserted in the assembly prologue.
3619 if (MaxStack > Guaranteed) {
3620 MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
3621 MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
3622
3623 for (const auto &LI : PrologueMBB.liveins()) {
3624 stackCheckMBB->addLiveIn(LI);
3625 incStackMBB->addLiveIn(LI);
3626 }
3627
3628 MF.push_front(incStackMBB);
3629 MF.push_front(stackCheckMBB);
3630
3631 unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
3632 unsigned LEAop, CMPop, CALLop;
3633 SPLimitOffset = getHiPELiteral(HiPELiteralsMD, "P_NSP_LIMIT");
3634 if (Is64Bit) {
3635 SPReg = X86::RSP;
3636 PReg = X86::RBP;
3637 LEAop = X86::LEA64r;
3638 CMPop = X86::CMP64rm;
3639 CALLop = X86::CALL64pcrel32;
3640 } else {
3641 SPReg = X86::ESP;
3642 PReg = X86::EBP;
3643 LEAop = X86::LEA32r;
3644 CMPop = X86::CMP32rm;
3645 CALLop = X86::CALLpcrel32;
3646 }
3647
3648 ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3649 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3650 "HiPE prologue scratch register is live-in");
3651
3652 // Create new MBB for StackCheck:
3653 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
3654 false, -MaxStack);
3655 // SPLimitOffset is in a fixed heap location (pointed by BP).
3656 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
3657 PReg, false, SPLimitOffset);
3658 BuildMI(stackCheckMBB, DL, TII.get(X86::JCC_1))
3659 .addMBB(&PrologueMBB)
3660 .addImm(X86::COND_AE);
3661
3662 // Create new MBB for IncStack:
3663 BuildMI(incStackMBB, DL, TII.get(CALLop)).addExternalSymbol("inc_stack_0");
3664 addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
3665 false, -MaxStack);
3666 addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
3667 PReg, false, SPLimitOffset);
3668 BuildMI(incStackMBB, DL, TII.get(X86::JCC_1))
3669 .addMBB(incStackMBB)
3670 .addImm(X86::COND_LE);
3671
3672 stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100});
3673 stackCheckMBB->addSuccessor(incStackMBB, {1, 100});
3674 incStackMBB->addSuccessor(&PrologueMBB, {99, 100});
3675 incStackMBB->addSuccessor(incStackMBB, {1, 100});
3676 }
3677#ifdef EXPENSIVE_CHECKS
3678 MF.verify();
3679#endif
3680}
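
To see when the stack-check blocks fire, here is the MaxStack arithmetic with made-up numbers (HipeLeafWords, frame size, and arities are all hypothetical):

constexpr unsigned SlotSize = 8, HipeLeafWords = 24;      // hypothetical
constexpr unsigned Guaranteed = HipeLeafWords * SlotSize; // 192 bytes
constexpr unsigned FrameSize = 160, CallerStkArity = 0, CalleeStkArity = 2;
constexpr unsigned MaxStack =
    FrameSize + CallerStkArity * SlotSize + SlotSize +    // 168
    (HipeLeafWords - 1 - CalleeStkArity) * SlotSize;      // + 168 = 336
static_assert(MaxStack > Guaranteed, "stackCheckMBB/incStackMBB are emitted");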
3681
3682bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
3683 MachineBasicBlock::iterator MBBI,
3684 const DebugLoc &DL,
3685 int Offset) const {
3686 if (Offset <= 0)
3687 return false;
3688
3689 if (Offset % SlotSize)
3690 return false;
3691
3692 int NumPops = Offset / SlotSize;
3693 // This is only worth it if we have at most 2 pops.
3694 if (NumPops != 1 && NumPops != 2)
3695 return false;
3696
3697 // Handle only the trivial case where the adjustment directly follows
3698 // a call. This is the most common one, anyway.
3699 if (MBBI == MBB.begin())
3700 return false;
3701 MachineBasicBlock::iterator Prev = std::prev(MBBI);
3702 if (!Prev->isCall() || !Prev->getOperand(1).isRegMask())
3703 return false;
3704
3705 unsigned Regs[2];
3706 unsigned FoundRegs = 0;
3707
3708 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3709 const MachineOperand &RegMask = Prev->getOperand(1);
3710
3711 auto &RegClass =
3712 Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass;
3713 // Try to find up to NumPops free registers.
3714 for (auto Candidate : RegClass) {
3715 // Poor man's liveness:
3716 // Since we're immediately after a call, any register that is clobbered
3717 // by the call and not defined by it can be considered dead.
3718 if (!RegMask.clobbersPhysReg(Candidate))
3719 continue;
3720
3721 // Don't clobber reserved registers
3722 if (MRI.isReserved(Candidate))
3723 continue;
3724
3725 bool IsDef = false;
3726 for (const MachineOperand &MO : Prev->implicit_operands()) {
3727 if (MO.isReg() && MO.isDef() &&
3728 TRI->isSuperOrSubRegisterEq(MO.getReg(), Candidate)) {
3729 IsDef = true;
3730 break;
3731 }
3732 }
3733
3734 if (IsDef)
3735 continue;
3736
3737 Regs[FoundRegs++] = Candidate;
3738 if (FoundRegs == (unsigned)NumPops)
3739 break;
3740 }
3741
3742 if (FoundRegs == 0)
3743 return false;
3744
3745 // If we found only one free register, but need two, reuse the same one twice.
3746 while (FoundRegs < (unsigned)NumPops)
3747 Regs[FoundRegs++] = Regs[0];
3748
3749 for (int i = 0; i < NumPops; ++i)
3750 BuildMI(MBB, MBBI, DL, TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r),
3751 Regs[i]);
3752
3753 return true;
3754}
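
The profitability test at the top of adjustStackWithPops can be modeled in isolation (a sketch; SlotSize = 4 models 32-bit x86):

#include <cassert>

bool worthUsingPops(int Offset, int SlotSize = 4) {
  if (Offset <= 0 || Offset % SlotSize)
    return false;
  int NumPops = Offset / SlotSize;
  return NumPops == 1 || NumPops == 2; // only 1-2 pops beat an add/sub
}

int main() {
  assert(worthUsingPops(8));   // "add esp, 8" after a call -> two pops
  assert(!worthUsingPops(12)); // three pops: keep the add
}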
3755
3756MachineBasicBlock::iterator X86FrameLowering::eliminateCallFramePseudoInstr(
3757 MachineFunction &MF, MachineBasicBlock &MBB,
3758 MachineBasicBlock::iterator I) const {
3759 bool reserveCallFrame = hasReservedCallFrame(MF);
3760 unsigned Opcode = I->getOpcode();
3761 bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
3762 DebugLoc DL = I->getDebugLoc(); // copy DebugLoc as I will be erased.
3763 uint64_t Amount = TII.getFrameSize(*I);
3764 uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(*I) : 0;
3765 I = MBB.erase(I);
3766 auto InsertPos = skipDebugInstructionsForward(I, MBB.end());
3767
3768 // Try to avoid emitting dead SP adjustments if the block end is unreachable,
3769 // typically because the function is marked noreturn (abort, throw,
3770 // assert_fail, etc).
3771 if (isDestroy && blockEndIsUnreachable(MBB, I))
3772 return I;
3773
3774 if (!reserveCallFrame) {
3775 // If the stack pointer can be changed after prologue, turn the
3776 // adjcallstackup instruction into a 'sub ESP, <amt>' and the
3777 // adjcallstackdown instruction into 'add ESP, <amt>'
3778
3779 // We need to keep the stack aligned properly. To do this, we round the
3780 // amount of space needed for the outgoing arguments up to the next
3781 // alignment boundary.
3782 Amount = alignTo(Amount, getStackAlign());
3783
3784 const Function &F = MF.getFunction();
3785 bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
3786 bool DwarfCFI = !WindowsCFI && MF.needsFrameMoves();
3787
3788 // If we have any exception handlers in this function, and we adjust
3789 // the SP before calls, we may need to indicate this to the unwinder
3790 // using GNU_ARGS_SIZE. Note that this may be necessary even when
3791 // Amount == 0, because the preceding function may have set a non-0
3792 // GNU_ARGS_SIZE.
3793 // TODO: We don't need to reset this between subsequent functions,
3794 // if it didn't change.
3795 bool HasDwarfEHHandlers = !WindowsCFI && !MF.getLandingPads().empty();
3796
3797 if (HasDwarfEHHandlers && !isDestroy &&
3798 MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
3799 BuildCFI(MBB, InsertPos, DL,
3800 MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
3801
3802 if (Amount == 0)
3803 return I;
3804
3805 // Factor out the amount that gets handled inside the sequence
3806 // (Pushes of argument for frame setup, callee pops for frame destroy)
3807 Amount -= InternalAmt;
3808
3809 // TODO: This is needed only if we require precise CFA.
3810 // If this is a callee-pop calling convention, emit a CFA adjust for
3811 // the amount the callee popped.
3812 if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
3813 BuildCFI(MBB, InsertPos, DL,
3814 MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
3815
3816 // Add Amount to SP to destroy a frame, or subtract to setup.
3817 int64_t StackAdjustment = isDestroy ? Amount : -Amount;
3818
3819 if (StackAdjustment) {
3820 // Merge with any previous or following adjustment instruction. Note: the
3821 // instructions merged with here do not have CFI, so their stack
3822 // adjustments do not feed into CfaAdjustment.
3823 StackAdjustment += mergeSPUpdates(MBB, InsertPos, true);
3824 StackAdjustment += mergeSPUpdates(MBB, InsertPos, false);
3825
3826 if (StackAdjustment) {
3827 if (!(F.hasMinSize() &&
3828 adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
3829 BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
3830 /*InEpilogue=*/false);
3831 }
3832 }
3833
3834 if (DwarfCFI && !hasFP(MF)) {
3835 // If we don't have FP, but need to generate unwind information,
3836 // we need to set the correct CFA offset after the stack adjustment.
3837 // How much we adjust the CFA offset depends on whether we're emitting
3838 // CFI only for EH purposes or for debugging. EH only requires the CFA
3839 // offset to be correct at each call site, while for debugging we want
3840 // it to be more precise.
3841
3842 int64_t CfaAdjustment = -StackAdjustment;
3843 // TODO: When not using precise CFA, we also need to adjust for the
3844 // InternalAmt here.
3845 if (CfaAdjustment) {
3846 BuildCFI(
3847 MBB, InsertPos, DL,
3848 MCCFIInstruction::createAdjustCfaOffset(nullptr, CfaAdjustment));
3849 }
3850 }
3851
3852 return I;
3853 }
3854
3855 if (InternalAmt) {
3856 MachineBasicBlock::iterator CI = I;
3857 MachineBasicBlock::iterator B = MBB.begin();
3858 while (CI != B && !std::prev(CI)->isCall())
3859 --CI;
3860 BuildStackAdjustment(MBB, CI, DL, -InternalAmt, /*InEpilogue=*/false);
3861 }
3862
3863 return I;
3864}
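
The rounding applied to Amount above is plain align-up arithmetic; for example, 20 bytes of outgoing arguments under a 16-byte stack alignment become a 32-byte adjustment (sketch mirroring what llvm::alignTo computes):

#include <cstdint>

constexpr uint64_t alignUp(uint64_t Value, uint64_t Alignment) {
  return (Value + Alignment - 1) / Alignment * Alignment;
}
static_assert(alignUp(20, 16) == 32, "sub esp, 32 on setup, add esp, 32 on destroy");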
3865
3866bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
3867 assert(MBB.getParent() && "Block is not attached to a function!");
3868 const MachineFunction &MF = *MBB.getParent();
3869 if (!MBB.isLiveIn(X86::EFLAGS))
3870 return true;
3871
3872 // If stack probes have to loop inline or call, that will clobber EFLAGS.
3873 // FIXME: we could allow cases that will use emitStackProbeInlineGenericBlock.
3874 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
3875 const X86TargetLowering &TLI = *STI.getTargetLowering();
3876 if (TLI.hasInlineStackProbe(MF) || TLI.hasStackProbeSymbol(MF))
3877 return false;
3878
3879 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3880 return !TRI->hasStackRealignment(MF) && !X86FI->hasSwiftAsyncContext();
3881}
3882
3883bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
3884 assert(MBB.getParent() && "Block is not attached to a function!");
3885
3886 // Win64 has strict requirements on epilogues, and we are not taking
3887 // any chances at messing with them.
3888 // I.e., unless this block is already an exit block, we can't use
3889 // it as an epilogue.
3890 if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock())
3891 return false;
3892
3893 // Swift async context epilogue has a BTR instruction that clobbers parts of
3894 // EFLAGS.
3895 const MachineFunction &MF = *MBB.getParent();
3896 if (MF.getInfo<X86MachineFunctionInfo>()->hasSwiftAsyncContext())
3897 return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
3898
3899 if (canUseLEAForSPInEpilogue(*MBB.getParent()))
3900 return true;
3901
3902 // If we cannot use LEA to adjust SP, we may need to use ADD, which
3903 // clobbers the EFLAGS. Check that we do not need to preserve it,
3904 // otherwise, conservatively assume this is not
3905 // safe to insert the epilogue here.
3906 return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
3907}
3908
3909bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
3910 // If we may need to emit frameless compact unwind information, give
3911 // up as this is currently broken: PR25614.
3912 bool CompactUnwind =
3913 MF.getContext().getObjectFileInfo()->getCompactUnwindSection() != nullptr;
3914 return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF) ||
3915 !CompactUnwind) &&
3916 // The lowering of segmented stack and HiPE only support entry
3917 // blocks as prologue blocks: PR26107. This limitation may be
3918 // lifted if we fix:
3919 // - adjustForSegmentedStacks
3920 // - adjustForHiPEPrologue
3921 MF.getFunction().getCallingConv() != CallingConv::HiPE &&
3922 !MF.shouldSplitStack();
3923}
3924
3925MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
3926 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
3927 const DebugLoc &DL, bool RestoreSP) const {
3928 assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env");
3929 assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32");
3930 assert(STI.is32Bit() && !Uses64BitFramePtr &&
3931 "restoring EBP/ESI on non-32-bit target");
3932
3933 MachineFunction &MF = *MBB.getParent();
3934 Register FramePtr = TRI->getFrameRegister(MF);
3935 Register BasePtr = TRI->getBaseRegister();
3936 WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo();
3937 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3938 MachineFrameInfo &MFI = MF.getFrameInfo();
3939
3940 // FIXME: Don't set FrameSetup flag in catchret case.
3941
3942 int FI = FuncInfo.EHRegNodeFrameIndex;
3943 int EHRegSize = MFI.getObjectSize(FI);
3944
3945 if (RestoreSP) {
3946 // MOV32rm -EHRegSize(%ebp), %esp
3947 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP),
3948 X86::EBP, true, -EHRegSize)
3949 .setMIFlag(MachineInstr::FrameSetup);
3950 }
3951
3952 Register UsedReg;
3953 int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg).getFixed();
3954 int EndOffset = -EHRegOffset - EHRegSize;
3955 FuncInfo.EHRegNodeEndOffset = EndOffset;
3956
3957 if (UsedReg == FramePtr) {
3958 // ADD $offset, %ebp
3959 unsigned ADDri = getADDriOpcode(false);
3960 BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr)
3961 .addReg(FramePtr)
3962 .addImm(EndOffset)
3963 .setMIFlag(MachineInstr::FrameSetup)
3964 ->getOperand(3)
3965 .setIsDead();
3966 assert(EndOffset >= 0 &&
3967 "end of registration object above normal EBP position!");
3968 } else if (UsedReg == BasePtr) {
3969 // LEA offset(%ebp), %esi
3970 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr),
3971 FramePtr, false, EndOffset)
3972 .setMIFlag(MachineInstr::FrameSetup);
3973 // MOV32rm SavedEBPOffset(%esi), %ebp
3974 assert(X86FI->getHasSEHFramePtrSave());
3975 int Offset =
3976 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
3977 .getFixed();
3978 assert(UsedReg == BasePtr);
3979 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr),
3980 UsedReg, true, Offset)
3981 .setMIFlag(MachineInstr::FrameSetup);
3982 } else {
3983 llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr");
3984 }
3985 return MBBI;
3986}
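
The EndOffset computation above walks EBP to the end of the EH registration object; worked through with hypothetical numbers:

constexpr int EHRegOffset = -40, EHRegSize = 24; // made-up frame layout
constexpr int EndOffset = -EHRegOffset - EHRegSize;
static_assert(EndOffset == 16, "ADD $16, %ebp lands at the registration end");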
3987
3988int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
3989 return TRI->getSlotSize();
3990}
3991
3992Register
3993X86FrameLowering::getInitialCFARegister(const MachineFunction &MF) const {
3994 return StackPtr;
3995}
3996
3997TargetFrameLowering::DwarfFrameBase
3998X86FrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
3999 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
4000 Register FrameRegister = RI->getFrameRegister(MF);
4001 if (getInitialCFARegister(MF) == FrameRegister &&
4002 MF.getInfo<X86MachineFunctionInfo>()->hasCFIAdjustCfa()) {
4003 DwarfFrameBase FrameBase;
4004 FrameBase.Kind = DwarfFrameBase::CFA;
4005 FrameBase.Location.Offset =
4006 -MF.getFrameInfo().getStackSize() - getInitialCFAOffset(MF);
4007 return FrameBase;
4008 }
4009
4010 return DwarfFrameBase{DwarfFrameBase::Register, {FrameRegister}};
4011}
4012
4013namespace {
4014// Struct used by orderFrameObjects to help sort the stack objects.
4015struct X86FrameSortingObject {
4016 bool IsValid = false; // true if we care about this Object.
4017 unsigned ObjectIndex = 0; // Index of Object into MFI list.
4018 unsigned ObjectSize = 0; // Size of Object in bytes.
4019 Align ObjectAlignment = Align(1); // Alignment of Object in bytes.
4020 unsigned ObjectNumUses = 0; // Object static number of uses.
4021};
4022
4023// The comparison function we use for std::sort to order our local
4024// stack symbols. The current algorithm is to use an estimated
4025// "density". This takes into consideration the size and number of
4026// uses each object has in order to roughly minimize code size.
4027// So, for example, an object of size 16B that is referenced 5 times
4028// will get higher priority than 4 4B objects referenced 1 time each.
4029// It's not perfect and we may be able to squeeze a few more bytes out of
4030// it (for example : 0(esp) requires fewer bytes, symbols allocated at the
4031// fringe end can have special consideration, given their size is less
4032// important, etc.), but the algorithmic complexity grows too much to be
4033// worth the extra gains we get. This gets us pretty close.
4034// The final order leaves us with objects with highest priority going
4035// at the end of our list.
4036struct X86FrameSortingComparator {
4037 inline bool operator()(const X86FrameSortingObject &A,
4038 const X86FrameSortingObject &B) const {
4039 uint64_t DensityAScaled, DensityBScaled;
4040
4041 // For consistency in our comparison, all invalid objects are placed
4042 // at the end. This also allows us to stop walking when we hit the
4043 // first invalid item after it's all sorted.
4044 if (!A.IsValid)
4045 return false;
4046 if (!B.IsValid)
4047 return true;
4048
4049 // The density is calculated by doing :
4050 // (double)DensityA = A.ObjectNumUses / A.ObjectSize
4051 // (double)DensityB = B.ObjectNumUses / B.ObjectSize
4052 // Since this approach may cause inconsistencies in
4053 // the floating point <, >, == comparisons, depending on the floating
4054 // point model with which the compiler was built, we're going
4055 // to scale both sides by multiplying with
4056 // A.ObjectSize * B.ObjectSize. This ends up factoring away
4057 // the division and, with it, the need for any floating point
4058 // arithmetic.
4059 DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) *
4060 static_cast<uint64_t>(B.ObjectSize);
4061 DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) *
4062 static_cast<uint64_t>(A.ObjectSize);
4063
4064 // If the two densities are equal, prioritize highest alignment
4065 // objects. This allows for similar alignment objects
4066 // to be packed together (given the same density).
4067 // There's room for improvement here, also, since we can pack
4068 // similar alignment (different density) objects next to each
4069 // other to save padding. This will also require further
4070 // complexity/iterations, and the overall gain isn't worth it,
4071 // in general. Something to keep in mind, though.
4072 if (DensityAScaled == DensityBScaled)
4073 return A.ObjectAlignment < B.ObjectAlignment;
4074
4075 return DensityAScaled < DensityBScaled;
4076 }
4077};
4078} // namespace
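
The cross-multiplication trick in the comparator can be checked standalone; with the made-up objects below, a 16-byte object used 5 times wins over a 4-byte object used once:

#include <cassert>
#include <cstdint>

struct Obj { uint64_t NumUses, Size; };

// A.NumUses / A.Size < B.NumUses / B.Size, evaluated without division:
bool lowerDensity(const Obj &A, const Obj &B) {
  return A.NumUses * B.Size < B.NumUses * A.Size;
}

int main() {
  Obj Big{5, 16};  // density 0.3125
  Obj Small{1, 4}; // density 0.25
  assert(lowerDensity(Small, Big)); // Big sorts later, i.e. higher priority
}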
4079
4080// Order the symbols in the local stack.
4081// We want to place the local stack objects in some sort of sensible order.
4082// The heuristic we use is to try and pack them according to static number
4083// of uses and size of object in order to minimize code size.
4084void X86FrameLowering::orderFrameObjects(
4085 const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
4086 const MachineFrameInfo &MFI = MF.getFrameInfo();
4087
4088 // Don't waste time if there's nothing to do.
4089 if (ObjectsToAllocate.empty())
4090 return;
4091
4092 // Create an array of all MFI objects. We won't need all of these
4093 // objects, but we're going to create a full array of them to make
4094 // it easier to index into when we're counting "uses" down below.
4095 // We want to be able to easily/cheaply access an object by simply
4096 // indexing into it, instead of having to search for it every time.
4097 std::vector<X86FrameSortingObject> SortingObjects(MFI.getObjectIndexEnd());
4098
4099 // Walk the objects we care about and mark them as such in our working
4100 // struct.
4101 for (auto &Obj : ObjectsToAllocate) {
4102 SortingObjects[Obj].IsValid = true;
4103 SortingObjects[Obj].ObjectIndex = Obj;
4104 SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlign(Obj);
4105 // Set the size.
4106 int ObjectSize = MFI.getObjectSize(Obj);
4107 if (ObjectSize == 0)
4108 // Variable size. Just use 4.
4109 SortingObjects[Obj].ObjectSize = 4;
4110 else
4111 SortingObjects[Obj].ObjectSize = ObjectSize;
4112 }
4113
4114 // Count the number of uses for each object.
4115 for (auto &MBB : MF) {
4116 for (auto &MI : MBB) {
4117 if (MI.isDebugInstr())
4118 continue;
4119 for (const MachineOperand &MO : MI.operands()) {
4120 // Check to see if it's a local stack symbol.
4121 if (!MO.isFI())
4122 continue;
4123 int Index = MO.getIndex();
4124 // Check to see if it falls within our range, and is tagged
4125 // to require ordering.
4126 if (Index >= 0 && Index < MFI.getObjectIndexEnd() &&
4127 SortingObjects[Index].IsValid)
4128 SortingObjects[Index].ObjectNumUses++;
4129 }
4130 }
4131 }
4132
4133 // Sort the objects using the X86FrameSortingComparator (see its comment for
4134 // info).
4135 llvm::stable_sort(SortingObjects, X86FrameSortingComparator());
4136
4137 // Now modify the original list to represent the final order that
4138 // we want. The order will depend on whether we're going to access them
4139 // from the stack pointer or the frame pointer. For SP, the list should
4140 // end up with the END containing objects that we want with smaller offsets.
4141 // For FP, it should be flipped.
4142 int i = 0;
4143 for (auto &Obj : SortingObjects) {
4144 // All invalid items are sorted at the end, so it's safe to stop.
4145 if (!Obj.IsValid)
4146 break;
4147 ObjectsToAllocate[i++] = Obj.ObjectIndex;
4148 }
4149
4150 // Flip it if we're accessing off of the FP.
4151 if (!TRI->hasStackRealignment(MF) && hasFP(MF))
4152 std::reverse(ObjectsToAllocate.begin(), ObjectsToAllocate.end());
4153}
4154
4155unsigned
4156X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const {
4157 // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue.
4158 unsigned Offset = 16;
4159 // RBP is immediately pushed.
4160 Offset += SlotSize;
4161 // All callee-saved registers are then pushed.
4162 Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
4163 // Every funclet allocates enough stack space for the largest outgoing call.
4164 Offset += getWinEHFuncletFrameSize(MF);
4165 return Offset;
4166}
4167
4168void X86FrameLowering::processFunctionBeforeFrameFinalized(
4169 MachineFunction &MF, RegScavenger *RS) const {
4170 // Mark the function as not having WinCFI. We will set it back to true in
4171 // emitPrologue if it gets called and emits CFI.
4172 MF.setHasWinCFI(false);
4173
4174 // If we are using Windows x64 CFI, ensure that the stack is always 8 byte
4175 // aligned. The format doesn't support misaligned stack adjustments.
4176 if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
4177 MF.getFrameInfo().ensureMaxAlignment(Align(SlotSize));
4178
4179 // If this function isn't doing Win64-style C++ EH, we don't need to do
4180 // anything.
4181 if (STI.is64Bit() && MF.hasEHFunclets() &&
4182 classifyEHPersonality(MF.getFunction().getPersonalityFn()) ==
4183 EHPersonality::MSVC_CXX) {
4184 adjustFrameForMsvcCxxEh(MF);
4185 }
4186}
4187
4188void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const {
4189 // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset
4190 // relative to RSP after the prologue. Find the offset of the last fixed
4191 // object, so that we can allocate a slot immediately following it. If there
4192 // were no fixed objects, use offset -SlotSize, which is immediately after the
4193 // return address. Fixed objects have negative frame indices.
4194 MachineFrameInfo &MFI = MF.getFrameInfo();
4195 WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
4196 int64_t MinFixedObjOffset = -SlotSize;
4197 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I)
4198 MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I));
4199
4200 for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
4201 for (WinEHHandlerType &H : TBME.HandlerArray) {
4202 int FrameIndex = H.CatchObj.FrameIndex;
4203 if (FrameIndex != INT_MAX) {
4204 // Ensure alignment.
4205 unsigned Align = MFI.getObjectAlign(FrameIndex).value();
4206 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Align;
4207 MinFixedObjOffset -= MFI.getObjectSize(FrameIndex);
4208 MFI.setObjectOffset(FrameIndex, MinFixedObjOffset);
4209 }
4210 }
4211 }
4212
4213 // Ensure alignment.
4214 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8;
4215 int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
4216 int UnwindHelpFI =
4217 MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*IsImmutable=*/false);
4218 EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
4219
4220 // Store -2 into UnwindHelp on function entry. We have to scan forwards past
4221 // other frame setup instructions.
4222 MachineBasicBlock &MBB = MF.front();
4223 auto MBBI = MBB.begin();
4224 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
4225 ++MBBI;
4226
4227 DebugLoc DL = MBB.findDebugLoc(MBBI);
4228 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)),
4229 UnwindHelpFI)
4230 .addImm(-2);
4231}
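
The two "ensure alignment" steps above round a negative offset further away from zero; e.g. against an 8-byte requirement, -20 becomes -24 (sketch):

constexpr long long alignDown(long long Offset, long long Alignment) {
  return Offset - ((Offset < 0 ? -Offset : Offset) % Alignment);
}
static_assert(alignDown(-20, 8) == -24, "MinFixedObjOffset -= abs(...) % Align");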
4232
4233void X86FrameLowering::processFunctionBeforeFrameIndicesReplaced(
4234 MachineFunction &MF, RegScavenger *RS) const {
4235 auto *X86FI = MF.getInfo<X86MachineFunctionInfo>();
4236
4237 if (STI.is32Bit() && MF.hasEHFunclets())
4238 restoreWinEHStackPointersInParent(MF);
4239 // We have emitted prolog and epilog. Don't need stack pointer saving
4240 // instruction any more.
4241 if (MachineInstr *MI = X86FI->getStackPtrSaveMI()) {
4242 MI->eraseFromParent();
4243 X86FI->setStackPtrSaveMI(nullptr);
4244 }
4245}
4246
4247void X86FrameLowering::restoreWinEHStackPointersInParent(
4248 MachineFunction &MF) const {
4249 // 32-bit functions have to restore stack pointers when control is transferred
4250 // back to the parent function. These blocks are identified as eh pads that
4251 // are not funclet entries.
4252 bool IsSEH = isAsynchronousEHPersonality(
4253 classifyEHPersonality(MF.getFunction().getPersonalityFn()));
4254 for (MachineBasicBlock &MBB : MF) {
4255 bool NeedsRestore = MBB.isEHPad() && !MBB.isEHFuncletEntry();
4256 if (NeedsRestore)
4257 restoreWin32EHStackPointers(MBB, MBB.begin(), DebugLoc(),
4258 /*RestoreSP=*/IsSEH);
4259 }
4260}
4261
4262// Compute the alignment gap between current SP after spilling FP/BP and the
4263// next properly aligned stack offset.
4264static int computeFPBPAlignmentGap(MachineFunction &MF,
4265 const TargetRegisterClass *RC,
4266 unsigned NumSpilledRegs) {
4267 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
4268 unsigned AllocSize = TRI->getSpillSize(*RC) * NumSpilledRegs;
4269 Align StackAlign = MF.getSubtarget().getFrameLowering()->getStackAlign();
4270 unsigned AlignedSize = alignTo(AllocSize, StackAlign);
4271 return AlignedSize - AllocSize;
4272}
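
Worked through with hypothetical x86-64 numbers: pushing both FP and BP (2 x 8 bytes) against a 16-byte stack alignment needs no padding, while a single 8-byte push leaves an 8-byte gap for spillFPBPUsingSP to close:

constexpr unsigned alignmentGap(unsigned AllocSize, unsigned StackAlign) {
  unsigned AlignedSize = (AllocSize + StackAlign - 1) / StackAlign * StackAlign;
  return AlignedSize - AllocSize;
}
static_assert(alignmentGap(16, 16) == 0, "FP+BP: no extra SP adjustment");
static_assert(alignmentGap(8, 16) == 8, "FP only: emitSPUpdate(-8)");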
4273
4274void X86FrameLowering::spillFPBPUsingSP(MachineFunction &MF,
4275 MachineBasicBlock::iterator BeforeMI,
4276 Register FP, Register BP,
4277 int SPAdjust) const {
4278 assert(FP.isValid() || BP.isValid());
4279
4280 MachineBasicBlock *MBB = BeforeMI->getParent();
4281 DebugLoc DL = BeforeMI->getDebugLoc();
4282
4283 // Spill FP.
4284 if (FP.isValid()) {
4285 BuildMI(*MBB, BeforeMI, DL,
4286 TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
4287 .addReg(FP);
4288 }
4289
4290 // Spill BP.
4291 if (BP.isValid()) {
4292 BuildMI(*MBB, BeforeMI, DL,
4293 TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
4294 .addReg(BP);
4295 }
4296
4297 // Make sure SP is aligned.
4298 if (SPAdjust)
4299 emitSPUpdate(*MBB, BeforeMI, DL, -SPAdjust, false);
4300
4301 // Emit unwinding information.
4302 if (FP.isValid() && needsDwarfCFI(MF)) {
4303 // Emit .cfi_remember_state to remember old frame.
4304 unsigned CFIIndex =
4305 MF.addFrameInst(MCCFIInstruction::createRememberState(nullptr));
4306 BuildMI(*MBB, BeforeMI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
4307 .addCFIIndex(CFIIndex);
4308
4309 // Setup new CFA value with DW_CFA_def_cfa_expression:
4310 // DW_OP_breg7+offset, DW_OP_deref, DW_OP_consts 16, DW_OP_plus
4311 SmallString<64> CfaExpr;
4312 uint8_t buffer[16];
4313 int Offset = SPAdjust;
4314 if (BP.isValid())
4315 Offset += TRI->getSpillSize(*TRI->getMinimalPhysRegClass(BP));
4316 // If BeforeMI is a frame setup instruction, we need to adjust the position
4317 // and offset of the new cfi instruction.
4318 if (TII.isFrameSetup(*BeforeMI)) {
4319 Offset += alignTo(TII.getFrameSize(*BeforeMI), getStackAlign());
4320 BeforeMI = std::next(BeforeMI);
4321 }
4322 Register StackPtr = TRI->getStackRegister();
4323 if (STI.isTarget64BitILP32())
4324 StackPtr = Register(getX86SubSuperRegister(StackPtr, 64));
4325 unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackPtr, true);
4326 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfStackPtr));
4327 CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
4328 CfaExpr.push_back(dwarf::DW_OP_deref);
4329 CfaExpr.push_back(dwarf::DW_OP_consts);
4330 CfaExpr.append(buffer, buffer + encodeSLEB128(SlotSize * 2, buffer));
4331 CfaExpr.push_back((uint8_t)dwarf::DW_OP_plus);
4332
4333 SmallString<64> DefCfaExpr;
4334 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
4335 DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
4336 DefCfaExpr.append(CfaExpr.str());
4337 BuildCFI(*MBB, BeforeMI, DL,
4338 MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
4339 MachineInstr::FrameSetup);
4340 }
4341}
4342
4343void X86FrameLowering::restoreFPBPUsingSP(MachineFunction &MF,
4345 Register FP, Register BP,
4346 int SPAdjust) const {
4347 assert(FP.isValid() || BP.isValid());
4348
4349 // Adjust SP so it points to spilled FP or BP.
4350 MachineBasicBlock *MBB = AfterMI->getParent();
4351 MachineBasicBlock::iterator Pos = std::next(AfterMI);
4352 DebugLoc DL = AfterMI->getDebugLoc();
4353 if (SPAdjust)
4354 emitSPUpdate(*MBB, Pos, DL, SPAdjust, false);
4355
4356 // Restore BP.
4357 if (BP.isValid()) {
4358 BuildMI(*MBB, Pos, DL,
4359 TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())), BP);
4360 }
4361
4362 // Restore FP.
4363 if (FP.isValid()) {
4364 BuildMI(*MBB, Pos, DL,
4365 TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())), FP);
4366
4367 // Emit unwinding information.
4368 if (needsDwarfCFI(MF)) {
4369 // Restore original frame with .cfi_restore_state.
4370 unsigned CFIIndex =
4371 MF.addFrameInst(MCCFIInstruction::createRestoreState(nullptr));
4372 BuildMI(*MBB, Pos, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
4373 .addCFIIndex(CFIIndex);
4374 }
4375 }
4376}
4377
4378void X86FrameLowering::saveAndRestoreFPBPUsingSP(
4379 MachineFunction &MF, MachineBasicBlock::iterator BeforeMI,
4380 MachineBasicBlock::iterator AfterMI, bool SpillFP, bool SpillBP) const {
4381 assert(SpillFP || SpillBP);
4382
4383 Register FP, BP;
4384 const TargetRegisterClass *RC;
4385 unsigned NumRegs = 0;
4386
4387 if (SpillFP) {
4388 FP = TRI->getFrameRegister(MF);
4389 if (STI.isTarget64BitILP32())
4390 FP = Register(getX86SubSuperRegister(FP, 64));
4391 RC = TRI->getMinimalPhysRegClass(FP);
4392 ++NumRegs;
4393 }
4394 if (SpillBP) {
4395 BP = TRI->getBaseRegister();
4396 if (STI.isTarget64BitILP32())
4397 BP = Register(getX86SubSuperRegister(BP, 64));
4398 RC = TRI->getMinimalPhysRegClass(BP);
4399 ++NumRegs;
4400 }
4401 int SPAdjust = computeFPBPAlignmentGap(MF, RC, NumRegs);
4402
4403 spillFPBPUsingSP(MF, BeforeMI, FP, BP, SPAdjust);
4404 restoreFPBPUsingSP(MF, AfterMI, FP, BP, SPAdjust);
4405}
4406
4407bool X86FrameLowering::skipSpillFPBP(
4408 MachineFunction &MF, MachineBasicBlock::reverse_iterator &MI) const {
4409 if (MI->getOpcode() == X86::LCMPXCHG16B_SAVE_RBX) {
4410 // The pseudo instruction LCMPXCHG16B_SAVE_RBX is generated in the form
4411 // SaveRbx = COPY RBX
4412 // SaveRbx = LCMPXCHG16B_SAVE_RBX ..., SaveRbx, implicit-def rbx
4413 // And later LCMPXCHG16B_SAVE_RBX is expanded to restore RBX from SaveRbx.
4414 // We should skip this instruction sequence.
4415 int FI;
4416 unsigned Reg;
4417 while (!(MI->getOpcode() == TargetOpcode::COPY &&
4418 MI->getOperand(1).getReg() == X86::RBX) &&
4419 !((Reg = TII.isStoreToStackSlot(*MI, FI)) && Reg == X86::RBX))
4420 ++MI;
4421 return true;
4422 }
4423 return false;
4424}
4425
4426static bool isFPBPAccess(const MachineInstr &MI, Register FP, Register BP,
4427 const TargetRegisterInfo *TRI, bool &AccessFP,
4428 bool &AccessBP) {
4429 AccessFP = AccessBP = false;
4430 if (FP) {
4431 if (MI.findRegisterUseOperandIdx(FP, TRI, false) != -1 ||
4432 MI.findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
4433 AccessFP = true;
4434 }
4435 if (BP) {
4436 if (MI.findRegisterUseOperandIdx(BP, TRI, false) != -1 ||
4437 MI.findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
4438 AccessBP = true;
4439 }
4440 return AccessFP || AccessBP;
4441}
4442
4443// An invoke instruction has been lowered to a normal function call. We try to
4444// figure out whether MI comes from an invoke.
4445// Is there a better way to do this?
4446static bool isInvoke(const MachineInstr &MI, bool InsideEHLabels) {
4447 if (!MI.isCall())
4448 return false;
4449 if (InsideEHLabels)
4450 return true;
4451
4452 const MachineBasicBlock *MBB = MI.getParent();
4453 if (!MBB->hasEHPadSuccessor())
4454 return false;
4455
4456 // Check if there is another call instruction from MI to the end of MBB.
4457 MachineBasicBlock::const_iterator MBBI = MI, ME = MBB->end();
4458 for (++MBBI; MBBI != ME; ++MBBI)
4459 if (MBBI->isCall())
4460 return false;
4461 return true;
4462}
4463
4464/// Given the live range of FP or BP (DefMI, KillMI), check if there is any
4465/// interfered stack access in the range, usually generated by register spill.
4466void X86FrameLowering::checkInterferedAccess(
4467 MachineFunction &MF, MachineBasicBlock::reverse_iterator DefMI,
4468 MachineBasicBlock::reverse_iterator KillMI, bool SpillFP,
4469 bool SpillBP) const {
4470 if (DefMI == KillMI)
4471 return;
4472 if (TRI->hasBasePointer(MF)) {
4473 if (!SpillBP)
4474 return;
4475 } else {
4476 if (!SpillFP)
4477 return;
4478 }
4479
4480 auto MI = KillMI;
4481 while (MI != DefMI) {
4482 if (any_of(MI->operands(),
4483 [](const MachineOperand &MO) { return MO.isFI(); }))
4484 MF.getContext().reportError(SMLoc(),
4485 "Interference usage of base pointer/frame "
4486 "pointer.");
4487 MI++;
4488 }
4489}
4490
4491/// If a function uses the base pointer and the base pointer is clobbered by
4492/// inline asm, RA doesn't detect this case, and after the inline asm the base
4493/// pointer contains a garbage value.
4494/// For example, if a 32-bit x86 function uses the base pointer esi, and esi is
4495/// clobbered by the following inline asm
4496/// asm("rep movsb" : "+D"(ptr), "+S"(x), "+c"(c)::"memory");
4497/// we need to save esi before the asm and restore it after the asm.
4498///
4499/// The problem can also occur with the frame pointer if there is a function
4500/// call and the callee uses a different calling convention and clobbers the fp.
4501///
4502/// Because normal frame objects (spill slots) are accessed through the fp/bp
4503/// register, we can't spill fp/bp to normal spill slots.
4504///
4505/// FIXME: There are 2 possible enhancements:
4506/// 1. In many cases there are other physical registers not clobbered by the
4507/// inline asm; we could use one of them as the base pointer, or use a virtual
4508/// register as the base pointer and let RA allocate a physical register for it.
4509/// 2. If no other instruction accesses the stack through fp/bp between the
4510/// inline asm and the epilogue, and there is no CFI requirement for a correct
4511/// fp, we can skip the save and restore operations.
4512void X86FrameLowering::spillFPBP(MachineFunction &MF) const {
4513 Register FP, BP;
4514 const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
4515 if (TFI.hasFP(MF))
4516 FP = TRI->getFrameRegister(MF);
4517 if (TRI->hasBasePointer(MF))
4518 BP = TRI->getBaseRegister();
4519
4520 // Currently only inline asm and function call can clobbers fp/bp. So we can
4521 // do some quick test and return early.
4522 if (!MF.hasInlineAsm()) {
4523 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
4524 if (!X86FI->getFPClobberedByCall())
4525 FP = 0;
4526 if (!X86FI->getBPClobberedByCall())
4527 BP = 0;
4528 }
4529 if (!FP && !BP)
4530 return;
4531
4532 for (MachineBasicBlock &MBB : MF) {
4533 bool InsideEHLabels = false;
4534 auto MI = MBB.rbegin(), ME = MBB.rend();
4535 auto TermMI = MBB.getFirstTerminator();
4536 if (TermMI == MBB.begin())
4537 continue;
4538 MI = *(std::prev(TermMI));
4539
4540 while (MI != ME) {
4541 // Skip frame setup/destroy instructions.
4542 // Skip Invoke (call inside try block) instructions.
4543 // Skip instructions handled by target.
4544 if (MI->getFlag(MachineInstr::MIFlag::FrameSetup) ||
4545 MI->getFlag(MachineInstr::MIFlag::FrameDestroy) ||
4546 isInvoke(*MI, InsideEHLabels) || skipSpillFPBP(MF, MI)) {
4547 ++MI;
4548 continue;
4549 }
4550
4551 if (MI->getOpcode() == TargetOpcode::EH_LABEL) {
4552 InsideEHLabels = !InsideEHLabels;
4553 ++MI;
4554 continue;
4555 }
4556
4557 bool AccessFP, AccessBP;
4558 // Check if fp or bp is used in MI.
4559 if (!isFPBPAccess(*MI, FP, BP, TRI, AccessFP, AccessBP)) {
4560 ++MI;
4561 continue;
4562 }
4563
4564 // Look for the range [DefMI, KillMI] in which fp or bp is defined and
4565 // used.
4566 bool FPLive = false, BPLive = false;
4567 bool SpillFP = false, SpillBP = false;
4568 auto DefMI = MI, KillMI = MI;
4569 do {
4570 SpillFP |= AccessFP;
4571 SpillBP |= AccessBP;
4572
4573 // Maintain FPLive and BPLive.
4574 if (FPLive && MI->findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
4575 FPLive = false;
4576 if (FP && MI->findRegisterUseOperandIdx(FP, TRI, false) != -1)
4577 FPLive = true;
4578 if (BPLive && MI->findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
4579 BPLive = false;
4580 if (BP && MI->findRegisterUseOperandIdx(BP, TRI, false) != -1)
4581 BPLive = true;
4582
4583 DefMI = MI++;
4584 } while ((MI != ME) &&
4585 (FPLive || BPLive ||
4586 isFPBPAccess(*MI, FP, BP, TRI, AccessFP, AccessBP)));
4587
4588 // Don't need to save/restore if FP is accessed through llvm.frameaddress.
4589 if (FPLive && !SpillBP)
4590 continue;
4591
4592 // If the bp is clobbered by a call, we should save and restore outside of
4593 // the frame setup instructions.
4594 if (KillMI->isCall() && DefMI != ME) {
4595 auto FrameSetup = std::next(DefMI);
4596 // Look for a frame setup instruction toward the start of the BB.
4597 // If we reach another call instruction first, there is no frame
4598 // setup instruction for the current call instruction.
4599 while (FrameSetup != ME && !TII.isFrameSetup(*FrameSetup) &&
4600 !FrameSetup->isCall())
4601 ++FrameSetup;
4602 // If a frame setup instruction is found, we need to find out the
4603 // corresponding frame destroy instruction.
4604 if (FrameSetup != ME && TII.isFrameSetup(*FrameSetup) &&
4605 (TII.getFrameSize(*FrameSetup) ||
4606 TII.getFrameAdjustment(*FrameSetup))) {
4607 while (!TII.isFrameInstr(*KillMI))
4608 --KillMI;
4609 DefMI = FrameSetup;
4610 MI = DefMI;
4611 ++MI;
4612 }
4613 }
4614
4615 checkInterferedAccess(MF, DefMI, KillMI, SpillFP, SpillBP);
4616
4617 // Call target function to spill and restore FP and BP registers.
4618 saveAndRestoreFPBPUsingSP(MF, &(*DefMI), &(*KillMI), SpillFP, SpillBP);
4619 }
4620 }
4621}
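
A reproducer for the situation described in the doc comment above spillFPBP might look as follows (hypothetical; whether ESI really is the base pointer depends on the surrounding function having a realigned frame with dynamic allocas):

// The "+S" constraint lets "rep movsb" advance ESI/RSI, clobbering the base
// pointer; spillFPBP brackets such statements with a push/pop of the register.
void copyBytes(char *Dst, char *Src, unsigned N) {
  __asm__ volatile("rep movsb" : "+D"(Dst), "+S"(Src), "+c"(N) : : "memory");
}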
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool isFuncletReturnInstr(const MachineInstr &MI)
static const uint64_t kSplitStackAvailable
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Given that RA is a live value
uint64_t Size
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
static cl::opt< int > PageSize("imp-null-check-page-size", cl::desc("The page size of the target in bytes"), cl::init(4096), cl::Hidden)
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static bool isTailCallOpcode(unsigned Opc)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define H(x, y, z)
Definition: MD5.cpp:57
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static constexpr Register SPReg
static constexpr Register FPReg
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
static bool is64Bit(const char *name)
static unsigned calculateSetFPREG(uint64_t SPAdjust)
static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary)
GetScratchRegister - Get a temp register for performing work in the segmented stack and the Erlang/Hi...
static unsigned getADDriOpcode(bool IsLP64)
static unsigned getPUSH2Opcode(const X86Subtarget &ST)
static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm)
static unsigned getLEArOpcode(bool IsLP64)
static unsigned getSUBriOpcode(bool IsLP64)
static bool flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB)
Check if the flags need to be preserved before the terminators.
static bool isFPBPAccess(const MachineInstr &MI, Register FP, Register BP, const TargetRegisterInfo *TRI, bool &AccessFP, bool &AccessBP)
static bool isOpcodeRep(unsigned Opcode)
Return true if an opcode is part of the REP group of instructions.
static unsigned getANDriOpcode(bool IsLP64, int64_t Imm)
static bool isEAXLiveIn(MachineBasicBlock &MBB)
static int computeFPBPAlignmentGap(MachineFunction &MF, const TargetRegisterClass *RC, unsigned NumSpilledRegs)
static unsigned getADDrrOpcode(bool IsLP64)
static bool HasNestArgument(const MachineFunction *MF)
static unsigned getPOPOpcode(const X86Subtarget &ST)
static bool isInvoke(const MachineInstr &MI, bool InsideEHLabels)
static unsigned getPOP2Opcode(const X86Subtarget &ST)
static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD, const StringRef LiteralName)
Lookup an ERTS parameter in the !hipe.literals named metadata node.
static bool blockEndIsUnreachable(const MachineBasicBlock &MBB, MachineBasicBlock::const_iterator MBBI)
static unsigned getSUBrrOpcode(bool IsLP64)
static unsigned getPUSHOpcode(const X86Subtarget &ST)
static const unsigned FramePtr
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
reverse_iterator rend() const
Definition: ArrayRef.h:160
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:163
reverse_iterator rbegin() const
Definition: ArrayRef.h:159
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
BitVector & reset()
Definition: BitVector.h:392
BitVector & set()
Definition: BitVector.h:351
iterator_range< const_set_bits_iterator > set_bits() const
Definition: BitVector.h:140
static BranchProbability getOne()
static BranchProbability getZero()
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:277
bool hasPersonalityFn() const
Check whether this function has a personality function.
Definition: Function.h:905
Constant * getPersonalityFn() const
Get the personality function associated with this function.
Definition: Function.cpp:1048
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:353
size_t arg_size() const
Definition: Function.h:901
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
Definition: Function.h:682
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:234
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:657
A set of physical registers with utility functions to track liveness when walking backward/forward th...
Definition: LivePhysRegs.h:52
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:661
static MCCFIInstruction createDefCfaRegister(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_def_cfa_register modifies a rule for computing CFA.
Definition: MCDwarf.h:582
static MCCFIInstruction createGnuArgsSize(MCSymbol *L, int64_t Size, SMLoc Loc={})
A special wrapper for .cfi_escape that indicates GNU_ARGS_SIZE.
Definition: MCDwarf.h:693
static MCCFIInstruction createRestore(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_restore says that the rule for Register is now the same as it was at the beginning of the functi...
Definition: MCDwarf.h:656
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition: MCDwarf.h:575
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition: MCDwarf.h:617
static MCCFIInstruction createRememberState(MCSymbol *L, SMLoc Loc={})
.cfi_remember_state Save all current rules for all registers.
Definition: MCDwarf.h:676
OpType getOperation() const
Definition: MCDwarf.h:710
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition: MCDwarf.h:590
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition: MCDwarf.h:687
static MCCFIInstruction createAdjustCfaOffset(MCSymbol *L, int64_t Adjustment, SMLoc Loc={})
.cfi_adjust_cfa_offset Same as .cfi_def_cfa_offset, but Offset is a relative value that is added/subt...
Definition: MCDwarf.h:598
static MCCFIInstruction createRestoreState(MCSymbol *L, SMLoc Loc={})
.cfi_restore_state Restore the previously saved state.
Definition: MCDwarf.h:681
const MCObjectFileInfo * getObjectFileInfo() const
Definition: MCContext.h:416
const MCRegisterInfo * getRegisterInfo() const
Definition: MCContext.h:414
void reportError(SMLoc L, const Twine &Msg)
Definition: MCContext.cpp:1072
MCSection * getCompactUnwindSection() const
MCRegAliasIterator enumerates all registers aliasing Reg.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
Metadata node.
Definition: Metadata.h:1073
A single uniqued string.
Definition: Metadata.h:724
StringRef getString() const
Definition: Metadata.cpp:616
Machine Value Type.
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
bool isEHPad() const
Returns true if the block is a landing pad.
reverse_iterator rend()
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< livein_iterator > liveins() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool isEHFuncletEntry() const
Returns true if this is the entry block of an EH funclet.
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
bool isReturnBlock() const
Convenience function that returns true if the block ends in a return instruction.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< iterator > terminators()
iterator_range< succ_iterator > successors()
reverse_iterator rbegin()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
@ LQR_Live
Register is known to be (at least partially) live.
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
bool isCleanupFuncletEntry() const
Returns true if this is the entry block of a cleanup funclet.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool needsSplitStackProlog() const
Return true if this function requires a split stack prolog, even if it uses no stack space.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool adjustsStack() const
Return true if this function adjusts the stack – e.g., when calling another function.
void ensureMaxAlignment(Align Alignment)
Make sure the function is at least Align bytes aligned.
bool hasCalls() const
Return true if the current function has any function calls.
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
void setObjectOffset(int ObjectIdx, int64_t SPOffset)
Set the stack frame offset of the specified object.
uint64_t getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
bool hasOpaqueSPAdjustment() const
Returns true if the function contains opaque dynamic stack adjustments.
void setCVBytesOfCalleeSavedRegisters(unsigned S)
int CreateSpillStackObject(uint64_t Size, Align Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a...
const std::vector< CalleeSavedInfo > & getCalleeSavedInfo() const
Returns a reference to call saved info vector for the current function.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
bool hasCopyImplyingStackAdjustment() const
Returns true if the function contains operations which will lower down to instructions which manipula...
bool hasStackObjects() const
Return true if there are any stack objects in this function.
int CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset, bool IsImmutable=false)
Create a spill slot at a fixed location on the stack.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
void setStackSize(uint64_t Size)
Set the size of the stack.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
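A hedged sketch of walking every frame object with the index accessors above, assuming MFI is a MachineFrameInfo reference already in scope:
  for (int FI = MFI.getObjectIndexBegin(); FI != MFI.getObjectIndexEnd(); ++FI) {
    if (MFI.isFixedObjectIndex(FI))
      continue;                          // skip fixed (incoming) slots
    int64_t Size = MFI.getObjectSize(FI);
    Align A = MFI.getObjectAlign(FI);
    // ... inspect, or reassign placement via setObjectOffset(FI, ...) ...
    (void)Size; (void)A;
  }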
void setOffsetAdjustment(int64_t Adj)
Set the correction for frame offsets.
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const std::vector< MCCFIInstruction > & getFrameInstructions() const
Returns a reference to a list of cfi instructions in the function's prologue.
bool hasInlineAsm() const
Returns true if the function contains any inline assembly.
void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair, unsigned SubReg=0)
Create a substitution between one <instr,operand> value to a different, new value.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
bool needsFrameMoves() const
True if this function needs frame moves for debug or exceptions.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
bool callsUnwindInit() const
void push_front(MachineBasicBlock *MBB)
const char * createExternalSymbolName(StringRef Name)
Allocate a string and populate it with the given external symbol name.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
bool verify(Pass *p=nullptr, const char *Banner=nullptr, raw_ostream *OS=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const std::vector< LandingPadInfo > & getLandingPads() const
Return a reference to the landing pad info for the current function.
bool shouldSplitStack() const
Should we be emitting segmented stack stuff for the function.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
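These chaining helpers are used together with BuildMI (listed further down this page). A minimal sketch, assuming TII, MBB, MBBI, DL and NumBytes are in scope as in the X86FrameLowering methods below; the opcode choice is purely illustrative:
  // Emit `subq $NumBytes, %rsp` and tag it as part of the prologue.
  BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri32), X86::RSP)
      .addReg(X86::RSP)
      .addImm(NumBytes)
      .setMIFlag(MachineInstr::FrameSetup);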
Representation of each machine instruction.
Definition: MachineInstr.h:71
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:580
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:501
unsigned getDebugInstrNum()
Fetch the instruction number of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:587
@ MOVolatile
The memory access is volatile.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
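A hedged sketch of pairing these flags with getMachineMemOperand and MachinePointerInfo::getFixedStack (both listed on this page) to describe an 8-byte store to a frame slot; MF and FrameIdx are assumed to be in scope, and LLT::scalar(64) matches the LLT overload shown above:
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FrameIdx),
      MachineMemOperand::MOStore, LLT::scalar(64), Align(8));
  // Attach to an instruction under construction via addMemOperand(MMO).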
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
bool isLiveIn(Register Reg) const
NamedMDNode * getNamedMetadata(StringRef Name) const
Return the first NamedMDNode in the module with the specified name.
Definition: Module.cpp:297
unsigned getCodeViewFlag() const
Returns the CodeView Version by checking module flags.
Definition: Module.cpp:597
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:310
iterator end() const
Definition: ArrayRef.h:360
iterator begin() const
Definition: ArrayRef.h:359
A tuple of MDNodes.
Definition: Metadata.h:1737
MDNode * getOperand(unsigned i) const
Definition: Metadata.cpp:1425
unsigned getNumOperands() const
Definition: Metadata.cpp:1421
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:115
Represents a location in source code.
Definition: SMLoc.h:23
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:65
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
Definition: SmallString.h:254
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void push_back(const T &Elt)
Definition: SmallVector.h:413
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
int64_t getFixed() const
Returns the fixed component of the stack.
Definition: TypeSize.h:49
static StackOffset getFixed(int64_t Fixed)
Definition: TypeSize.h:42
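For example, a frame reference with no scalable-vector component can be described as:
  StackOffset Off = StackOffset::getFixed(-16); // 16 bytes below the base
  int64_t Bytes = Off.getFixed();               // -16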
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
static constexpr size_t npos
Definition: StringRef.h:53
Information about stack frame layout on the target.
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
int getOffsetOfLocalArea() const
getOffsetOfLocalArea - This method returns the offset of the local area from the stack pointer on ent...
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
const Triple & getTargetTriple() const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
SwiftAsyncFramePointerMode SwiftAsyncFramePointer
Control when and how the Swift async frame pointer bit should be set.
bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual Register getFrameRegister(const MachineFunction &MF) const =0
Debug information queries.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetFrameLowering * getFrameLowering() const
bool isOSWindows() const
Tests whether the OS is Windows.
Definition: Triple.h:652
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
Definition: Triple.h:585
Value wrapper in the Metadata hierarchy.
Definition: Metadata.h:454
Value * getValue() const
Definition: Metadata.h:494
bool has128ByteRedZone(const MachineFunction &MF) const
Return true if the function has a redzone (accessible bytes past the frame of the top of stack functi...
void spillFPBP(MachineFunction &MF) const override
If a function uses base pointer and the base pointer is clobbered by inline asm, RA doesn't detect th...
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override
canSimplifyCallFramePseudos - If there is a reserved call frame, the call frame pseudos can be simpli...
bool needsFrameIndexResolution(const MachineFunction &MF) const override
X86FrameLowering(const X86Subtarget &STI, MaybeAlign StackAlignOverride)
const X86RegisterInfo * TRI
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
bool hasFPImpl(const MachineFunction &MF) const override
hasFPImpl - Return true if the specified function should have a dedicated frame pointer register.
MachineBasicBlock::iterator restoreWin32EHStackPointers(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool RestoreSP=false) const
Sets up EBP and optionally ESI based on the incoming EBP value.
int getInitialCFAOffset(const MachineFunction &MF) const override
Return initial CFA offset value, i.e. the one valid at the beginning of the function (before any stack operations).
bool canUseAsPrologue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as a prologue for the target.
bool hasReservedCallFrame(const MachineFunction &MF) const override
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
void emitStackProbe(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog, std::optional< MachineFunction::DebugInstrOperandPair > InstrNum=std::nullopt) const
Emit target stack probe code.
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool IsPrologue) const
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
StackOffset getFrameIndexReferenceSP(const MachineFunction &MF, int FI, Register &SPReg, int Adjustment) const
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
bool enableShrinkWrapping(const MachineFunction &MF) const override
Returns true if the target will correctly handle shrink wrapping.
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - This method should return the base register and offset used to reference a f...
void inlineStackProbe(MachineFunction &MF, MachineBasicBlock &PrologMBB) const override
Replace a StackProbe inline-stub with the actual probe code inline.
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
const X86InstrInfo & TII
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &DL, int64_t NumBytes, bool InEpilogue) const
Emit a series of instructions to increment / decrement the stack pointer by a constant value.
bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as an epilogue for the target.
bool Is64Bit
Is64Bit implies that x86_64 instructions are available.
Register getInitialCFARegister(const MachineFunction &MF) const override
Return initial CFA register value, i.e. the one valid at the beginning of the function (before any stack operations).
bool Uses64BitFramePtr
True if the 64-bit frame or stack pointer should be used.
unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const override
void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const override
Adjust the prologue to have the function use segmented stacks.
DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const override
Return the frame base information to be encoded in the DWARF subprogram debug info.
void emitCalleeSavedFrameMovesFullCFA(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const override
Emits Dwarf Info specifying offsets of callee saved registers and frame pointer.
int getWin64EHFrameIndexRef(const MachineFunction &MF, int FI, Register &SPReg) const
bool canUseLEAForSPInEpilogue(const MachineFunction &MF) const
Check that LEA can be used on SP in an epilogue sequence for MF.
bool stackProbeFunctionModifiesSP() const override
Does the stack probe function call return with a modified stack pointer?
void orderFrameObjects(const MachineFunction &MF, SmallVectorImpl< int > &ObjectsToAllocate) const override
Order the symbols in the local stack.
void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const MCCFIInstruction &CFIInst, MachineInstr::MIFlag Flag=MachineInstr::NoFlags) const
Wraps up getting a CFI index and building a MachineInstr for it.
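A hedged sketch of the usual pattern: wrap the result of an MCCFIInstruction factory (here cfiDefCfaOffset) and tag the emitted instruction as frame setup. MBB, MBBI, DL, SlotSize and NumBytes are assumed to be in scope as in emitPrologue:
  // Record that the CFA is now SlotSize + NumBytes away from the entry SP.
  BuildCFI(MBB, MBBI, DL,
           MCCFIInstruction::cfiDefCfaOffset(nullptr, SlotSize + NumBytes),
           MachineInstr::FrameSetup);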
int mergeSPUpdates(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, bool doMergeWithPrevious) const
Check the instruction before/after the passed instruction.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
void processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameIndicesReplaced - This method is called immediately before MO_FrameIndex op...
StackOffset getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, Register &FrameReg, bool IgnoreSPUpdates) const override
Same as getFrameIndexReference, except that the stack pointer (as opposed to the frame pointer) will ...
void restoreWinEHStackPointersInParent(MachineFunction &MF) const
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
void adjustForHiPEPrologue(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const override
Erlang programs may need a special prologue to handle the stack size they might need at runtime.
const X86Subtarget & STI
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void buildClearRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, DebugLoc &DL, bool AllowSideEffects=true) const override
int64_t getFrameAdjustment(const MachineInstr &I) const
Returns the stack pointer adjustment that happens inside the frame setup..destroy sequence (e....
Definition: X86InstrInfo.h:215
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
bool isCandidateForPush2Pop2(Register Reg) const
void setRestoreBasePointer(const MachineFunction *MF)
DenseMap< int, unsigned > & getWinEHXMMSlotInfo()
MachineInstr * getStackPtrSaveMI() const
AMXProgModelEnum getAMXProgModel() const
void addCandidateForPush2Pop2(Register Reg)
void setStackPtrSaveMI(MachineInstr *MI)
void setCalleeSavedFrameSize(unsigned bytes)
bool hasBasePointer(const MachineFunction &MF) const
Register getFrameRegister(const MachineFunction &MF) const override
unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) const
findDeadCallerSavedReg - Return a caller-saved register that isn't live when it reaches the "return" ...
Register getStackRegister() const
unsigned getSlotSize() const
Register getFramePtr() const
Returns physical register used as frame pointer.
Register getBaseRegister() const
bool isOSWindows() const
Definition: X86Subtarget.h:327
const X86TargetLowering * getTargetLowering() const override
Definition: X86Subtarget.h:118
bool isTargetDragonFly() const
Definition: X86Subtarget.h:287
bool isTargetWindowsMSVC() const
Definition: X86Subtarget.h:305
bool isTarget64BitILP32() const
Is this x86_64 with the ILP32 programming model (x32 ABI)?
Definition: X86Subtarget.h:173
bool isTargetDarwin() const
Definition: X86Subtarget.h:285
bool isTargetWin64() const
Definition: X86Subtarget.h:329
bool isTarget64BitLP64() const
Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
Definition: X86Subtarget.h:178
bool swiftAsyncContextIsDynamicallySet() const
Return whether FrameLowering should always set the "extended frame present" bit in FP,...
Definition: X86Subtarget.h:391
bool isTargetWindowsCoreCLR() const
Definition: X86Subtarget.h:309
const X86InstrInfo * getInstrInfo() const override
Definition: X86Subtarget.h:122
bool isCallingConvWin64(CallingConv::ID CC) const
Definition: X86Subtarget.h:342
bool isTargetFreeBSD() const
Definition: X86Subtarget.h:286
bool isTargetNaCl64() const
Definition: X86Subtarget.h:301
bool isTargetWin32() const
Definition: X86Subtarget.h:331
bool useIndirectThunkCalls() const
Definition: X86Subtarget.h:218
bool isTargetLinux() const
Definition: X86Subtarget.h:295
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
StringRef getStackProbeSymbolName(const MachineFunction &MF) const override
Returns the name of the symbol used to emit stack probes or the empty string if not applicable.
bool hasStackProbeSymbol(const MachineFunction &MF) const override
Returns true if stack probing through a function call is requested.
unsigned getStackProbeSize(const MachineFunction &MF) const
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint16_t StackAdjustment(const RuntimeFunction &RF)
StackAdjustment - calculated stack adjustment in words.
Definition: ARMWinEH.h:199
@ HiPE
Used by the High-Performance Erlang Compiler (HiPE).
Definition: CallingConv.h:53
@ X86_INTR
x86 hardware interrupt context.
Definition: CallingConv.h:173
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ X86_FastCall
'fast' analog of X86_StdCall.
Definition: CallingConv.h:103
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
@ MO_GOTPCREL
MO_GOTPCREL - On a symbol operand this indicates that the immediate is offset to the GOT entry for th...
Definition: X86BaseInfo.h:387
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
void stable_sort(R &&Range)
Definition: STLExtras.h:2037
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
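For example:
  SmallVector<int, 4> Offsets = {0, 8, 16, 24};
  bool AllAligned = all_of(Offsets, [](int O) { return O % 8 == 0; }); // true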
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
MCRegister getX86SubSuperRegister(MCRegister Reg, unsigned Size, bool High=false)
@ DwarfCFI
DWARF-like instruction based exceptions.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition: MathExtras.h:557
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
static const MachineInstrBuilder & addRegOffset(const MachineInstrBuilder &MIB, unsigned Reg, bool isKill, int Offset)
addRegOffset - This function is used to add a memory reference of the form [Reg + Offset],...
@ Always
Always set the bit.
@ DeploymentBased
Determine whether to set the bit statically or dynamically based on the deployment target.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition: MathExtras.h:195
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
IterT skipDebugInstructionsBackward(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It until it points to a non-debug instruction or to Begin and return the resulting iterator...
unsigned getUndefRegState(bool B)
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
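Worked examples of the alignment helpers listed above:
  uint64_t Up   = alignTo(13, Align(8));   // 16: next multiple of 8
  uint64_t Down = alignDown(13u, 8u);      // 8: previous multiple of 8
  bool OK       = isAligned(Align(8), Up); // true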
bool isAsynchronousEHPersonality(EHPersonality Pers)
Returns true if this personality function catches asynchronous exceptions.
unsigned encodeSLEB128(int64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a SLEB128 value to an output stream.
Definition: LEB128.h:23
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1945
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1766
void computeAndAddLiveIns(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB)
Convenience function combining computeLiveIns() and addLiveIns().
unsigned encodeULEB128(uint64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a ULEB128 value to an output stream.
Definition: LEB128.h:80
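For example, writing into a SmallString through raw_svector_ostream (both listed on this page); the return value is the number of bytes emitted:
  SmallString<8> Buf;
  raw_svector_ostream OS(Buf);
  unsigned N = encodeULEB128(624485, OS); // emits 0xE5 0x8E 0x26; N == 3
  encodeSLEB128(-2, OS);                  // emits the single byte 0x7E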
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-in's for a set of MBBs until the computation converges.
Definition: LivePhysRegs.h:215
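A sketch of its use after splitting blocks in a prologue-style transform; the block names are hypothetical MachineBasicBlock references:
  // Recompute live-ins for both halves until the sets converge.
  fullyRecomputeLiveIns({&NewEntryMBB, &PrologueMBB});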
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Pair of physical register and lane mask.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
union llvm::TargetFrameLowering::DwarfFrameBase::(anonymous union) Location
enum llvm::TargetFrameLowering::DwarfFrameBase::FrameBaseKind Kind
SmallVector< WinEHTryBlockMapEntry, 4 > TryBlockMap
Definition: WinEHFuncInfo.h:97
SmallVector< WinEHHandlerType, 1 > HandlerArray
Definition: WinEHFuncInfo.h:76