//===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Target/TargetOptions.h"
#include <cstdlib>

#define DEBUG_TYPE "x86-fl"

STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
STATISTIC(NumFrameExtraProbe,
          "Number of extra stack probes generated in prologue");
STATISTIC(NumFunctionUsingPush2Pop2, "Number of functions using push2/pop2");

using namespace llvm;

X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
                                   MaybeAlign StackAlignOverride)
    : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(),
                          STI.is64Bit() ? -8 : -4),
      STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {
  // Cache a bunch of frame-related predicates for this subtarget.
  SlotSize = TRI->getSlotSize();
  assert(SlotSize == 4 || SlotSize == 8);
  Is64Bit = STI.is64Bit();
  IsLP64 = STI.isTarget64BitLP64();
  // Standard x86_64 uses 64-bit frame/stack pointers; x32 uses 32-bit ones.
  Uses64BitFramePtr = STI.isTarget64BitLP64();
  StackPtr = TRI->getStackRegister();
}

bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo().hasVarSizedObjects() &&
         !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() &&
         !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall();
}

/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
/// call frame pseudos can be simplified. Having a FP, as in the default
/// implementation, is not sufficient here since we can't always use it.
/// Use a more nuanced condition.
bool X86FrameLowering::canSimplifyCallFramePseudos(
    const MachineFunction &MF) const {
  return hasReservedCallFrame(MF) ||
         MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
         (hasFP(MF) && !TRI->hasStackRealignment(MF)) ||
         TRI->hasBasePointer(MF);
}

// needsFrameIndexResolution - Do we need to perform FI resolution for
// this function. Normally, this is required only when the function
// has any stack objects. However, FI resolution actually has another job,
// not apparent from the title - it resolves callframesetup/destroy
// that were not simplified earlier.
// So, this is required for x86 functions that have push sequences even
// when there are no stack objects.
bool X86FrameLowering::needsFrameIndexResolution(
    const MachineFunction &MF) const {
  return MF.getFrameInfo().hasStackObjects() ||
         MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
}

/// hasFPImpl - Return true if the specified function should have a dedicated
/// frame pointer register. This is true if the function has variable sized
/// allocas or if frame pointer elimination is disabled.
bool X86FrameLowering::hasFPImpl(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
          TRI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
          MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() ||
          MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
          MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
          MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
          MFI.hasStackMap() || MFI.hasPatchPoint() ||
          (isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment()));
}

static unsigned getSUBriOpcode(bool IsLP64) {
  return IsLP64 ? X86::SUB64ri32 : X86::SUB32ri;
}

static unsigned getADDriOpcode(bool IsLP64) {
  return IsLP64 ? X86::ADD64ri32 : X86::ADD32ri;
}

static unsigned getSUBrrOpcode(bool IsLP64) {
  return IsLP64 ? X86::SUB64rr : X86::SUB32rr;
}

static unsigned getADDrrOpcode(bool IsLP64) {
  return IsLP64 ? X86::ADD64rr : X86::ADD32rr;
}

static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
  return IsLP64 ? X86::AND64ri32 : X86::AND32ri;
}

static unsigned getLEArOpcode(bool IsLP64) {
  return IsLP64 ? X86::LEA64r : X86::LEA32r;
}

static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm) {
  if (Use64BitReg) {
    if (isUInt<32>(Imm))
      return X86::MOV32ri64;
    if (isInt<32>(Imm))
      return X86::MOV64ri32;
    return X86::MOV64ri;
  }
  return X86::MOV32ri;
}
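
// For example (illustrative): getMOVriOpcode(/*Use64BitReg=*/true, 0x7fffffff)
// selects X86::MOV32ri64, since the immediate zero-extends to 64 bits, while a
// negative immediate such as -8 fails isUInt<32> and selects X86::MOV64ri32,
// whose 32-bit immediate is sign-extended.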
144
145// Push-Pop Acceleration (PPX) hint is used to indicate that the POP reads the
146// value written by the PUSH from the stack. The processor tracks these marked
147// instructions internally and fast-forwards register data between matching PUSH
148// and POP instructions, without going through memory or through the training
149// loop of the Fast Store Forwarding Predictor (FSFP). Instead, a more efficient
150// memory-renaming optimization can be used.
151//
152// The PPX hint is purely a performance hint. Instructions with this hint have
153// the same functional semantics as those without. PPX hints set by the
154// compiler that violate the balancing rule may turn off the PPX optimization,
155// but they will not affect program semantics.
156//
157// Hence, PPX is used for balanced spill/reloads (Exceptions and setjmp/longjmp
158// are not considered).
159//
160// PUSH2 and POP2 are instructions for (respectively) pushing/popping 2
161// GPRs at a time to/from the stack.
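//
// For illustration only (assuming a subtarget with PPX and PUSH2/POP2
// support), a prologue/epilogue pair spilling two callee-saved GPRs might
// look like:
//   push2p  %r14, %r15     # prologue: spill a pair with the PPX hint
//   ...
//   pop2p   %r15, %r14     # epilogue: balanced reload of the same pair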
static unsigned getPUSHOpcode(const X86Subtarget &ST) {
  return ST.is64Bit() ? (ST.hasPPX() ? X86::PUSHP64r : X86::PUSH64r)
                      : X86::PUSH32r;
}
static unsigned getPOPOpcode(const X86Subtarget &ST) {
  return ST.is64Bit() ? (ST.hasPPX() ? X86::POPP64r : X86::POP64r)
                      : X86::POP32r;
}
static unsigned getPUSH2Opcode(const X86Subtarget &ST) {
  return ST.hasPPX() ? X86::PUSH2P : X86::PUSH2;
}
static unsigned getPOP2Opcode(const X86Subtarget &ST) {
  return ST.hasPPX() ? X86::POP2P : X86::POP2;
}

static bool isEAXLiveIn(MachineBasicBlock &MBB) {
  for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) {
    MCRegister Reg = RegMask.PhysReg;

    if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
        Reg == X86::AH || Reg == X86::AL)
      return true;
  }

  return false;
}

/// Check if the flags need to be preserved before the terminators.
/// This would be the case, if the eflags is live-in of the region
/// composed by the terminators or live-out of that region, without
/// being defined by a terminator.
static bool
flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) {
  for (const MachineInstr &MI : MBB.terminators()) {
    bool BreakNext = false;
    for (const MachineOperand &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (Reg != X86::EFLAGS)
        continue;

      // This terminator needs an eflags that is not defined
      // by a previous terminator:
      // EFLAGS is live-in of the region composed by the terminators.
      if (!MO.isDef())
        return true;
      // This terminator defines the eflags, i.e., we don't need to preserve it.
      // However, we still need to check this specific terminator does not
      // read a live-in value.
      BreakNext = true;
    }
    // We found a definition of the eflags, no need to preserve them.
    if (BreakNext)
      return false;
  }

  // None of the terminators use or define the eflags.
  // Check if they are live-out, that would imply we need to preserve them.
  for (const MachineBasicBlock *Succ : MBB.successors())
    if (Succ->isLiveIn(X86::EFLAGS))
      return true;

  return false;
}
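
// The largest stack-pointer adjustment emitted as a single ADD/SUB: the value
// must fit the instruction's signed 32-bit immediate. Larger adjustments are
// split into chunks or built in a scratch register (see emitSPUpdate below).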
constexpr uint64_t MaxSPChunk = (1ULL << 31) - 1;

/// emitSPUpdate - Emit a series of instructions to increment / decrement the
/// stack pointer by a constant value.
void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator &MBBI,
                                    const DebugLoc &DL, int64_t NumBytes,
                                    bool InEpilogue) const {
  bool isSub = NumBytes < 0;
  uint64_t Offset = isSub ? -NumBytes : NumBytes;
  MachineInstr::MIFlag Flag =
      isSub ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy;

  if (!Uses64BitFramePtr && !isUInt<32>(Offset)) {
    // We're being asked to adjust a 32-bit stack pointer by 4 GiB or more.
    // This might be unreachable code, so don't complain now; just trap if
    // it's reached at runtime.
    BuildMI(MBB, MBBI, DL, TII.get(X86::TRAP));
    return;
  }

  MachineFunction &MF = *MBB.getParent();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);

  // It's ok to not take into account large chunks when probing, as the
  // allocation is split in smaller chunks anyway.
  if (EmitInlineStackProbe && !InEpilogue) {

    // This pseudo-instruction is going to be expanded, potentially using a
    // loop, by inlineStackProbe().
    BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING)).addImm(Offset);
    return;
  } else if (Offset > MaxSPChunk) {
    // Rather than emit a long series of instructions for large offsets,
    // load the offset into a register and do one sub/add.
    unsigned Reg = 0;
    unsigned Rax = (unsigned)(Uses64BitFramePtr ? X86::RAX : X86::EAX);

    if (isSub && !isEAXLiveIn(MBB))
      Reg = Rax;
    else
      Reg = getX86SubSuperRegister(TRI->findDeadCallerSavedReg(MBB, MBBI),
                                   Uses64BitFramePtr ? 64 : 32);

    unsigned AddSubRROpc = isSub ? getSUBrrOpcode(Uses64BitFramePtr)
                                 : getADDrrOpcode(Uses64BitFramePtr);
    if (Reg) {
      BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Uses64BitFramePtr, Offset)),
              Reg)
          .addImm(Offset)
          .setMIFlag(Flag);
      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr)
                             .addReg(StackPtr)
                             .addReg(Reg);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
      return;
    } else if (Offset > 8 * MaxSPChunk) {
      // If we would need more than 8 add or sub instructions (a >16GB stack
      // frame), it's worth spilling RAX to materialize this immediate.
      //   pushq %rax
      //   movabsq +-$Offset+-SlotSize, %rax
      //   addq %rsp, %rax
      //   xchg %rax, (%rsp)
      //   movq (%rsp), %rsp
      assert(Uses64BitFramePtr && "can't have 32-bit 16GB stack frame");
      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
          .addReg(Rax, RegState::Undef)
          .setMIFlag(Flag);
      // Subtract is not commutative, so negate the offset and always use add.
      // Subtract 8 less and add 8 more to account for the PUSH we just did.
      if (isSub)
        Offset = -(Offset - SlotSize);
      else
        Offset = Offset + SlotSize;
      BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Uses64BitFramePtr, Offset)),
              Rax)
          .addImm(Offset)
          .setMIFlag(Flag);
      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax)
                             .addReg(Rax)
                             .addReg(StackPtr);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
      // Exchange the new SP in RAX with the top of the stack.
      addRegOffset(
          BuildMI(MBB, MBBI, DL, TII.get(X86::XCHG64rm), Rax).addReg(Rax),
          StackPtr, false, 0);
      // Load new SP from the top of the stack into RSP.
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), StackPtr),
                   StackPtr, false, 0);
      return;
    }
  }

  while (Offset) {
    if (Offset == SlotSize) {
      // Use push / pop for slot sized adjustments as a size optimization. We
      // need to find a dead register when using pop.
      unsigned Reg = isSub ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
                           : TRI->findDeadCallerSavedReg(MBB, MBBI);
      if (Reg) {
        unsigned Opc = isSub ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
                             : (Is64Bit ? X86::POP64r : X86::POP32r);
        BuildMI(MBB, MBBI, DL, TII.get(Opc))
            .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub))
            .setMIFlag(Flag);
        return;
      }
    }

    uint64_t ThisVal = std::min(Offset, MaxSPChunk);

    BuildStackAdjustment(MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue)
        .setMIFlag(Flag);

    Offset -= ThisVal;
  }
}

MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, int64_t Offset, bool InEpilogue) const {
  assert(Offset != 0 && "zero offset stack adjustment requested");

  // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue
  // is tricky.
  bool UseLEA;
  if (!InEpilogue) {
    // Check if inserting the prologue at the beginning
    // of MBB would require to use LEA operations.
    // We need to use LEA operations if EFLAGS is live in, because
    // it means an instruction will read it before it gets defined.
    UseLEA = STI.useLeaForSP() || MBB.isLiveIn(X86::EFLAGS);
  } else {
    // If we can use LEA for SP but we shouldn't, check that none
    // of the terminators uses the eflags. Otherwise we will insert
    // an ADD that will redefine the eflags and break the condition.
    // Alternatively, we could move the ADD, but this may not be possible
    // and is an optimization anyway.
    UseLEA = canUseLEAForSPInEpilogue(*MBB.getParent());
    if (UseLEA && !STI.useLeaForSP())
      UseLEA = flagsNeedToBePreservedBeforeTheTerminators(MBB);
    // If that assert breaks, that means we do not do the right thing
    // in canUseAsEpilogue.
    assert((UseLEA || !flagsNeedToBePreservedBeforeTheTerminators(MBB)) &&
           "We shouldn't have allowed this insertion point");
  }

  MachineInstrBuilder MI;
  if (UseLEA) {
    MI = addRegOffset(BuildMI(MBB, MBBI, DL,
                              TII.get(getLEArOpcode(Uses64BitFramePtr)),
                              StackPtr),
                      StackPtr, false, Offset);
  } else {
    bool IsSub = Offset < 0;
    uint64_t AbsOffset = IsSub ? -Offset : Offset;
    const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr)
                               : getADDriOpcode(Uses64BitFramePtr);
    MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
             .addReg(StackPtr)
             .addImm(AbsOffset);
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  }
  return MI;
}

template <typename FoundT, typename CalcT>
int64_t X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator &MBBI,
                                         FoundT FoundStackAdjust,
                                         CalcT CalcNewOffset,
                                         bool doMergeWithPrevious) const {
  if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
      (!doMergeWithPrevious && MBBI == MBB.end()))
    return CalcNewOffset(0);

  MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;

  PI = skipDebugInstructionsBackward(PI, MBB.begin());
  // It is assumed that an ADD/SUB/LEA instruction is succeeded by one CFI
  // instruction, and that there are no DBG_VALUE or other instructions between
  // ADD/SUB/LEA and its corresponding CFI instruction.
  /* TODO: Add support for the case where there are multiple CFI instructions
    below the ADD/SUB/LEA, e.g.:
    ...
    add
    cfi_def_cfa_offset
    cfi_offset
    ...
  */
  if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
    PI = std::prev(PI);

  int64_t Offset = 0;
  for (;;) {
    unsigned Opc = PI->getOpcode();

    if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) &&
        PI->getOperand(0).getReg() == StackPtr) {
      assert(PI->getOperand(1).getReg() == StackPtr);
      Offset = PI->getOperand(2).getImm();
    } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
               PI->getOperand(0).getReg() == StackPtr &&
               PI->getOperand(1).getReg() == StackPtr &&
               PI->getOperand(2).getImm() == 1 &&
               PI->getOperand(3).getReg() == X86::NoRegister &&
               PI->getOperand(5).getReg() == X86::NoRegister) {
      // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
      Offset = PI->getOperand(4).getImm();
    } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) &&
               PI->getOperand(0).getReg() == StackPtr) {
      assert(PI->getOperand(1).getReg() == StackPtr);
      Offset = -PI->getOperand(2).getImm();
    } else
      return CalcNewOffset(0);

    FoundStackAdjust(PI, Offset);
    if ((uint64_t)std::abs((int64_t)CalcNewOffset(Offset)) < MaxSPChunk)
      break;

    if (doMergeWithPrevious ? (PI == MBB.begin()) : (PI == MBB.end()))
      return CalcNewOffset(0);

    PI = doMergeWithPrevious ? std::prev(PI) : std::next(PI);
  }

  PI = MBB.erase(PI);
  if (PI != MBB.end() && PI->isCFIInstruction()) {
    auto CIs = MBB.getParent()->getFrameInstructions();
    MCCFIInstruction CI = CIs[PI->getOperand(0).getCFIIndex()];
    if (CI.getOperation() == MCCFIInstruction::OpDefCfaOffset ||
        CI.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
      PI = MBB.erase(PI);
  }
  if (!doMergeWithPrevious)
    MBBI = skipDebugInstructionsForward(PI, MBB.end());

  return CalcNewOffset(Offset);
}
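
// Illustrative example of the merge: if the instruction before the insertion
// point is `subq $40, %rsp` (recorded above as Offset == -40) and the caller
// asks to add -24 more via the wrapper below, the SUB is erased and the
// returned value of -64 lets the caller emit one 64-byte allocation instead
// of two separate adjustments.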

int64_t X86FrameLowering::mergeSPAdd(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator &MBBI,
                                     int64_t AddOffset,
                                     bool doMergeWithPrevious) const {
  return mergeSPUpdates(
      MBB, MBBI, [AddOffset](int64_t Offset) { return AddOffset + Offset; },
      doMergeWithPrevious);
}

void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                const DebugLoc &DL,
                                const MCCFIInstruction &CFIInst,
                                MachineInstr::MIFlag Flag) const {
  MachineFunction &MF = *MBB.getParent();
  unsigned CFIIndex = MF.addFrameInst(CFIInst);

  if (CFIInst.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
    MF.getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true);

  BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex)
      .setMIFlag(Flag);
}

/// Emits Dwarf Info specifying offsets of callee saved registers and
/// frame pointer. This is called only when basic block sections are enabled.
void X86FrameLowering::emitCalleeSavedFrameMovesFullCFA(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  MachineFunction &MF = *MBB.getParent();
  if (!hasFP(MF)) {
    emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
    return;
  }
  const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
  const Register FramePtr = TRI->getFrameRegister(MF);
  const Register MachineFramePtr =
      STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(FramePtr, 64))
                               : FramePtr;
  unsigned DwarfReg = MRI->getDwarfRegNum(MachineFramePtr, true);
  // Offset = space for return address + size of the frame pointer itself.
  int64_t Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 8 : 4);
  BuildCFI(MBB, MBBI, DebugLoc{},
           MCCFIInstruction::createOffset(nullptr, DwarfReg, -Offset));
  emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
}

void X86FrameLowering::emitCalleeSavedFrameMoves(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, bool IsPrologue) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

  // Calculate offsets.
  for (const CalleeSavedInfo &I : CSI) {
    int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
    MCRegister Reg = I.getReg();
    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);

    if (IsPrologue) {
      if (X86FI->getStackPtrSaveMI()) {
        // +2*SlotSize because the return address and ebp are at the bottom
        // of the stack.
        // | retaddr |
        // | ebp     |
        // |         |<--ebp
        Offset += 2 * SlotSize;
        SmallString<64> CfaExpr;
        CfaExpr.push_back(dwarf::DW_CFA_expression);
        uint8_t buffer[16];
        CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
        CfaExpr.push_back(2);
        Register FramePtr = TRI->getFrameRegister(MF);
        const Register MachineFramePtr =
            STI.isTarget64BitILP32()
                ? Register(getX86SubSuperRegister(FramePtr, 64))
                : FramePtr;
        unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
        CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
        CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
        BuildCFI(MBB, MBBI, DL,
                 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
                 MachineInstr::FrameSetup);
      } else {
        BuildCFI(MBB, MBBI, DL,
                 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
      }
    } else {
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createRestore(nullptr, DwarfReg));
    }
  }
  if (auto *MI = X86FI->getStackPtrSaveMI()) {
    int FI = MI->getOperand(1).getIndex();
    int64_t Offset = MFI.getObjectOffset(FI) + 2 * SlotSize;
    SmallString<64> CfaExpr;
    Register FramePtr = TRI->getFrameRegister(MF);
    const Register MachineFramePtr =
        STI.isTarget64BitILP32()
            ? Register(getX86SubSuperRegister(FramePtr, 64))
            : FramePtr;
    unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
    CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
    uint8_t buffer[16];
    CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
    CfaExpr.push_back(dwarf::DW_OP_deref);

    SmallString<64> DefCfaExpr;
    DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
    DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
    DefCfaExpr.append(CfaExpr.str());
    // DW_CFA_def_cfa_expression: DW_OP_breg5 offset, DW_OP_deref
    BuildCFI(MBB, MBBI, DL,
             MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
             MachineInstr::FrameSetup);
  }
}

void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
                                            MachineBasicBlock &MBB) const {
  const MachineFunction &MF = *MBB.getParent();

  // Insertion point.
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();

  // Fake a debug loc.
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();

  // Zero out FP stack if referenced. Do this outside of the loop below so that
  // it's done only once.
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  for (MCRegister Reg : RegsToZero.set_bits()) {
    if (!X86::RFP80RegClass.contains(Reg))
      continue;

    unsigned NumFPRegs = ST.is64Bit() ? 8 : 7;
    for (unsigned i = 0; i != NumFPRegs; ++i)
      BuildMI(MBB, MBBI, DL, TII.get(X86::LD_F0));

    for (unsigned i = 0; i != NumFPRegs; ++i)
      BuildMI(MBB, MBBI, DL, TII.get(X86::ST_FPrr)).addReg(X86::ST0);
    break;
  }

  // For GPRs, we only care to clear out the 32-bit register.
  BitVector GPRsToZero(TRI->getNumRegs());
  for (MCRegister Reg : RegsToZero.set_bits())
    if (TRI->isGeneralPurposeRegister(MF, Reg)) {
      GPRsToZero.set(getX86SubSuperRegister(Reg, 32));
      RegsToZero.reset(Reg);
    }

  // Zero out the GPRs first.
  for (MCRegister Reg : GPRsToZero.set_bits())
    TII.buildClearRegister(Reg, MBB, MBBI, DL);

  // Zero out the remaining registers.
  for (MCRegister Reg : RegsToZero.set_bits())
    TII.buildClearRegister(Reg, MBB, MBBI, DL);
}
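
// Illustrative expansion of the FP-stack clearing above on x86-64 (all eight
// x87 slots; exact printing depends on the AsmPrinter):
//   fldz          # x8 -- LD_F0 pushes +0.0 onto the x87 stack
//   fstp %st(0)   # x8 -- ST_FPrr pops it again, leaving the stack empty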

void X86FrameLowering::emitStackProbe(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
    std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  if (STI.isTargetWindowsCoreCLR()) {
    if (InProlog) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING))
          .addImm(0 /* no explicit stack size */);
    } else {
      emitStackProbeInline(MF, MBB, MBBI, DL, false);
    }
  } else {
    emitStackProbeCall(MF, MBB, MBBI, DL, InProlog, InstrNum);
  }
}

bool X86FrameLowering::stackProbeFunctionModifiesSP() const {
  return STI.isOSWindows() && !STI.isTargetWin64();
}

void X86FrameLowering::inlineStackProbe(MachineFunction &MF,
                                        MachineBasicBlock &PrologMBB) const {
  auto Where = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
    return MI.getOpcode() == X86::STACKALLOC_W_PROBING;
  });
  if (Where != PrologMBB.end()) {
    DebugLoc DL = PrologMBB.findDebugLoc(Where);
    emitStackProbeInline(MF, PrologMBB, Where, DL, true);
    Where->eraseFromParent();
  }
}

void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
                                            MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MBBI,
                                            const DebugLoc &DL,
                                            bool InProlog) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  if (STI.isTargetWindowsCoreCLR() && STI.is64Bit())
    emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog);
  else
    emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog);
}

void X86FrameLowering::emitStackProbeInlineGeneric(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
  MachineInstr &AllocWithProbe = *MBBI;
  uint64_t Offset = AllocWithProbe.getOperand(0).getImm();

  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) &&
         "different expansion expected for CoreCLR 64 bit");

  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  uint64_t ProbeChunk = StackProbeSize * 8;

  uint64_t MaxAlign =
      TRI->hasStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0;

  // Synthesize a loop or unroll it, depending on the number of iterations.
  // BuildStackAlignAND ensures that only MaxAlign % StackProbeSize bits are
  // left between the unaligned rsp and current rsp.
  if (Offset > ProbeChunk) {
    emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset,
                                    MaxAlign % StackProbeSize);
  } else {
    emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset,
                                     MaxAlign % StackProbeSize);
  }
}

void X86FrameLowering::emitStackProbeInlineGenericBlock(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
    uint64_t AlignOffset) const {

  const bool NeedsDwarfCFI = needsDwarfCFI(MF);
  const bool HasFP = hasFP(MF);
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);

  uint64_t CurrentOffset = 0;

  assert(AlignOffset < StackProbeSize);

  // If the offset is so small it fits within a page, there's nothing to do.
  if (StackProbeSize < Offset + AlignOffset) {

    uint64_t StackAdjustment = StackProbeSize - AlignOffset;
    BuildStackAdjustment(MBB, MBBI, DL, -StackAdjustment, /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
    if (!HasFP && NeedsDwarfCFI) {
      BuildCFI(
          MBB, MBBI, DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
    }

    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                     .setMIFlag(MachineInstr::FrameSetup),
                 StackPtr, false, 0)
        .addImm(0)
        .setMIFlag(MachineInstr::FrameSetup);
    NumFrameExtraProbe++;
    CurrentOffset = StackProbeSize - AlignOffset;
  }

  // For the next N - 1 pages, just probe. I tried to take advantage of
  // natural probes but it implies much more logic and there were very few
  // interesting natural probes to interleave.
  while (CurrentOffset + StackProbeSize < Offset) {
    BuildStackAdjustment(MBB, MBBI, DL, -StackProbeSize, /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);

    if (!HasFP && NeedsDwarfCFI) {
      BuildCFI(
          MBB, MBBI, DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, StackProbeSize));
    }
    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                     .setMIFlag(MachineInstr::FrameSetup),
                 StackPtr, false, 0)
        .addImm(0)
        .setMIFlag(MachineInstr::FrameSetup);
    NumFrameExtraProbe++;
    CurrentOffset += StackProbeSize;
  }

  // No need to probe the tail, it is smaller than a page.
  uint64_t ChunkSize = Offset - CurrentOffset;
  if (ChunkSize == SlotSize) {
    // Use push for slot sized adjustments as a size optimization,
    // like emitSPUpdate does when not probing.
    unsigned Reg = Is64Bit ? X86::RAX : X86::EAX;
    unsigned Opc = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
    BuildMI(MBB, MBBI, DL, TII.get(Opc))
        .addReg(Reg, RegState::Undef)
        .setMIFlag(MachineInstr::FrameSetup);
  } else {
    BuildStackAdjustment(MBB, MBBI, DL, -ChunkSize, /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
  }
  // No need to adjust Dwarf CFA offset here, the last position of the stack
  // has been defined.
}
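
// Illustrative unrolled expansion (assuming StackProbeSize == 4096,
// AlignOffset == 0 and Offset == 3 * 4096 on x86-64):
//   subq $4096, %rsp
//   movq $0, (%rsp)        # probe page 1
//   subq $4096, %rsp
//   movq $0, (%rsp)        # probe page 2
//   subq $4096, %rsp       # tail allocation; no probe needed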

void X86FrameLowering::emitStackProbeInlineGenericLoop(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
    uint64_t AlignOffset) const {
  assert(Offset && "null offset");

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Inline stack probe loop will clobber live EFLAGS.");

  const bool NeedsDwarfCFI = needsDwarfCFI(MF);
  const bool HasFP = hasFP(MF);
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);

  if (AlignOffset) {
    if (AlignOffset < StackProbeSize) {
      // Perform a first smaller allocation followed by a probe.
      BuildStackAdjustment(MBB, MBBI, DL, -AlignOffset, /*InEpilogue=*/false)
          .setMIFlag(MachineInstr::FrameSetup);

      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                       .setMIFlag(MachineInstr::FrameSetup),
                   StackPtr, false, 0)
          .addImm(0)
          .setMIFlag(MachineInstr::FrameSetup);
      NumFrameExtraProbe++;
      Offset -= AlignOffset;
    }
  }

  // Synthesize a loop.
  NumFrameLoopProbe++;
  const BasicBlock *LLVM_BB = MBB.getBasicBlock();

  MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator MBBIter = ++MBB.getIterator();
  MF.insert(MBBIter, testMBB);
  MF.insert(MBBIter, tailMBB);

  Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
                              : Is64Bit         ? X86::R11D
                                                : X86::EAX;

  // Save the loop bound.
  {
    const uint64_t BoundOffset = alignDown(Offset, StackProbeSize);

    // Can we calculate the loop bound using SUB with a 32-bit immediate?
    // Note that the immediate gets sign-extended when used with a 64-bit
    // register, so in that case we only have 31 bits to work with.
    bool canUseSub =
        Uses64BitFramePtr ? isUInt<31>(BoundOffset) : isUInt<32>(BoundOffset);

    if (canUseSub) {
      const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);

      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
          .addReg(StackPtr)
          .setMIFlag(MachineInstr::FrameSetup);
      BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
          .addReg(FinalStackProbed)
          .addImm(BoundOffset)
          .setMIFlag(MachineInstr::FrameSetup);
    } else if (Uses64BitFramePtr) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), FinalStackProbed)
          .addImm(-BoundOffset)
          .setMIFlag(MachineInstr::FrameSetup);
      BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), FinalStackProbed)
          .addReg(FinalStackProbed)
          .addReg(StackPtr)
          .setMIFlag(MachineInstr::FrameSetup);
    } else {
      llvm_unreachable("Offset too large for 32-bit stack pointer");
    }

    // While in the loop, use the loop-invariant register for CFI,
    // instead of the stack pointer, which changes during the loop.
    if (!HasFP && NeedsDwarfCFI) {
      // x32 uses the same DWARF register numbers as x86-64,
      // so there isn't a register number for r11d; we must use r11 instead.
      const Register DwarfFinalStackProbed =
          STI.isTarget64BitILP32()
              ? Register(getX86SubSuperRegister(FinalStackProbed, 64))
              : FinalStackProbed;

      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createDefCfaRegister(
                   nullptr, TRI->getDwarfRegNum(DwarfFinalStackProbed, true)));
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createAdjustCfaOffset(nullptr, BoundOffset));
    }
  }

  // Allocate a page.
  BuildStackAdjustment(*testMBB, testMBB->end(), DL, -StackProbeSize,
                       /*InEpilogue=*/false)
      .setMIFlag(MachineInstr::FrameSetup);

  // Touch the page.
  addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc))
                   .setMIFlag(MachineInstr::FrameSetup),
               StackPtr, false, 0)
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);

  // Compare with the stack pointer bound.
  BuildMI(testMBB, DL, TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
      .addReg(StackPtr)
      .addReg(FinalStackProbed)
      .setMIFlag(MachineInstr::FrameSetup);

  // Jump.
  BuildMI(testMBB, DL, TII.get(X86::JCC_1))
      .addMBB(testMBB)
      .addImm(X86::COND_NE)
      .setMIFlag(MachineInstr::FrameSetup);
  testMBB->addSuccessor(testMBB);
  testMBB->addSuccessor(tailMBB);

  // BB management.
  tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end());
  tailMBB->transferSuccessorsAndUpdatePHIs(&MBB);
  MBB.addSuccessor(testMBB);

  // Handle the tail.
  const uint64_t TailOffset = Offset % StackProbeSize;
  MachineBasicBlock::iterator TailMBBIter = tailMBB->begin();
  if (TailOffset) {
    BuildStackAdjustment(*tailMBB, TailMBBIter, DL, -TailOffset,
                         /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // After the loop, switch back to the stack pointer for CFI.
  if (!HasFP && NeedsDwarfCFI) {
    // x32 uses the same DWARF register numbers as x86-64,
    // so there isn't a register number for esp; we must use rsp instead.
    const Register DwarfStackPtr =
        STI.isTarget64BitILP32()
            ? Register(getX86SubSuperRegister(StackPtr, 64))
            : Register(StackPtr);

    BuildCFI(*tailMBB, TailMBBIter, DL,
             MCCFIInstruction::createDefCfaRegister(
                 nullptr, TRI->getDwarfRegNum(DwarfStackPtr, true)));
  }

  // Update live-in information.
  fullyRecomputeLiveIns({tailMBB, testMBB});
}
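
// Illustrative shape of the synthesized loop (x86-64, StackProbeSize == 4096,
// bound reachable with SUB, frame pointer present so no CFI shown):
//   movq %rsp, %r11
//   subq $BoundOffset, %r11    # loop bound
// .LtestMBB:
//   subq $4096, %rsp           # allocate a page
//   movq $0, (%rsp)            # touch it
//   cmpq %r11, %rsp
//   jne  .LtestMBB
//   subq $TailOffset, %rsp     # remainder, no probe needed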

void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  assert(STI.is64Bit() && "different expansion needed for 32 bit");
  assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  const BasicBlock *LLVM_BB = MBB.getBasicBlock();

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Inline stack probe loop will clobber live EFLAGS.");

  // RAX contains the number of bytes of desired stack adjustment.
  // The handling here assumes this value has already been updated so as to
  // maintain stack alignment.
  //
  // We need to exit with RSP modified by this amount and execute suitable
  // page touches to notify the OS that we're growing the stack responsibly.
  // All stack probing must be done without modifying RSP.
  //
  // MBB:
  //    SizeReg = RAX;
  //    ZeroReg = 0
  //    CopyReg = RSP
  //    Flags, TestReg = CopyReg - SizeReg
  //    FinalReg = !Flags.Ovf ? TestReg : ZeroReg
  //    LimitReg = gs magic thread env access
  //    if FinalReg >= LimitReg goto ContinueMBB
  // RoundBB:
  //    RoundReg = page address of FinalReg
  // LoopMBB:
  //    LoopReg = PHI(LimitReg,ProbeReg)
  //    ProbeReg = LoopReg - PageSize
  //    [ProbeReg] = 0
  //    if (ProbeReg > RoundReg) goto LoopMBB
  // ContinueMBB:
  //    RSP = RSP - RAX
  //    [rest of original MBB]

  // Set up the new basic blocks.
  MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator MBBIter = std::next(MBB.getIterator());
  MF.insert(MBBIter, RoundMBB);
  MF.insert(MBBIter, LoopMBB);
  MF.insert(MBBIter, ContinueMBB);

  // Split MBB and move the tail portion down to ContinueMBB.
  MachineBasicBlock::iterator BeforeMBBI = std::prev(MBBI);
  ContinueMBB->splice(ContinueMBB->begin(), &MBB, MBBI, MBB.end());
  ContinueMBB->transferSuccessorsAndUpdatePHIs(&MBB);

  // Some useful constants.
  const int64_t ThreadEnvironmentStackLimit = 0x10;
  const int64_t PageSize = 0x1000;
  const int64_t PageMask = ~(PageSize - 1);

  // Registers we need. For the normal case we use virtual
  // registers. For the prolog expansion we use RAX, RCX and RDX.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetRegisterClass *RegClass = &X86::GR64RegClass;
  const Register
      SizeReg = InProlog ? X86::RAX : MRI.createVirtualRegister(RegClass),
      ZeroReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      CopyReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      TestReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      FinalReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      RoundedReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      LimitReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      JoinReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      ProbeReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass);

  // SP-relative offsets where we can save RCX and RDX.
  int64_t RCXShadowSlot = 0;
  int64_t RDXShadowSlot = 0;

  // If inlining in the prolog, save RCX and RDX.
  if (InProlog) {
    // Compute the offsets. We need to account for things already
    // pushed onto the stack at this point: return address, frame
    // pointer (if used), and callee saves.
    X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
    const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();
    const bool HasFP = hasFP(MF);

    // Check if we need to spill RCX and/or RDX.
    // Here we assume that no earlier prologue instruction changes RCX and/or
    // RDX, so checking the block live-ins is enough.
    const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX);
    const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX);
    int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
    // Assign the initial slot to both registers, then change RDX's slot if
    // both need to be spilled.
    if (IsRCXLiveIn)
      RCXShadowSlot = InitSlot;
    if (IsRDXLiveIn)
      RDXShadowSlot = InitSlot;
    if (IsRDXLiveIn && IsRCXLiveIn)
      RDXShadowSlot += 8;
    // Emit the saves if needed.
    if (IsRCXLiveIn)
      addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
                   RCXShadowSlot)
          .addReg(X86::RCX);
    if (IsRDXLiveIn)
      addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
                   RDXShadowSlot)
          .addReg(X86::RDX);
  } else {
    // Not in the prolog. Copy RAX to a virtual reg.
    BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX);
  }

  // Add code to MBB to check for overflow and set the new target stack pointer
  // to zero if so.
  BuildMI(&MBB, DL, TII.get(X86::XOR64rr), ZeroReg)
      .addReg(ZeroReg, RegState::Undef)
      .addReg(ZeroReg, RegState::Undef);
  BuildMI(&MBB, DL, TII.get(X86::MOV64rr), CopyReg).addReg(X86::RSP);
  BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg)
      .addReg(CopyReg)
      .addReg(SizeReg);
  BuildMI(&MBB, DL, TII.get(X86::CMOV64rr), FinalReg)
      .addReg(TestReg)
      .addReg(ZeroReg)
      .addImm(X86::COND_B);

  // FinalReg now holds final stack pointer value, or zero if
  // allocation would overflow. Compare against the current stack
  // limit from the thread environment block. Note this limit is the
  // lowest touched page on the stack, not the point at which the OS
  // will cause an overflow exception, so this is just an optimization
  // to avoid unnecessarily touching pages that are below the current
  // SP but already committed to the stack by the OS.
  BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg)
      .addReg(0)
      .addImm(1)
      .addReg(0)
      .addImm(ThreadEnvironmentStackLimit)
      .addReg(X86::GS);
  BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg);
  // Jump if the desired stack pointer is at or above the stack limit.
  BuildMI(&MBB, DL, TII.get(X86::JCC_1))
      .addMBB(ContinueMBB)
      .addImm(X86::COND_AE);

  // Add code to roundMBB to round the final stack pointer to a page boundary.
  if (InProlog)
    RoundMBB->addLiveIn(FinalReg);
  BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg)
      .addReg(FinalReg)
      .addImm(PageMask);
  BuildMI(RoundMBB, DL, TII.get(X86::JMP_1)).addMBB(LoopMBB);

  // LimitReg now holds the current stack limit, RoundedReg the page-rounded
  // final RSP value. Add code to loopMBB to decrement page-by-page and probe
  // until we reach RoundedReg.
  if (!InProlog) {
    BuildMI(LoopMBB, DL, TII.get(X86::PHI), JoinReg)
        .addReg(LimitReg)
        .addMBB(RoundMBB)
        .addReg(ProbeReg)
        .addMBB(LoopMBB);
  }

  if (InProlog)
    LoopMBB->addLiveIn(JoinReg);
  addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg,
               false, -PageSize);

  // Probe by storing a byte onto the stack.
  BuildMI(LoopMBB, DL, TII.get(X86::MOV8mi))
      .addReg(ProbeReg)
      .addImm(1)
      .addReg(0)
      .addImm(0)
      .addReg(0)
      .addImm(0);

  if (InProlog)
    LoopMBB->addLiveIn(RoundedReg);
  BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
      .addReg(RoundedReg)
      .addReg(ProbeReg);
  BuildMI(LoopMBB, DL, TII.get(X86::JCC_1))
      .addMBB(LoopMBB)
      .addImm(X86::COND_B);

  MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI();

  // If in prolog, restore RDX and RCX.
  if (InProlog) {
    if (RCXShadowSlot) // It means we spilled RCX in the prologue.
      addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
                           TII.get(X86::MOV64rm), X86::RCX),
                   X86::RSP, false, RCXShadowSlot);
    if (RDXShadowSlot) // It means we spilled RDX in the prologue.
      addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
                           TII.get(X86::MOV64rm), X86::RDX),
                   X86::RSP, false, RDXShadowSlot);
  }

  // Now that the probing is done, add code to continueMBB to update
  // the stack pointer for real.
  BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
      .addReg(X86::RSP)
      .addReg(SizeReg);

  // Add the control flow edges we need.
  MBB.addSuccessor(ContinueMBB);
  MBB.addSuccessor(RoundMBB);
  RoundMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(ContinueMBB);
  LoopMBB->addSuccessor(LoopMBB);

  if (InProlog) {
    LivePhysRegs LiveRegs;
    computeAndAddLiveIns(LiveRegs, *ContinueMBB);
  }

  // Mark all the instructions added to the prolog as frame setup.
  if (InProlog) {
    for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) {
      BeforeMBBI->setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI : *RoundMBB) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI : *LoopMBB) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI :
         llvm::make_range(ContinueMBB->begin(), ContinueMBBI)) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
  }
}

void X86FrameLowering::emitStackProbeCall(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
    std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
  bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;

  // FIXME: Add indirect thunk support and remove this.
  if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls())
    report_fatal_error("Emitting stack probe calls on 64-bit with the large "
                       "code model and indirect thunks not yet implemented.");

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Stack probe calls will clobber live EFLAGS.");

  unsigned CallOp;
  if (Is64Bit)
    CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
  else
    CallOp = X86::CALLpcrel32;

  StringRef Symbol = STI.getTargetLowering()->getStackProbeSymbolName(MF);

  MachineInstrBuilder CI;
  MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI);

  // All current stack probes take AX and SP as input, clobber flags, and
  // preserve all registers. x86_64 probes leave RSP unmodified.
  if (Is64Bit && IsLargeCodeModel) {
    // For the large code model, we have to call through a register. Use R11,
    // as it is scratch in all supported calling conventions.
    BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
        .addExternalSymbol(MF.createExternalSymbolName(Symbol));
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
  } else {
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp))
             .addExternalSymbol(MF.createExternalSymbolName(Symbol));
  }

  unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX;
  unsigned SP = Uses64BitFramePtr ? X86::RSP : X86::ESP;
  CI.addReg(AX, RegState::Implicit)
      .addReg(SP, RegState::Implicit)
      .addReg(AX, RegState::Define | RegState::Implicit)
      .addReg(SP, RegState::Define | RegState::Implicit)
      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);

  MachineInstr *ModInst = CI;
  if (STI.isTargetWin64() || !STI.isOSWindows()) {
    // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
    // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
    // themselves. They also do not clobber %rax, so we can reuse it when
    // adjusting %rsp.
    // All other platforms do not specify a particular ABI for the stack probe
    // function, so we arbitrarily define it to not adjust %esp/%rsp itself.
    ModInst =
        BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Uses64BitFramePtr)), SP)
            .addReg(SP)
            .addReg(AX);
  }

  // DebugInfo variable locations -- if there's an instruction number for the
  // allocation (i.e., DYN_ALLOC_*), substitute it for the instruction that
  // modifies SP.
  if (InstrNum) {
    if (STI.isTargetWin64() || !STI.isOSWindows()) {
      // Label destination operand of the subtract.
      MF.makeDebugValueSubstitution(*InstrNum,
                                    {ModInst->getDebugInstrNum(), 0});
    } else {
      // Label the call. The operand number is the penultimate operand, zero
      // based.
      unsigned SPDefOperand = ModInst->getNumOperands() - 2;
      MF.makeDebugValueSubstitution(
          *InstrNum, {ModInst->getDebugInstrNum(), SPDefOperand});
    }
  }

  if (InProlog) {
    // Apply the frame setup flag to all inserted instrs.
    for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI)
      ExpansionMBBI->setFlag(MachineInstr::FrameSetup);
  }
}
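
// Illustrative emitted sequence on Win64 (probe symbol comes from
// getStackProbeSymbolName, e.g. ___chkstk_ms for mingw targets; compare the
// prologue gist further below):
//   movl $NNN, %eax            # requested allocation size
//   callq ___chkstk_ms         # probes each page; leaves %rsp unmodified
//   subq %rax, %rsp            # perform the actual allocation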

static unsigned calculateSetFPREG(uint64_t SPAdjust) {
  // Win64 ABI has a less restrictive limitation of 240; 128 works equally well
  // and might require smaller successive adjustments.
  const uint64_t Win64MaxSEHOffset = 128;
  uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset);
  // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode.
  return SEHFrameOffset & -16;
}
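
// Worked example (illustrative): for SPAdjust == 100, min(100, 128) == 100
// and 100 & -16 == 96, so the frame register is set 96 bytes above RSP; for
// any SPAdjust >= 128 the result caps at 128, which is already 16-byte
// aligned.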

// If we're forcing a stack realignment we can't rely on just the frame
// info, we need to know the ABI stack alignment as well in case we
// have a call out. Otherwise just make sure we have some alignment - we'll
// go with the minimum SlotSize.
uint64_t
X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment.
  Align StackAlign = getStackAlign();
  bool HasRealign = MF.getFunction().hasFnAttribute("stackrealign");
  if (HasRealign) {
    if (MFI.hasCalls())
      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
    else if (MaxAlign < SlotSize)
      MaxAlign = Align(SlotSize);
  }

  if (!Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR) {
    if (HasRealign)
      MaxAlign = (MaxAlign > 16) ? MaxAlign : Align(16);
    else
      MaxAlign = Align(16);
  }
  return MaxAlign.value();
}

void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI,
                                          const DebugLoc &DL, Register Reg,
                                          uint64_t MaxAlign) const {
  uint64_t Val = -MaxAlign;
  unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);

  MachineFunction &MF = *MBB.getParent();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);

  // We want to make sure that, in the worst case, no more than StackProbeSize
  // bytes are left unprobed after the AND. This assumption is used in
  // emitStackProbeInlineGeneric.
  if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) {
    {
      NumFrameLoopProbe++;
      MachineBasicBlock *entryMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *headMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *bodyMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *footMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());

      MachineFunction::iterator MBBIter = MBB.getIterator();
      MF.insert(MBBIter, entryMBB);
      MF.insert(MBBIter, headMBB);
      MF.insert(MBBIter, bodyMBB);
      MF.insert(MBBIter, footMBB);
      const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
      Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
                                  : Is64Bit         ? X86::R11D
                                                    : X86::EAX;

      // Setup entry block.
      {

        entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI);
        BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);
        MachineInstr *MI =
            BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed)
                .addReg(FinalStackProbed)
                .addImm(Val)
                .setMIFlag(MachineInstr::FrameSetup);

        // The EFLAGS implicit def is dead.
        MI->getOperand(3).setIsDead();

        BuildMI(entryMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);
        BuildMI(entryMBB, DL, TII.get(X86::JCC_1))
            .addMBB(&MBB)
            .addImm(X86::COND_E)
            .setMIFlag(MachineInstr::FrameSetup);
        entryMBB->addSuccessor(headMBB);
        entryMBB->addSuccessor(&MBB);
      }

      // Loop entry block.

      {
        const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
        BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr)
            .addReg(StackPtr)
            .addImm(StackProbeSize)
            .setMIFlag(MachineInstr::FrameSetup);

        BuildMI(headMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(StackPtr)
            .addReg(FinalStackProbed)
            .setMIFlag(MachineInstr::FrameSetup);

        // Jump to the footer if StackPtr < FinalStackProbed.
        BuildMI(headMBB, DL, TII.get(X86::JCC_1))
            .addMBB(footMBB)
            .addImm(X86::COND_B)
            .setMIFlag(MachineInstr::FrameSetup);

        headMBB->addSuccessor(bodyMBB);
        headMBB->addSuccessor(footMBB);
      }

      // Setup loop body.
      {
        addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc))
                         .setMIFlag(MachineInstr::FrameSetup),
                     StackPtr, false, 0)
            .addImm(0)
            .setMIFlag(MachineInstr::FrameSetup);

        const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
        BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr)
            .addReg(StackPtr)
            .addImm(StackProbeSize)
            .setMIFlag(MachineInstr::FrameSetup);

        // Compare with the stack pointer bound.
        BuildMI(bodyMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);

        // Jump back while FinalStackProbed < StackPtr.
        BuildMI(bodyMBB, DL, TII.get(X86::JCC_1))
            .addMBB(bodyMBB)
            .addImm(X86::COND_B)
            .setMIFlag(MachineInstr::FrameSetup);
        bodyMBB->addSuccessor(bodyMBB);
        bodyMBB->addSuccessor(footMBB);
      }

      // Setup loop footer.
      {
        BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr)
            .addReg(FinalStackProbed)
            .setMIFlag(MachineInstr::FrameSetup);
        addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc))
                         .setMIFlag(MachineInstr::FrameSetup),
                     StackPtr, false, 0)
            .addImm(0)
            .setMIFlag(MachineInstr::FrameSetup);
        footMBB->addSuccessor(&MBB);
      }

      fullyRecomputeLiveIns({footMBB, bodyMBB, headMBB, &MBB});
    }
  } else {
    MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
                           .addReg(Reg)
                           .addImm(Val)
                           .setMIFlag(MachineInstr::FrameSetup);

    // The EFLAGS implicit def is dead.
    MI->getOperand(3).setIsDead();
  }
}
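
// In the common case (no inline probing required) the realignment is a single
// instruction, e.g. for MaxAlign == 32 on x86-64:
//   andq $-32, %rsp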

bool X86FrameLowering::has128ByteRedZone(const MachineFunction &MF) const {
  // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be
  // clobbered by any interrupt handler.
  assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
         "MF used frame lowering for wrong subtarget");
  const Function &Fn = MF.getFunction();
  const bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv());
  return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone);
}
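
// Illustrative consequence (SysV x86-64): a qualifying leaf function can keep
// locals in the red zone with no SP adjustment at all, e.g.
//   movl %edi, -4(%rsp)
//   movl -4(%rsp), %eax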

/// Return true if we need to use the restricted Windows x64 prologue and
/// epilogue code patterns that can be described with WinCFI (.seh_*
/// directives).
bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const {
  return MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
}

bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
  return !isWin64Prologue(MF) && MF.needsFrameMoves();
}

/// Return true if an opcode is part of the REP group of instructions.
static bool isOpcodeRep(unsigned Opcode) {
  switch (Opcode) {
  case X86::REPNE_PREFIX:
  case X86::REP_MOVSB_32:
  case X86::REP_MOVSB_64:
  case X86::REP_MOVSD_32:
  case X86::REP_MOVSD_64:
  case X86::REP_MOVSQ_32:
  case X86::REP_MOVSQ_64:
  case X86::REP_MOVSW_32:
  case X86::REP_MOVSW_64:
  case X86::REP_PREFIX:
  case X86::REP_STOSB_32:
  case X86::REP_STOSB_64:
  case X86::REP_STOSD_32:
  case X86::REP_STOSD_64:
  case X86::REP_STOSQ_32:
  case X86::REP_STOSQ_64:
  case X86::REP_STOSW_32:
  case X86::REP_STOSW_64:
    return true;
  default:
    break;
  }
  return false;
}

/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjusts the stack pointer. Adjust the stack pointer to
/// allocate space for local variables. Also emit labels used by the exception
/// handler to generate the exception handling frames.

/*
  Here's a gist of what gets emitted:

  ; Establish frame pointer, if needed
  [if needs FP]
      push %rbp
      .cfi_def_cfa_offset 16
      .cfi_offset %rbp, -16
      .seh_pushreg %rbp
      mov  %rsp, %rbp
      .cfi_def_cfa_register %rbp

  ; Spill general-purpose registers
  [for all callee-saved GPRs]
      pushq %<reg>
      [if not needs FP]
         .cfi_def_cfa_offset (offset from RETADDR)
      .seh_pushreg %<reg>

  ; If the required stack alignment > default stack alignment
  ; rsp needs to be re-aligned. This creates a "re-alignment gap"
  ; of unknown size in the stack frame.
  [if stack needs re-alignment]
      and $MASK, %rsp

  ; Allocate space for locals
  [if target is Windows and allocated space > 4096 bytes]
      ; Windows needs special care for allocations larger
      ; than one page.
      mov $NNN, %rax
      call ___chkstk_ms/___chkstk
      sub %rax, %rsp
  [else]
      sub $NNN, %rsp

  [if needs FP]
      .seh_stackalloc (size of XMM spill slots)
      .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
  [else]
      .seh_stackalloc NNN

  ; Spill XMMs
  ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved,
  ; they may get spilled on any platform, if the current function
  ; calls @llvm.eh.unwind.init
  [if needs FP]
      [for all callee-saved XMM registers]
          movaps %<xmm reg>, -MMM(%rbp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
          ; i.e. the offset relative to (%rbp - SEHFrameOffset)
  [else]
      [for all callee-saved XMM registers]
          movaps %<xmm reg>, KKK(%rsp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, KKK

  .seh_endprologue

  [if needs base pointer]
      mov %rsp, %rbx
      [if needs to restore base pointer]
          mov %rsp, -MMM(%rbp)

  ; Emit CFI info
  [if needs FP]
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rbp)
  [else]
      .cfi_def_cfa_offset (offset from RETADDR)
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rsp)

  Notes:
  - .seh directives are emitted only for Windows 64 ABI
  - .cv_fpo directives are emitted on win32 when emitting CodeView
  - .cfi directives are emitted for all other ABIs
  - for 32-bit code, substitute %e?? registers for %r??
*/

1588 MachineBasicBlock &MBB) const {
1589 assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
1590 "MF used frame lowering for wrong subtarget");
1592 MachineFrameInfo &MFI = MF.getFrameInfo();
1593 const Function &Fn = MF.getFunction();
1595 uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
1596 uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate.
1597 bool IsFunclet = MBB.isEHFuncletEntry();
1599 if (Fn.hasPersonalityFn())
1600 Personality = classifyEHPersonality(Fn.getPersonalityFn());
1601 bool FnHasClrFunclet =
1602 MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR;
1603 bool IsClrFunclet = IsFunclet && FnHasClrFunclet;
1604 bool HasFP = hasFP(MF);
1605 bool IsWin64Prologue = isWin64Prologue(MF);
1606 bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry();
1607 // FIXME: Emit FPO data for EH funclets.
1608 bool NeedsWinFPO = !IsFunclet && STI.isTargetWin32() &&
1610 bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO;
1611 bool NeedsDwarfCFI = needsDwarfCFI(MF);
1612 Register FramePtr = TRI->getFrameRegister(MF);
1613 const Register MachineFramePtr =
1614 STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(FramePtr, 64))
1615 : FramePtr;
1616 Register BasePtr = TRI->getBaseRegister();
1617 bool HasWinCFI = false;
1618
1619 // Debug location must be unknown since the first debug location is used
1620 // to determine the end of the prologue.
1621 DebugLoc DL;
1622 Register ArgBaseReg;
1623
1624 // Emit extra prolog for argument stack slot reference.
1625 if (auto *MI = X86FI->getStackPtrSaveMI()) {
1626 // MI is lea instruction that created in X86ArgumentStackSlotPass.
1627 // Creat extra prolog for stack realignment.
1628 ArgBaseReg = MI->getOperand(0).getReg();
1629 // leal 4(%esp), %basereg
1630 // .cfi_def_cfa %basereg, 0
1631 // andl $-128, %esp
1632 // pushl -4(%basereg)
1633 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::LEA64r : X86::LEA32r),
1634 ArgBaseReg)
1636 .addImm(1)
1637 .addUse(X86::NoRegister)
1639 .addUse(X86::NoRegister)
1641 if (NeedsDwarfCFI) {
1642 // .cfi_def_cfa %basereg, 0
1643 unsigned DwarfStackPtr = TRI->getDwarfRegNum(ArgBaseReg, true);
1644 BuildCFI(MBB, MBBI, DL,
1645 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, 0),
1647 }
1648 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1649 int64_t Offset = -(int64_t)SlotSize;
1650 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm))
1651 .addReg(ArgBaseReg)
1652 .addImm(1)
1653 .addReg(X86::NoRegister)
1654 .addImm(Offset)
1655 .addReg(X86::NoRegister)
1657 }
1658
1659 // Space reserved for stack-based arguments when making a (ABI-guaranteed)
1660 // tail call.
1661 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
1662 if (TailCallArgReserveSize && IsWin64Prologue)
1663 report_fatal_error("Can't handle guaranteed tail call under win64 yet");
1664
1665 const bool EmitStackProbeCall =
1666 STI.getTargetLowering()->hasStackProbeSymbol(MF);
1667 unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
1668
1669 if (HasFP && X86FI->hasSwiftAsyncContext()) {
1672 if (STI.swiftAsyncContextIsDynamicallySet()) {
1673 // The special symbol below is absolute and has a *value* suitable to be
1674 // combined with the frame pointer directly.
1675 BuildMI(MBB, MBBI, DL, TII.get(X86::OR64rm), MachineFramePtr)
1676 .addUse(MachineFramePtr)
1677 .addUse(X86::RIP)
1678 .addImm(1)
1679 .addUse(X86::NoRegister)
1680 .addExternalSymbol("swift_async_extendedFramePointerFlags",
1682 .addUse(X86::NoRegister);
1683 break;
1684 }
1685 [[fallthrough]];
1686
1688 assert(
1689 !IsWin64Prologue &&
1690 "win64 prologue does not set the bit 60 in the saved frame pointer");
1691 BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8), MachineFramePtr)
1692 .addUse(MachineFramePtr)
1693 .addImm(60)
1695 break;
1696
1698 break;
1699 }
1700 }
1701
1702 // Re-align the stack on 64-bit if the x86-interrupt calling convention is
1703 // used and an error code was pushed, since the x86-64 ABI requires a 16-byte
1704 // stack alignment.
1706 Fn.arg_size() == 2) {
1707 StackSize += 8;
1708 MFI.setStackSize(StackSize);
1709
1710 // Update the stack pointer by pushing a register. This is the instruction
1711 // emitted that would be end up being emitted by a call to `emitSPUpdate`.
1712 // Hard-coding the update to a push avoids emitting a second
1713 // `STACKALLOC_W_PROBING` instruction in the save block: We know that stack
1714 // probing isn't needed anyways for an 8-byte update.
1715 // Pushing a register leaves us in a similar situation to a regular
1716 // function call where we know that the address at (rsp-8) is writeable.
1717 // That way we avoid any off-by-ones with stack probing for additional
1718 // stack pointer updates later on.
1719 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1720 .addReg(X86::RAX, RegState::Undef)
1721 .setMIFlag(MachineInstr::FrameSetup);
1722 }
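// In other words (hedged sketch), the 8-byte realignment is emitted as
//   pushq %rax        ; rsp -= 8, and (%rsp) is known writable
// rather than as "subq $8, %rsp", which could otherwise interact with
// stack-probe bookkeeping.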
1723
1724 // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
1725 // function, and use up to 128 bytes of stack space, don't have a frame
1726 // pointer, calls, or dynamic alloca, then we do not need to adjust the
1727 // stack pointer (we fit in the Red Zone). We also check that we don't
1728 // push and pop from the stack.
1729 if (has128ByteRedZone(MF) && !TRI->hasStackRealignment(MF) &&
1730 !MFI.hasVarSizedObjects() && // No dynamic alloca.
1731 !MFI.adjustsStack() && // No calls.
1732 !EmitStackProbeCall && // No stack probes.
1733 !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
1734 !MF.shouldSplitStack()) { // Regular stack
1735 uint64_t MinSize =
1736 X86FI->getCalleeSavedFrameSize() - X86FI->getTCReturnAddrDelta();
1737 if (HasFP)
1738 MinSize += SlotSize;
1739 X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0);
1740 StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
1741 MFI.setStackSize(StackSize);
1742 }
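// Worked example (hedged): a leaf x86-64 function with StackSize = 96, no
// frame pointer and no pushed CSRs passes the checks above, so MinSize = 0
// and StackSize becomes max(0, 0) = 0 -- the locals live entirely in the
// 128-byte red zone and no SP adjustment is emitted for them.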
1743
1744 // Insert stack pointer adjustment for later moving of return addr. Only
1745 // applies to tail call optimized functions where the callee argument stack
1746 // size is bigger than the caller's.
1747 if (TailCallArgReserveSize != 0) {
1748 BuildStackAdjustment(MBB, MBBI, DL, -(int)TailCallArgReserveSize,
1749 /*InEpilogue=*/false)
1750 .setMIFlag(MachineInstr::FrameSetup);
1751 }
1752
1753 // Mapping for machine moves:
1754 //
1755 // DST: VirtualFP AND
1756 // SRC: VirtualFP => DW_CFA_def_cfa_offset
1757 // ELSE => DW_CFA_def_cfa
1758 //
1759 // SRC: VirtualFP AND
1760 // DST: Register => DW_CFA_def_cfa_register
1761 //
1762 // ELSE
1763 // OFFSET < 0 => DW_CFA_offset_extended_sf
1764 // REG < 64 => DW_CFA_offset + Reg
1765 // ELSE => DW_CFA_offset_extended
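// For example (illustrative only), a standard frame-pointer prologue maps to
//   pushq %rbp          -> DW_CFA_def_cfa_offset 16
//                          DW_CFA_offset %rbp, -16
//   movq  %rsp, %rbp    -> DW_CFA_def_cfa_register %rbp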
1766
1767 uint64_t NumBytes = 0;
1768 int stackGrowth = -SlotSize;
1769
1770 // Find the funclet establisher parameter
1771 MCRegister Establisher;
1772 if (IsClrFunclet)
1773 Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX;
1774 else if (IsFunclet)
1775 Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX;
1776
1777 if (IsWin64Prologue && IsFunclet && !IsClrFunclet) {
1778 // Immediately spill establisher into the home slot.
1779 // The runtime cares about this.
1780 // MOV64mr %rdx, 16(%rsp)
1781 unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
1782 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16)
1783 .addReg(Establisher)
1784 .setMIFlag(MachineInstr::FrameSetup);
1785 MBB.addLiveIn(Establisher);
1786 }
1787
1788 if (HasFP) {
1789 assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved");
1790
1791 // Calculate required stack adjustment.
1792 uint64_t FrameSize = StackSize - SlotSize;
1793 NumBytes =
1794 FrameSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1795
1796 // Callee-saved registers are pushed on the stack before it is realigned.
1797 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
1798 NumBytes = alignTo(NumBytes, MaxAlign);
1799
1800 // Save EBP/RBP into the appropriate stack slot.
1801 BuildMI(MBB, MBBI, DL,
1802 TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
1803 .addReg(MachineFramePtr, RegState::Kill)
1804 .setMIFlag(MachineInstr::FrameSetup);
1805
1806 if (NeedsDwarfCFI && !ArgBaseReg.isValid()) {
1807 // Mark the place where EBP/RBP was saved.
1808 // Define the current CFA rule to use the provided offset.
1809 assert(StackSize);
1810 BuildCFI(MBB, MBBI, DL,
1811 MCCFIInstruction::cfiDefCfaOffset(
1812 nullptr, -2 * stackGrowth + (int)TailCallArgReserveSize),
1813 MachineInstr::FrameSetup);
1814
1815 // Change the rule for the FramePtr to be an "offset" rule.
1816 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1817 BuildCFI(MBB, MBBI, DL,
1818 MCCFIInstruction::createOffset(nullptr, DwarfFramePtr,
1819 2 * stackGrowth -
1820 (int)TailCallArgReserveSize),
1821 MachineInstr::FrameSetup);
1822 }
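// With SlotSize == 8 and no tail-call reserve, the two directives above come
// out as ".cfi_def_cfa_offset 16" and ".cfi_offset %rbp, -16" (sketch).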
1823
1824 if (NeedsWinCFI) {
1825 HasWinCFI = true;
1826 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1827 .addImm(FramePtr)
1828 .setMIFlag(MachineInstr::FrameSetup);
1829 }
1830
1831 if (!IsFunclet) {
1832 if (X86FI->hasSwiftAsyncContext()) {
1833 assert(!IsWin64Prologue &&
1834 "win64 prologue does not store async context right below rbp");
1835 const auto &Attrs = MF.getFunction().getAttributes();
1836
1837 // Before we update the live frame pointer we have to ensure there's a
1838 // valid (or null) asynchronous context in its slot just before FP in
1839 // the frame record, so store it now.
1840 if (Attrs.hasAttrSomewhere(Attribute::SwiftAsync)) {
1841 // We have an initial context in r14, store it just before the frame
1842 // pointer.
1843 MBB.addLiveIn(X86::R14);
1844 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1845 .addReg(X86::R14)
1846 .setMIFlag(MachineInstr::FrameSetup);
1847 } else {
1848 // No initial context, store null so that there's no pointer that
1849 // could be misused.
1850 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i32))
1851 .addImm(0)
1852 .setMIFlag(MachineInstr::FrameSetup);
1853 }
1854
1855 if (NeedsWinCFI) {
1856 HasWinCFI = true;
1857 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1858 .addImm(X86::R14)
1859 .setMIFlag(MachineInstr::FrameSetup);
1860 }
1861
1862 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr)
1863 .addUse(X86::RSP)
1864 .addImm(1)
1865 .addUse(X86::NoRegister)
1866 .addImm(8)
1867 .addUse(X86::NoRegister)
1868 .setMIFlag(MachineInstr::FrameSetup);
1869 BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri32), X86::RSP)
1870 .addUse(X86::RSP)
1871 .addImm(8)
1872 .setMIFlag(MachineInstr::FrameSetup);
1873 }
1874
1875 if (!IsWin64Prologue && !IsFunclet) {
1876 // Update EBP with the new base value.
1877 if (!X86FI->hasSwiftAsyncContext())
1878 BuildMI(MBB, MBBI, DL,
1879 TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
1880 FramePtr)
1881 .addReg(StackPtr)
1882 .setMIFlag(MachineInstr::FrameSetup);
1883
1884 if (NeedsDwarfCFI) {
1885 if (ArgBaseReg.isValid()) {
1886 SmallString<64> CfaExpr;
1887 CfaExpr.push_back(dwarf::DW_CFA_expression);
1888 uint8_t buffer[16];
1889 unsigned DwarfReg = TRI->getDwarfRegNum(MachineFramePtr, true);
1890 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
1891 CfaExpr.push_back(2);
1892 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
1893 CfaExpr.push_back(0);
1894 // DW_CFA_expression: reg5 DW_OP_breg5 +0
1895 BuildCFI(MBB, MBBI, DL,
1896 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
1897 MachineInstr::FrameSetup);
1898 } else {
1899 // Mark effective beginning of when frame pointer becomes valid.
1900 // Define the current CFA to use the EBP/RBP register.
1901 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1902 BuildCFI(
1903 MBB, MBBI, DL,
1904 MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
1905 MachineInstr::FrameSetup);
1906 }
1907 }
1908
1909 if (NeedsWinFPO) {
1910 // .cv_fpo_setframe $FramePtr
1911 HasWinCFI = true;
1912 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
1913 .addImm(FramePtr)
1914 .addImm(0)
1915 .setMIFlag(MachineInstr::FrameSetup);
1916 }
1917 }
1918 }
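// Net effect on non-Win64 targets with a frame pointer (illustrative):
//   movq %rsp, %rbp
//   .cfi_def_cfa_register %rbp
// after which the CFA no longer depends on later SP adjustments.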
1919 } else {
1920 assert(!IsFunclet && "funclets without FPs not yet implemented");
1921 NumBytes =
1922 StackSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1923 }
1924
1925 // Update the offset adjustment, which is mainly used by codeview to translate
1926 // from ESP to VFRAME relative local variable offsets.
1927 if (!IsFunclet) {
1928 if (HasFP && TRI->hasStackRealignment(MF))
1929 MFI.setOffsetAdjustment(-NumBytes);
1930 else
1931 MFI.setOffsetAdjustment(-StackSize);
1932 }
1933
1934 // For EH funclets, only allocate enough space for outgoing calls. Save the
1935 // NumBytes value that we would've used for the parent frame.
1936 unsigned ParentFrameNumBytes = NumBytes;
1937 if (IsFunclet)
1938 NumBytes = getWinEHFuncletFrameSize(MF);
1939
1940 // Skip the callee-saved push instructions.
1941 bool PushedRegs = false;
1942 int StackOffset = 2 * stackGrowth;
1943 MachineBasicBlock::const_iterator LastCSPush = MBBI;
1944 auto IsCSPush = [&](const MachineBasicBlock::iterator &MBBI) {
1945 if (MBBI == MBB.end() || !MBBI->getFlag(MachineInstr::FrameSetup))
1946 return false;
1947 unsigned Opc = MBBI->getOpcode();
1948 return Opc == X86::PUSH32r || Opc == X86::PUSH64r || Opc == X86::PUSHP64r ||
1949 Opc == X86::PUSH2 || Opc == X86::PUSH2P;
1950 };
1951
1952 while (IsCSPush(MBBI)) {
1953 PushedRegs = true;
1954 Register Reg = MBBI->getOperand(0).getReg();
1955 LastCSPush = MBBI;
1956 ++MBBI;
1957 unsigned Opc = LastCSPush->getOpcode();
1958
1959 if (!HasFP && NeedsDwarfCFI) {
1960 // Mark callee-saved push instruction.
1961 // Define the current CFA rule to use the provided offset.
1962 assert(StackSize);
1963 // Compared to push, push2 introduces more stack offset (one more
1964 // register).
1965 if (Opc == X86::PUSH2 || Opc == X86::PUSH2P)
1966 StackOffset += stackGrowth;
1967 BuildCFI(MBB, MBBI, DL,
1968 MCCFIInstruction::cfiDefCfaOffset(nullptr, -StackOffset),
1969 MachineInstr::FrameSetup);
1970 StackOffset += stackGrowth;
1971 }
1972
1973 if (NeedsWinCFI) {
1974 HasWinCFI = true;
1975 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1976 .addImm(Reg)
1977 .setMIFlag(MachineInstr::FrameSetup);
1978 if (Opc == X86::PUSH2 || Opc == X86::PUSH2P)
1979 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1980 .addImm(LastCSPush->getOperand(1).getReg())
1981 .setMIFlag(MachineInstr::FrameSetup);
1982 }
1983 }
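// E.g. for a frameless x86-64 function (SlotSize == 8), "pushq %rbx; pushq
// %r14" yields ".cfi_def_cfa_offset 16" then ".cfi_def_cfa_offset 24" --
// one directive per push, 8 bytes apart (hedged sketch).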
1984
1985 // Realign stack after we pushed callee-saved registers (so that we'll be
1986 // able to calculate their offsets from the frame pointer).
1987 // Don't do this for Win64, it needs to realign the stack after the prologue.
1988 if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF) &&
1989 !ArgBaseReg.isValid()) {
1990 assert(HasFP && "There should be a frame pointer if stack is realigned.");
1991 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1992
1993 if (NeedsWinCFI) {
1994 HasWinCFI = true;
1995 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlign))
1996 .addImm(MaxAlign)
1997 .setMIFlag(MachineInstr::FrameSetup);
1998 }
1999 }
2000
2001 // If there is a SUB32ri of ESP immediately before this instruction, merge
2002 // the two. This can be the case when tail call elimination is enabled and
2003 // the callee has more arguments than the caller.
2004 NumBytes = mergeSPUpdates(
2005 MBB, MBBI, [NumBytes](int64_t Offset) { return NumBytes - Offset; },
2006 true);
2007
2008 // Adjust stack pointer: ESP -= numbytes.
2009
2010 // Windows and cygwin/mingw require a prologue helper routine when allocating
2011 // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
2012 // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
2013 // stack and adjust the stack pointer in one go. The 64-bit version of
2014 // __chkstk is only responsible for probing the stack. The 64-bit prologue is
2015 // responsible for adjusting the stack pointer. Touching the stack at 4K
2016 // increments is necessary to ensure that the guard pages used by the OS
2017 // virtual memory manager are allocated in correct sequence.
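// On Win64 a large allocation therefore typically materializes as roughly
//   movl  $NumBytes, %eax
//   callq __chkstk
//   subq  %rax, %rsp
// (hedged sketch; the exact sequence is chosen by emitStackProbe below).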
2018 uint64_t AlignedNumBytes = NumBytes;
2019 if (IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF))
2020 AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign);
2021 if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) {
2022 assert(!X86FI->getUsesRedZone() &&
2023 "The Red Zone is not accounted for in stack probes");
2024
2025 // Check whether EAX is livein for this block.
2026 bool isEAXAlive = isEAXLiveIn(MBB);
2027
2028 if (isEAXAlive) {
2029 if (Is64Bit) {
2030 // Save RAX
2031 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
2032 .addReg(X86::RAX, RegState::Kill)
2033 .setMIFlag(MachineInstr::FrameSetup);
2034 } else {
2035 // Save EAX
2036 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
2037 .addReg(X86::EAX, RegState::Kill)
2038 .setMIFlag(MachineInstr::FrameSetup);
2039 }
2040 }
2041
2042 if (Is64Bit) {
2043 // Handle the 64-bit Windows ABI case where we need to call __chkstk.
2044 // Function prologue is responsible for adjusting the stack pointer.
2045 int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes;
2046 BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Alloc)), X86::RAX)
2047 .addImm(Alloc)
2048 .setMIFlag(MachineInstr::FrameSetup);
2049 } else {
2050 // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
2051 // We'll also use 4 already allocated bytes for EAX.
2052 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
2053 .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
2054 .setMIFlag(MachineInstr::FrameSetup);
2055 }
2056
2057 // Call __chkstk, __chkstk_ms, or __alloca.
2058 emitStackProbe(MF, MBB, MBBI, DL, true);
2059
2060 if (isEAXAlive) {
2061 // Restore RAX/EAX
2062 MachineInstr *MI;
2063 if (Is64Bit)
2064 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV64rm), X86::RAX),
2065 StackPtr, false, NumBytes - 8);
2066 else
2067 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX),
2068 StackPtr, false, NumBytes - 4);
2069 MI->setFlag(MachineInstr::FrameSetup);
2070 MBB.insert(MBBI, MI);
2071 }
2072 } else if (NumBytes) {
2073 emitSPUpdate(MBB, MBBI, DL, -(int64_t)NumBytes, /*InEpilogue=*/false);
2074 }
2075
2076 if (NeedsWinCFI && NumBytes) {
2077 HasWinCFI = true;
2078 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
2079 .addImm(NumBytes)
2080 .setMIFlag(MachineInstr::FrameSetup);
2081 }
2082
2083 int SEHFrameOffset = 0;
2084 Register SPOrEstablisher;
2085 if (IsFunclet) {
2086 if (IsClrFunclet) {
2087 // The establisher parameter passed to a CLR funclet is actually a pointer
2088 // to the (mostly empty) frame of its nearest enclosing funclet; we have
2089 // to find the root function establisher frame by loading the PSPSym from
2090 // the intermediate frame.
2091 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2092 MachinePointerInfo NoInfo;
2093 MBB.addLiveIn(Establisher);
2094 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher),
2095 Establisher, false, PSPSlotOffset)
2096 .addMemOperand(MF.getMachineMemOperand(
2097 NoInfo, MachineMemOperand::MOLoad, SlotSize, Align(SlotSize)));
2099 // Save the root establisher back into the current funclet's (mostly
2100 // empty) frame, in case a sub-funclet or the GC needs it.
2101 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr,
2102 false, PSPSlotOffset)
2103 .addReg(Establisher)
2104 .addMemOperand(MF.getMachineMemOperand(
2105 NoInfo,
2106 MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
2107 SlotSize, Align(SlotSize)));
2108 }
2109 SPOrEstablisher = Establisher;
2110 } else {
2111 SPOrEstablisher = StackPtr;
2112 }
2113
2114 if (IsWin64Prologue && HasFP) {
2115 // Set RBP to a small fixed offset from RSP. In the funclet case, we base
2116 // this calculation on the incoming establisher, which holds the value of
2117 // RSP from the parent frame at the end of the prologue.
2118 SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes);
2119 if (SEHFrameOffset)
2120 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
2121 SPOrEstablisher, false, SEHFrameOffset);
2122 else
2123 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr)
2124 .addReg(SPOrEstablisher);
2125
2126 // If this is not a funclet, emit the CFI describing our frame pointer.
2127 if (NeedsWinCFI && !IsFunclet) {
2128 assert(!NeedsWinFPO && "this setframe incompatible with FPO data");
2129 HasWinCFI = true;
2130 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
2131 .addImm(FramePtr)
2132 .addImm(SEHFrameOffset)
2133 .setMIFlag(MachineInstr::FrameSetup);
2134 if (isAsynchronousEHPersonality(Personality))
2135 MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset;
2136 }
2137 } else if (IsFunclet && STI.is32Bit()) {
2138 // Reset EBP / ESI to something good for funclets.
2139 MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL);
2140 // If we're a catch funclet, we can be returned to via catchret. Save ESP
2141 // into the registration node so that the runtime will restore it for us.
2142 if (!MBB.isCleanupFuncletEntry()) {
2143 assert(Personality == EHPersonality::MSVC_CXX);
2144 Register FrameReg;
2145 int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex;
2146 int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg).getFixed();
2147 // ESP is the first field, so no extra displacement is needed.
2148 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg,
2149 false, EHRegOffset)
2150 .addReg(X86::ESP);
2151 }
2152 }
2153
2154 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
2155 const MachineInstr &FrameInstr = *MBBI;
2156 ++MBBI;
2157
2158 if (NeedsWinCFI) {
2159 int FI;
2160 if (Register Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
2161 if (X86::FR64RegClass.contains(Reg)) {
2162 int Offset;
2163 Register IgnoredFrameReg;
2164 if (IsWin64Prologue && IsFunclet)
2165 Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg);
2166 else
2167 Offset =
2168 getFrameIndexReference(MF, FI, IgnoredFrameReg).getFixed() +
2169 SEHFrameOffset;
2170
2171 HasWinCFI = true;
2172 assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data");
2173 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
2174 .addImm(Reg)
2175 .addImm(Offset)
2176 .setMIFlag(MachineInstr::FrameSetup);
2177 }
2178 }
2179 }
2180 }
2181
2182 if (NeedsWinCFI && HasWinCFI)
2183 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
2184 .setMIFlag(MachineInstr::FrameSetup);
2185
2186 if (FnHasClrFunclet && !IsFunclet) {
2187 // Save the so-called Initial-SP (i.e. the value of the stack pointer
2188 // immediately after the prolog) into the PSPSlot so that funclets
2189 // and the GC can recover it.
2190 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2191 auto PSPInfo = MachinePointerInfo::getFixedStack(
2192 MF, MF.getWinEHFuncInfo()->PSPSymFrameIdx, 0);
2193 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, false,
2194 PSPSlotOffset)
2195 .addReg(StackPtr)
2196 .addMemOperand(MF.getMachineMemOperand(
2197 PSPInfo, MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
2198 SlotSize, Align(SlotSize)));
2199 }
2200
2201 // Realign stack after we spilled callee-saved registers (so that we'll be
2202 // able to calculate their offsets from the frame pointer).
2203 // Win64 requires aligning the stack after the prologue.
2204 if (IsWin64Prologue && TRI->hasStackRealignment(MF)) {
2205 assert(HasFP && "There should be a frame pointer if stack is realigned.");
2206 BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign);
2207 }
2208
2209 // We already dealt with stack realignment and funclets above.
2210 if (IsFunclet && STI.is32Bit())
2211 return;
2212
2213 // If we need a base pointer, set it up here. It's whatever the value
2214 // of the stack pointer is at this point. Any variable size objects
2215 // will be allocated after this, so we can still use the base pointer
2216 // to reference locals.
2217 if (TRI->hasBasePointer(MF)) {
2218 // Update the base pointer with the current stack pointer.
2219 unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
2220 BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
2221 .addReg(SPOrEstablisher)
2222 .setMIFlag(MachineInstr::FrameSetup);
2223 if (X86FI->getRestoreBasePointer()) {
2224 // Stash value of base pointer. Saving RSP instead of EBP shortens
2225 // dependence chain. Used by SjLj EH.
2226 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2227 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), FramePtr, true,
2228 X86FI->getRestoreBasePointerOffset())
2229 .addReg(SPOrEstablisher)
2230 .setMIFlag(MachineInstr::FrameSetup);
2231 }
2232
2233 if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) {
2234 // Stash the value of the frame pointer relative to the base pointer for
2235 // Win32 EH. This supports Win32 EH, which does the inverse of the above:
2236 // it recovers the frame pointer from the base pointer rather than the
2237 // other way around.
2238 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2239 Register UsedReg;
2240 int Offset =
2241 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
2242 .getFixed();
2243 assert(UsedReg == BasePtr);
2244 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset)
2245 .addReg(FramePtr)
2246 .setMIFlag(MachineInstr::FrameSetup);
2247 }
2248 }
2249 if (ArgBaseReg.isValid()) {
2250 // Save argument base pointer.
2251 auto *MI = X86FI->getStackPtrSaveMI();
2252 int FI = MI->getOperand(1).getIndex();
2253 unsigned MOVmr = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
2254 // movl %basereg, offset(%ebp)
2255 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), FI)
2256 .addReg(ArgBaseReg)
2257 .setMIFlag(MachineInstr::FrameSetup);
2258 }
2259
2260 if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
2261 // Mark end of stack pointer adjustment.
2262 if (!HasFP && NumBytes) {
2263 // Define the current CFA rule to use the provided offset.
2264 assert(StackSize);
2265 BuildCFI(
2266 MBB, MBBI, DL,
2267 MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize - stackGrowth),
2268 MachineInstr::FrameSetup);
2269 }
2270
2271 // Emit DWARF info specifying the offsets of the callee-saved registers.
2272 emitCalleeSavedFrameMoves(MBB, MBBI, DL, true);
2273 }
2274
2275 // X86 Interrupt handling function cannot assume anything about the direction
2276 // flag (DF in EFLAGS register). Clear this flag by inserting a "cld"
2277 // instruction in the prologue of each interrupt handler function.
2278 //
2279 // Emit a "cld" instruction only in these cases:
2280 // 1. The interrupt handling function uses any of the "rep" instructions.
2281 // 2. Interrupt handling function calls another function.
2282 // 3. If there are any inline asm blocks, as we do not know what they do
2283 //
2284 // TODO: We should also emit cld if we detect the use of std, but as of now,
2285 // the compiler does not even emit that instruction or even define it, so in
2286 // practice, this would only happen with inline asm, which we cover anyway.
2287 if (Fn.getCallingConv() == CallingConv::X86_INTR) {
2288 bool NeedsCLD = false;
2289
2290 for (const MachineBasicBlock &B : MF) {
2291 for (const MachineInstr &MI : B) {
2292 if (MI.isCall()) {
2293 NeedsCLD = true;
2294 break;
2295 }
2296
2297 if (isOpcodeRep(MI.getOpcode())) {
2298 NeedsCLD = true;
2299 break;
2300 }
2301
2302 if (MI.isInlineAsm()) {
2303 // TODO: Parse asm for rep instructions or call sites?
2304 // For now, let's play it safe and emit a cld instruction
2305 // just in case.
2306 NeedsCLD = true;
2307 break;
2308 }
2309 }
2310 }
2311
2312 if (NeedsCLD) {
2313 BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
2314 .setMIFlag(MachineInstr::FrameSetup);
2315 }
2316 }
2317
2318 // At this point we know if the function has WinCFI or not.
2319 MF.setHasWinCFI(HasWinCFI);
2320}
2321
2322 bool X86FrameLowering::canUseLEAForSPInEpilogue(
2323 const MachineFunction &MF) const {
2324 // We can't use LEA instructions for adjusting the stack pointer if we don't
2325 // have a frame pointer in the Win64 ABI. Only ADD instructions may be used
2326 // to deallocate the stack.
2327 // This means that we can use LEA for SP in two situations:
2328 // 1. We *aren't* using the Win64 ABI which means we are free to use LEA.
2329 // 2. We *have* a frame pointer which means we are permitted to use LEA.
2330 return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF);
2331}
2332
2333 static bool isFuncletReturnInstr(const MachineInstr &MI) {
2334 switch (MI.getOpcode()) {
2335 case X86::CATCHRET:
2336 case X86::CLEANUPRET:
2337 return true;
2338 default:
2339 return false;
2340 }
2341 llvm_unreachable("impossible");
2342}
2343
2344// CLR funclets use a special "Previous Stack Pointer Symbol" slot on the
2345// stack. It holds a pointer to the bottom of the root function frame. The
2346// establisher frame pointer passed to a nested funclet may point to the
2347// (mostly empty) frame of its parent funclet, but it will need to find
2348// the frame of the root function to access locals. To facilitate this,
2349// every funclet copies the pointer to the bottom of the root function
2350// frame into a PSPSym slot in its own (mostly empty) stack frame. Using the
2351// same offset for the PSPSym in the root function frame that's used in the
2352// funclets' frames allows each funclet to dynamically accept any ancestor
2353// frame as its establisher argument (the runtime doesn't guarantee the
2354// immediate parent for some reason lost to history), and also allows the GC,
2355// which uses the PSPSym for some bookkeeping, to find it in any funclet's
2356// frame with only a single offset reported for the entire method.
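// Rough layout sketch (illustrative): every frame, root and funclet alike,
// stores the same root-frame pointer at the same SP-relative slot,
//   [post-prolog SP + getPSPSlotOffsetFromSP()] = bottom of root frame
// which is what lets any ancestor frame serve as the establisher.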
2357unsigned
2358X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const {
2359 const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo();
2360 Register SPReg;
2361 int Offset = getFrameIndexReferencePreferSP(MF, Info.PSPSymFrameIdx, SPReg,
2362 /*IgnoreSPUpdates*/ true)
2363 .getFixed();
2364 assert(Offset >= 0 && SPReg == TRI->getStackRegister());
2365 return static_cast<unsigned>(Offset);
2366}
2367
2368unsigned
2369X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
2370 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2371 // This is the size of the pushed CSRs.
2372 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2373 // This is the size of callee saved XMMs.
2374 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2375 unsigned XMMSize =
2376 WinEHXMMSlotInfo.size() * TRI->getSpillSize(X86::VR128RegClass);
2377 // This is the amount of stack a funclet needs to allocate.
2378 unsigned UsedSize;
2379 EHPersonality Personality =
2380 classifyEHPersonality(MF.getFunction().getPersonalityFn());
2381 if (Personality == EHPersonality::CoreCLR) {
2382 // CLR funclets need to hold enough space to include the PSPSym, at the
2383 // same offset from the stack pointer (immediately after the prolog) as it
2384 // resides at in the main function.
2385 UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize;
2386 } else {
2387 // Other funclets just need enough stack for outgoing call arguments.
2388 UsedSize = MF.getFrameInfo().getMaxCallFrameSize();
2389 }
2390 // RBP is not included in the callee saved register block. After pushing RBP,
2391 // everything is 16 byte aligned. Everything we allocate before an outgoing
2392 // call must also be 16 byte aligned.
2393 unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlign());
2394 // Subtract out the size of the callee saved registers. This is how much stack
2395 // each funclet will allocate.
2396 return FrameSizeMinusRBP + XMMSize - CSSize;
2397}
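// Worked example (hedged): with CSSize = 16, XMMSize = 0 and UsedSize = 40,
// alignTo(16 + 40, 16) = 64, so each funclet allocates 64 + 0 - 16 = 48
// bytes on top of its pushed CSRs.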
2398
2399static bool isTailCallOpcode(unsigned Opc) {
2400 return Opc == X86::TCRETURNri || Opc == X86::TCRETURN_WIN64ri ||
2401 Opc == X86::TCRETURN_HIPE32ri || Opc == X86::TCRETURNdi ||
2402 Opc == X86::TCRETURNmi || Opc == X86::TCRETURNri64 ||
2403 Opc == X86::TCRETURNri64_ImpCall || Opc == X86::TCRETURNdi64 ||
2404 Opc == X86::TCRETURNmi64 || Opc == X86::TCRETURN_WINmi64;
2405}
2406
2407 void X86FrameLowering::emitEpilogue(MachineFunction &MF,
2408 MachineBasicBlock &MBB) const {
2409 const MachineFrameInfo &MFI = MF.getFrameInfo();
2410 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2411 MachineBasicBlock::iterator Terminator = MBB.getFirstTerminator();
2412 MachineBasicBlock::iterator MBBI = Terminator;
2413 DebugLoc DL;
2414 if (MBBI != MBB.end())
2415 DL = MBBI->getDebugLoc();
2416 // standard x86_64 uses 64-bit frame/stack pointers, x32 - 32-bit.
2417 const bool Is64BitILP32 = STI.isTarget64BitILP32();
2418 Register FramePtr = TRI->getFrameRegister(MF);
2419 Register MachineFramePtr =
2420 Is64BitILP32 ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr;
2421
2422 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2423 bool NeedsWin64CFI =
2424 IsWin64Prologue && MF.getFunction().needsUnwindTableEntry();
2425 bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(*MBBI);
2426
2427 // Get the number of bytes to allocate from the FrameInfo.
2428 uint64_t StackSize = MFI.getStackSize();
2429 uint64_t MaxAlign = calculateMaxStackAlign(MF);
2430 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2431 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
2432 bool HasFP = hasFP(MF);
2433 uint64_t NumBytes = 0;
2434
2435 bool NeedsDwarfCFI = (!MF.getTarget().getTargetTriple().isOSDarwin() &&
2436 !MF.getTarget().getTargetTriple().isOSWindows() &&
2437 !MF.getTarget().getTargetTriple().isUEFI()) &&
2438 MF.needsFrameMoves();
2439
2440 Register ArgBaseReg;
2441 if (auto *MI = X86FI->getStackPtrSaveMI()) {
2442 unsigned Opc = X86::LEA32r;
2443 Register StackReg = X86::ESP;
2444 ArgBaseReg = MI->getOperand(0).getReg();
2445 if (STI.is64Bit()) {
2446 Opc = X86::LEA64r;
2447 StackReg = X86::RSP;
2448 }
2449 // leal -4(%basereg), %esp
2450 // .cfi_def_cfa %esp, 4
2451 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackReg)
2452 .addUse(ArgBaseReg)
2453 .addImm(1)
2454 .addUse(X86::NoRegister)
2455 .addImm(-(int64_t)SlotSize)
2456 .addUse(X86::NoRegister)
2457 .setMIFlag(MachineInstr::FrameDestroy);
2458 if (NeedsDwarfCFI) {
2459 unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackReg, true);
2460 BuildCFI(MBB, MBBI, DL,
2461 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2462 MachineInstr::FrameDestroy);
2463 --MBBI;
2464 }
2465 --MBBI;
2466 }
2467
2468 if (IsFunclet) {
2469 assert(HasFP && "EH funclets without FP not yet implemented");
2470 NumBytes = getWinEHFuncletFrameSize(MF);
2471 } else if (HasFP) {
2472 // Calculate required stack adjustment.
2473 uint64_t FrameSize = StackSize - SlotSize;
2474 NumBytes = FrameSize - CSSize - TailCallArgReserveSize;
2475
2476 // Callee-saved registers were pushed on the stack before it was
2477 // realigned.
2478 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
2479 NumBytes = alignTo(FrameSize, MaxAlign);
2480 } else {
2481 NumBytes = StackSize - CSSize - TailCallArgReserveSize;
2482 }
2483 uint64_t SEHStackAllocAmt = NumBytes;
2484
2485 // AfterPop is the position to insert .cfi_restore.
2486 MachineBasicBlock::iterator AfterPop = MBBI;
2487 if (HasFP) {
2488 if (X86FI->hasSwiftAsyncContext()) {
2489 // Discard the context.
2490 int64_t Offset = mergeSPAdd(MBB, MBBI, 16, true);
2491 emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/ true);
2492 }
2493 // Pop EBP.
2494 BuildMI(MBB, MBBI, DL,
2495 TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())),
2496 MachineFramePtr)
2497 .setMIFlag(MachineInstr::FrameDestroy);
2498
2499 // We need to reset FP to its untagged state on return. Bit 60 is currently
2500 // used to show the presence of an extended frame.
2501 if (X86FI->hasSwiftAsyncContext()) {
2502 BuildMI(MBB, MBBI, DL, TII.get(X86::BTR64ri8), MachineFramePtr)
2503 .addUse(MachineFramePtr)
2504 .addImm(60)
2505 .setMIFlag(MachineInstr::FrameDestroy);
2506 }
2507
2508 if (NeedsDwarfCFI) {
2509 if (!ArgBaseReg.isValid()) {
2510 unsigned DwarfStackPtr =
2511 TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
2512 BuildCFI(MBB, MBBI, DL,
2513 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2514 MachineInstr::FrameDestroy);
2515 }
2516 if (!MBB.succ_empty() && !MBB.isReturnBlock()) {
2517 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
2518 BuildCFI(MBB, AfterPop, DL,
2519 MCCFIInstruction::createRestore(nullptr, DwarfFramePtr),
2520 MachineInstr::FrameDestroy);
2521 --MBBI;
2522 --AfterPop;
2523 }
2524 --MBBI;
2525 }
2526 }
2527
2528 MachineBasicBlock::iterator FirstCSPop = MBBI;
2529 // Skip the callee-saved pop instructions.
2530 while (MBBI != MBB.begin()) {
2531 MachineBasicBlock::iterator PI = std::prev(MBBI);
2532 unsigned Opc = PI->getOpcode();
2533
2534 if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
2535 if (!PI->getFlag(MachineInstr::FrameDestroy) ||
2536 (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::BTR64ri8 &&
2537 Opc != X86::ADD64ri32 && Opc != X86::POPP64r && Opc != X86::POP2 &&
2538 Opc != X86::POP2P && Opc != X86::LEA64r))
2539 break;
2540 FirstCSPop = PI;
2541 }
2542
2543 --MBBI;
2544 }
2545 if (ArgBaseReg.isValid()) {
2546 // Restore argument base pointer.
2547 auto *MI = X86FI->getStackPtrSaveMI();
2548 int FI = MI->getOperand(1).getIndex();
2549 unsigned MOVrm = Is64Bit ? X86::MOV64rm : X86::MOV32rm;
2550 // movl offset(%ebp), %basereg
2551 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVrm), ArgBaseReg), FI)
2552 .setMIFlag(MachineInstr::FrameDestroy);
2553 }
2554 MBBI = FirstCSPop;
2555
2556 if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET)
2557 emitCatchRetReturnValue(MBB, FirstCSPop, &*Terminator);
2558
2559 if (MBBI != MBB.end())
2560 DL = MBBI->getDebugLoc();
2561 // If there is an ADD32ri or SUB32ri of ESP immediately before this
2562 // instruction, merge the two instructions.
2563 if (NumBytes || MFI.hasVarSizedObjects())
2564 NumBytes = mergeSPAdd(MBB, MBBI, NumBytes, true);
2565
2566 // If dynamic alloca is used, then reset esp to point to the last callee-saved
2567 // slot before popping them off! The same applies when the stack was
2568 // realigned. Don't do this if this was a funclet epilogue, since the funclets
2569 // will not do realignment or dynamic stack allocation.
2570 if (((TRI->hasStackRealignment(MF)) || MFI.hasVarSizedObjects()) &&
2571 !IsFunclet) {
2572 if (TRI->hasStackRealignment(MF))
2573 MBBI = FirstCSPop;
2574 unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
2575 uint64_t LEAAmount =
2576 IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
2577
2578 if (X86FI->hasSwiftAsyncContext())
2579 LEAAmount -= 16;
2580
2581 // There are only two legal forms of epilogue:
2582 // - add SEHAllocationSize, %rsp
2583 // - lea SEHAllocationSize(%FramePtr), %rsp
2584 //
2585 // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence.
2586 // However, we may use this sequence if we have a frame pointer because the
2587 // effects of the prologue can safely be undone.
2588 if (LEAAmount != 0) {
2589 unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
2590 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), FramePtr,
2591 false, LEAAmount);
2592 --MBBI;
2593 } else {
2594 unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
2595 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr).addReg(FramePtr);
2596 --MBBI;
2597 }
2598 } else if (NumBytes) {
2599 // Adjust stack pointer back: ESP += numbytes.
2600 emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true);
2601 if (!HasFP && NeedsDwarfCFI) {
2602 // Define the current CFA rule to use the provided offset.
2603 BuildCFI(MBB, MBBI, DL,
2604 MCCFIInstruction::cfiDefCfaOffset(
2605 nullptr, CSSize + TailCallArgReserveSize + SlotSize),
2606 MachineInstr::FrameDestroy);
2607 }
2608 --MBBI;
2609 }
2610
2611 if (NeedsWin64CFI && MF.hasWinCFI())
2612 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_BeginEpilogue));
2613
2614 if (!HasFP && NeedsDwarfCFI) {
2615 MBBI = FirstCSPop;
2616 int64_t Offset = -(int64_t)CSSize - SlotSize;
2617 // Mark callee-saved pop instruction.
2618 // Define the current CFA rule to use the provided offset.
2619 while (MBBI != MBB.end()) {
2620 MachineBasicBlock::iterator PI = MBBI;
2621 unsigned Opc = PI->getOpcode();
2622 ++MBBI;
2623 if (Opc == X86::POP32r || Opc == X86::POP64r || Opc == X86::POPP64r ||
2624 Opc == X86::POP2 || Opc == X86::POP2P) {
2625 Offset += SlotSize;
2626 // Compared to pop, pop2 introduces more stack offset (one more
2627 // register).
2628 if (Opc == X86::POP2 || Opc == X86::POP2P)
2629 Offset += SlotSize;
2630 BuildCFI(MBB, MBBI, DL,
2631 MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset),
2632 MachineInstr::FrameDestroy);
2633 }
2634 }
2635 }
2636
2637 // Emit DWARF info specifying the restores of the callee-saved registers.
2638 // For epilogue with return inside or being other block without successor,
2639 // no need to generate .cfi_restore for callee-saved registers.
2640 if (NeedsDwarfCFI && !MBB.succ_empty())
2641 emitCalleeSavedFrameMoves(MBB, AfterPop, DL, false);
2642
2643 if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
2644 // Add the return addr area delta back since we are not tail calling.
2645 int64_t Delta = X86FI->getTCReturnAddrDelta();
2646 assert(Delta <= 0 && "TCDelta should never be positive");
2647 if (Delta) {
2648 // Check for possible merge with preceding ADD instruction.
2649 int64_t Offset = mergeSPAdd(MBB, Terminator, -Delta, true);
2650 emitSPUpdate(MBB, Terminator, DL, Offset, /*InEpilogue=*/true);
2651 }
2652 }
2653
2654 // Emit tilerelease for AMX kernel.
2655 if (X86FI->getAMXProgModel() == AMXProgModelEnum::ManagedRA)
2656 BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
2657
2658 if (NeedsWin64CFI && MF.hasWinCFI())
2659 BuildMI(MBB, Terminator, DL, TII.get(X86::SEH_EndEpilogue));
2660}
2661
2663 int FI,
2664 Register &FrameReg) const {
2665 const MachineFrameInfo &MFI = MF.getFrameInfo();
2666
2667 bool IsFixed = MFI.isFixedObjectIndex(FI);
2668 // We can't calculate offset from frame pointer if the stack is realigned,
2669 // so enforce usage of stack/base pointer. The base pointer is used when we
2670 // have dynamic allocas in addition to dynamic realignment.
2671 if (TRI->hasBasePointer(MF))
2672 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister();
2673 else if (TRI->hasStackRealignment(MF))
2674 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister();
2675 else
2676 FrameReg = TRI->getFrameRegister(MF);
2677
2678 // Offset will hold the offset from the stack pointer at function entry to the
2679 // object.
2680 // We need to factor in additional offsets applied during the prologue to the
2681 // frame, base, and stack pointer depending on which is used.
2682 int64_t Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
2683 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2684 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2685 uint64_t StackSize = MFI.getStackSize();
2686 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2687 int64_t FPDelta = 0;
2688
2689 // In an x86 interrupt, remove the offset we added to account for the return
2690 // address from any stack object allocated in the caller's frame. Interrupts
2691 // do not have a standard return address. Fixed objects in the current frame,
2692 // such as SSE register spills, should not get this treatment.
2693 if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR &&
2694 Offset >= 0) {
2695 Offset += getOffsetOfLocalArea();
2696 }
2697
2698 if (IsWin64Prologue) {
2699 assert(!MFI.hasCalls() || (StackSize % 16) == 8);
2700
2701 // Calculate required stack adjustment.
2702 uint64_t FrameSize = StackSize - SlotSize;
2703 // If required, include space for extra hidden slot for stashing base
2704 // pointer.
2705 if (X86FI->getRestoreBasePointer())
2706 FrameSize += SlotSize;
2707 uint64_t NumBytes = FrameSize - CSSize;
2708
2709 uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
2710 if (FI && FI == X86FI->getFAIndex())
2711 return StackOffset::getFixed(-SEHFrameOffset);
2712
2713 // FPDelta is the offset from the "traditional" FP location of the old base
2714 // pointer followed by return address and the location required by the
2715 // restricted Win64 prologue.
2716 // Add FPDelta to all offsets below that go through the frame pointer.
2717 FPDelta = FrameSize - SEHFrameOffset;
2718 assert((!MFI.hasCalls() || (FPDelta % 16) == 0) &&
2719 "FPDelta isn't aligned per the Win64 ABI!");
2720 }
2721
2722 if (FrameReg == TRI->getFramePtr()) {
2723 // Skip saved EBP/RBP
2724 Offset += SlotSize;
2725
2726 // Account for restricted Windows prologue.
2727 Offset += FPDelta;
2728
2729 // Skip the RETADDR move area
2730 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2731 if (TailCallReturnAddrDelta < 0)
2732 Offset -= TailCallReturnAddrDelta;
2733
2733
2734 return StackOffset::getFixed(Offset);
2735 }
2736
2737 // FrameReg is either the stack pointer or a base pointer. But the base is
2738 // located at the end of the statically known StackSize so the distinction
2739 // doesn't really matter.
2740 if (TRI->hasStackRealignment(MF) || TRI->hasBasePointer(MF))
2741 assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
2742 return StackOffset::getFixed(Offset + StackSize);
2743}
2744
2745 int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, int FI,
2746 Register &FrameReg) const {
2747 const MachineFrameInfo &MFI = MF.getFrameInfo();
2748 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2749 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2750 const auto it = WinEHXMMSlotInfo.find(FI);
2751
2752 if (it == WinEHXMMSlotInfo.end())
2753 return getFrameIndexReference(MF, FI, FrameReg).getFixed();
2754
2755 FrameReg = TRI->getStackRegister();
2756 return alignDown(MFI.getMaxCallFrameSize(), getStackAlign().value()) +
2757 it->second;
2758}
2759
2760 StackOffset
2761 X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF, int FI,
2762 Register &FrameReg,
2763 int Adjustment) const {
2764 const MachineFrameInfo &MFI = MF.getFrameInfo();
2765 FrameReg = TRI->getStackRegister();
2766 return StackOffset::getFixed(MFI.getObjectOffset(FI) -
2767 getOffsetOfLocalArea() + Adjustment);
2768}
2769
2770 StackOffset
2771 X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
2772 int FI, Register &FrameReg,
2773 bool IgnoreSPUpdates) const {
2774
2775 const MachineFrameInfo &MFI = MF.getFrameInfo();
2776 // Does not include any dynamic realign.
2777 const uint64_t StackSize = MFI.getStackSize();
2778 // LLVM arranges the stack as follows:
2779 // ...
2780 // ARG2
2781 // ARG1
2782 // RETADDR
2783 // PUSH RBP <-- RBP points here
2784 // PUSH CSRs
2785 // ~~~~~~~ <-- possible stack realignment (non-win64)
2786 // ...
2787 // STACK OBJECTS
2788 // ... <-- RSP after prologue points here
2789 // ~~~~~~~ <-- possible stack realignment (win64)
2790 //
2791 // if (hasVarSizedObjects()):
2792 // ... <-- "base pointer" (ESI/RBX) points here
2793 // DYNAMIC ALLOCAS
2794 // ... <-- RSP points here
2795 //
2796 // Case 1: In the simple case of no stack realignment and no dynamic
2797 // allocas, both "fixed" stack objects (arguments and CSRs) are addressable
2798 // with fixed offsets from RSP.
2799 //
2800 // Case 2: In the case of stack realignment with no dynamic allocas, fixed
2801 // stack objects are addressed with RBP and regular stack objects with RSP.
2802 //
2803 // Case 3: In the case of dynamic allocas and stack realignment, RSP is used
2804 // to address stack arguments for outgoing calls and nothing else. The "base
2805 // pointer" points to local variables, and RBP points to fixed objects.
2806 //
2807 // In cases 2 and 3, we can only answer for non-fixed stack objects, and the
2808 // answer we give is relative to the SP after the prologue, and not the
2809 // SP in the middle of the function.
2810
2811 if (MFI.isFixedObjectIndex(FI) && TRI->hasStackRealignment(MF) &&
2812 !STI.isTargetWin64())
2813 return getFrameIndexReference(MF, FI, FrameReg);
2814
2815 // If !hasReservedCallFrame the function might have SP adjustment in the
2816 // body. So, even though the offset is statically known, it depends on where
2817 // we are in the function.
2818 if (!IgnoreSPUpdates && !hasReservedCallFrame(MF))
2819 return getFrameIndexReference(MF, FI, FrameReg);
2820
2821 // We don't handle tail calls, and shouldn't be seeing them either.
2823 "we don't handle this case!");
2824
2825 // This is how the math works out:
2826 //
2827 // %rsp grows (i.e. gets lower) left to right. Each box below is
2828 // one word (eight bytes). Obj0 is the stack slot we're trying to
2829 // get to.
2830 //
2831 // ----------------------------------
2832 // | BP | Obj0 | Obj1 | ... | ObjN |
2833 // ----------------------------------
2834 // ^ ^ ^ ^
2835 // A B C E
2836 //
2837 // A is the incoming stack pointer.
2838 // (B - A) is the local area offset (-8 for x86-64) [1]
2839 // (C - A) is the Offset returned by MFI.getObjectOffset for Obj0 [2]
2840 //
2841 // |(E - B)| is the StackSize (absolute value, positive). For a
2842 // stack that grows down, this works out to be (B - E). [3]
2843 //
2844 // E is also the value of %rsp after stack has been set up, and we
2845 // want (C - E) -- the value we can add to %rsp to get to Obj0. Now
2846 // (C - E) == (C - A) - (B - A) + (B - E)
2847 // { Using [1], [2] and [3] above }
2848 // == getObjectOffset - LocalAreaOffset + StackSize
2849
2850 return getFrameIndexReferenceSP(MF, FI, FrameReg, StackSize);
2851}
2852
2853 bool X86FrameLowering::assignCalleeSavedSpillSlots(
2854 MachineFunction &MF, const TargetRegisterInfo *TRI,
2855 std::vector<CalleeSavedInfo> &CSI) const {
2856 MachineFrameInfo &MFI = MF.getFrameInfo();
2857 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2858
2859 unsigned CalleeSavedFrameSize = 0;
2860 unsigned XMMCalleeSavedFrameSize = 0;
2861 auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2862 int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
2863
2864 int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2865
2866 if (TailCallReturnAddrDelta < 0) {
2867 // create RETURNADDR area
2868 // arg
2869 // arg
2870 // RETADDR
2871 // { ...
2872 // RETADDR area
2873 // ...
2874 // }
2875 // [EBP]
2876 MFI.CreateFixedObject(-TailCallReturnAddrDelta,
2877 TailCallReturnAddrDelta - SlotSize, true);
2878 }
2879
2880 // Spill the BasePtr if it's used.
2881 if (this->TRI->hasBasePointer(MF)) {
2882 // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
2883 if (MF.hasEHFunclets()) {
2884 int FI = MFI.CreateSpillStackObject(SlotSize, Align(SlotSize));
2885 X86FI->setHasSEHFramePtrSave(true);
2886 X86FI->setSEHFramePtrSaveIndex(FI);
2887 }
2888 }
2889
2890 if (hasFP(MF)) {
2891 // emitPrologue always spills frame register the first thing.
2892 SpillSlotOffset -= SlotSize;
2893 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2894
2895 // The async context lives directly before the frame pointer, and we
2896 // allocate a second slot to preserve stack alignment.
2897 if (X86FI->hasSwiftAsyncContext()) {
2898 SpillSlotOffset -= SlotSize;
2899 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2900 SpillSlotOffset -= SlotSize;
2901 }
2902
2903 // Since emitPrologue and emitEpilogue will handle spilling and restoring of
2904 // the frame register, we can delete it from CSI list and not have to worry
2905 // about avoiding it later.
2906 Register FPReg = TRI->getFrameRegister(MF);
2907 for (unsigned i = 0; i < CSI.size(); ++i) {
2908 if (TRI->regsOverlap(CSI[i].getReg(), FPReg)) {
2909 CSI.erase(CSI.begin() + i);
2910 break;
2911 }
2912 }
2913 }
2914
2915 // Strategy:
2916 // 1. Use push2 when
2917 // a) the number of CSRs > 1 if no padding is needed
2918 // b) the number of CSRs > 2 if padding is needed
2919 // c) stack alignment >= 16 bytes
2920 // 2. When the number of CSR pushes is odd:
2921 // a. Start to use push2 from the 1st push if stack is 16B aligned.
2922 // b. Start to use push2 from the 2nd push if stack is not 16B aligned.
2923 // 3. When the number of CSR pushes is even, start to use push2 from the 1st
2924 // push and make the stack 16B aligned before the push
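// Example (illustrative): with 3 GR64 CSRs and a 16B-aligned spill offset,
// NumCSGPR = 3, no padding is needed and NumRegsForPush2 = 2, so the first
// two registers pair into one push2 and the third uses a plain push.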
2925 unsigned NumRegsForPush2 = 0;
2926 if (STI.hasPush2Pop2() && getStackAlignment() >= 16) {
2927 unsigned NumCSGPR = llvm::count_if(CSI, [](const CalleeSavedInfo &I) {
2928 return X86::GR64RegClass.contains(I.getReg());
2929 });
2930 bool NeedPadding = (SpillSlotOffset % 16 != 0) && (NumCSGPR % 2 == 0);
2931 bool UsePush2Pop2 = NeedPadding ? NumCSGPR > 2 : NumCSGPR > 1;
2932 X86FI->setPadForPush2Pop2(NeedPadding && UsePush2Pop2);
2933 NumRegsForPush2 = UsePush2Pop2 ? alignDown(NumCSGPR, 2) : 0;
2934 if (X86FI->padForPush2Pop2()) {
2935 SpillSlotOffset -= SlotSize;
2936 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2937 }
2938 }
2939
2940 // Assign slots for GPRs. It increases frame size.
2941 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2942 MCRegister Reg = I.getReg();
2943
2944 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
2945 continue;
2946
2947 // A CSR is a candidate for push2/pop2 when its slot offset is 16B aligned,
2948 // or when the candidate list currently holds an odd number of registers.
2949 if (X86FI->getNumCandidatesForPush2Pop2() < NumRegsForPush2 &&
2950 (SpillSlotOffset % 16 == 0 ||
2951 X86FI->getNumCandidatesForPush2Pop2() % 2))
2952 X86FI->addCandidateForPush2Pop2(Reg);
2953
2954 SpillSlotOffset -= SlotSize;
2955 CalleeSavedFrameSize += SlotSize;
2956
2957 int SlotIndex = MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2958 I.setFrameIdx(SlotIndex);
2959 }
2960
2961 // Adjust the offset of the spill slot now that we know the exact callee
2962 // saved frame size.
2963 if (X86FI->getRestoreBasePointer()) {
2964 SpillSlotOffset -= SlotSize;
2965 CalleeSavedFrameSize += SlotSize;
2966
2967 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2968 // TODO: would saving the slot index be better?
2969 X86FI->setRestoreBasePointer(CalleeSavedFrameSize);
2970 }
2971 assert(X86FI->getNumCandidatesForPush2Pop2() % 2 == 0 &&
2972 "Expect even candidates for push2/pop2");
2973 if (X86FI->getNumCandidatesForPush2Pop2())
2974 ++NumFunctionUsingPush2Pop2;
2975 X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
2976 MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
2977
2978 // Assign slots for XMMs.
2979 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2980 MCRegister Reg = I.getReg();
2981 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
2982 continue;
2983
2984 // If this is a k-register, make sure we look it up via the largest legal type.
2985 MVT VT = MVT::Other;
2986 if (X86::VK16RegClass.contains(Reg))
2987 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
2988
2989 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
2990 unsigned Size = TRI->getSpillSize(*RC);
2991 Align Alignment = TRI->getSpillAlign(*RC);
2992 // ensure alignment
2993 assert(SpillSlotOffset < 0 && "SpillSlotOffset should always be < 0 on X86");
2994 SpillSlotOffset = -alignTo(-SpillSlotOffset, Alignment);
2995
2996 // spill into slot
2997 SpillSlotOffset -= Size;
2998 int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
2999 I.setFrameIdx(SlotIndex);
3000 MFI.ensureMaxAlignment(Alignment);
3001
3002 // Save the start offset and size of XMM in stack frame for funclets.
3003 if (X86::VR128RegClass.contains(Reg)) {
3004 WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize;
3005 XMMCalleeSavedFrameSize += Size;
3006 }
3007 }
3008
3009 return true;
3010}
3011
3012 bool X86FrameLowering::spillCalleeSavedRegisters(
3013 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
3014 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
3015 DebugLoc DL = MBB.findDebugLoc(MI);
3016
3017 // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI
3018 // for us, and there are no XMM CSRs on Win32.
3019 if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows())
3020 return true;
3021
3022 // Push GPRs. It increases frame size.
3023 const MachineFunction &MF = *MBB.getParent();
3024 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3025 if (X86FI->padForPush2Pop2()) {
3026 assert(SlotSize == 8 && "Unexpected slot size for padding!");
3027 BuildMI(MBB, MI, DL, TII.get(X86::PUSH64r))
3028 .addReg(X86::RAX, RegState::Undef)
3029 .setMIFlag(MachineInstr::FrameSetup);
3030 }
3031
3032 // Update LiveIn of the basic block and decide whether we can add a kill flag
3033 // to the use.
3034 auto UpdateLiveInCheckCanKill = [&](Register Reg) {
3035 const MachineRegisterInfo &MRI = MF.getRegInfo();
3036 // Do not set a kill flag on values that are also marked as live-in. This
3037 // happens with the @llvm.returnaddress intrinsic and with arguments
3038 // passed in callee saved registers.
3039 // Omitting the kill flags is conservatively correct even if the live-in
3040 // is not used after all.
3041 if (MRI.isLiveIn(Reg))
3042 return false;
3043 MBB.addLiveIn(Reg);
3044 // Check if any subregister is live-in
3045 for (MCRegAliasIterator AReg(Reg, TRI, false); AReg.isValid(); ++AReg)
3046 if (MRI.isLiveIn(*AReg))
3047 return false;
3048 return true;
3049 };
3050 auto UpdateLiveInGetKillRegState = [&](Register Reg) {
3051 return getKillRegState(UpdateLiveInCheckCanKill(Reg));
3052 };
3053
3054 for (auto RI = CSI.rbegin(), RE = CSI.rend(); RI != RE; ++RI) {
3055 MCRegister Reg = RI->getReg();
3056 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
3057 continue;
3058
3059 if (X86FI->isCandidateForPush2Pop2(Reg)) {
3060 MCRegister Reg2 = (++RI)->getReg();
3061 BuildMI(MBB, MI, DL, TII.get(getPUSH2Opcode(STI)))
3062 .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
3063 .addReg(Reg2, UpdateLiveInGetKillRegState(Reg2))
3064 .setMIFlag(MachineInstr::FrameSetup);
3065 } else {
3066 BuildMI(MBB, MI, DL, TII.get(getPUSHOpcode(STI)))
3067 .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
3068 .setMIFlag(MachineInstr::FrameSetup);
3069 }
3070 }
3071
3072 if (X86FI->getRestoreBasePointer()) {
3073 unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
3074 Register BaseReg = this->TRI->getBaseRegister();
3075 BuildMI(MBB, MI, DL, TII.get(Opc))
3076 .addReg(BaseReg, getKillRegState(true))
3077 .setMIFlag(MachineInstr::FrameSetup);
3078 }
3079
3080 // Spill the XMM regs. X86 has no push/pop instructions for XMM registers,
3081 // so spill them to slots in the stack frame instead.
3082 for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
3083 MCRegister Reg = I.getReg();
3084 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3085 continue;
3086
3087 // If this is a k-register, make sure we look it up via the largest legal type.
3088 MVT VT = MVT::Other;
3089 if (X86::VK16RegClass.contains(Reg))
3090 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
3091
3092 // Add the callee-saved register as live-in. It's killed at the spill.
3093 MBB.addLiveIn(Reg);
3094 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
3095
3096 TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, TRI,
3097 Register(), MachineInstr::FrameSetup);
3098 }
3099
3100 return true;
3101}
3102
3103void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB,
3104 MachineBasicBlock::iterator MBBI,
3105 MachineInstr *CatchRet) const {
3106 // SEH shouldn't use catchret.
3107 assert(!isAsynchronousEHPersonality(classifyEHPersonality(
3108 MBB.getParent()->getFunction().getPersonalityFn())) &&
3109 "SEH should not use CATCHRET");
3110 const DebugLoc &DL = CatchRet->getDebugLoc();
3111 MachineBasicBlock *CatchRetTarget = CatchRet->getOperand(0).getMBB();
3112
3113 // Fill EAX/RAX with the address of the target block.
3114 if (STI.is64Bit()) {
3115 // LEA64r CatchRetTarget(%rip), %rax
3116 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), X86::RAX)
3117 .addReg(X86::RIP)
3118 .addImm(0)
3119 .addReg(0)
3120 .addMBB(CatchRetTarget)
3121 .addReg(0);
3122 } else {
3123 // MOV32ri $CatchRetTarget, %eax
3124 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
3125 .addMBB(CatchRetTarget);
3126 }
3127
3128 // Record that we've taken the address of CatchRetTarget and no longer just
3129 // reference it in a terminator.
3130 CatchRetTarget->setMachineBlockAddressTaken();
3131}
3132
3133 bool X86FrameLowering::restoreCalleeSavedRegisters(
3134 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
3135 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
3136 if (CSI.empty())
3137 return false;
3138
3139 if (MI != MBB.end() && isFuncletReturnInstr(*MI) && STI.isOSWindows()) {
3140 // Don't restore CSRs in 32-bit EH funclets. Matches
3141 // spillCalleeSavedRegisters.
3142 if (STI.is32Bit())
3143 return true;
3144 // Don't restore CSRs before an SEH catchret. SEH except blocks do not form
3145 // funclets. emitEpilogue transforms these to normal jumps.
3146 if (MI->getOpcode() == X86::CATCHRET) {
3147 const Function &F = MBB.getParent()->getFunction();
3148 bool IsSEH = isAsynchronousEHPersonality(
3149 classifyEHPersonality(F.getPersonalityFn()));
3150 if (IsSEH)
3151 return true;
3152 }
3153 }
3154
3155 DebugLoc DL = MBB.findDebugLoc(MI);
3156
3157 // Reload XMMs from stack frame.
3158 for (const CalleeSavedInfo &I : CSI) {
3159 MCRegister Reg = I.getReg();
3160 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3161 continue;
3162
3163 // If this is a k-register, make sure we look it up via the largest legal type.
3164 MVT VT = MVT::Other;
3165 if (X86::VK16RegClass.contains(Reg))
3166 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
3167
3168 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
3169 TII.loadRegFromStackSlot(MBB, MI, Reg, I.getFrameIdx(), RC, TRI,
3170 Register());
3171 }
3172
3173 // Clear the stack slot for spill base pointer register.
3174 MachineFunction &MF = *MBB.getParent();
3175 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3176 if (X86FI->getRestoreBasePointer()) {
3177 unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
3178 Register BaseReg = this->TRI->getBaseRegister();
3179 BuildMI(MBB, MI, DL, TII.get(Opc), BaseReg)
3180 .setMIFlag(MachineInstr::FrameDestroy);
3181 }
3182
3183 // POP GPRs.
3184 for (auto I = CSI.begin(), E = CSI.end(); I != E; ++I) {
3185 MCRegister Reg = I->getReg();
3186 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
3187 continue;
3188
3189 if (X86FI->isCandidateForPush2Pop2(Reg))
3190 BuildMI(MBB, MI, DL, TII.get(getPOP2Opcode(STI)), Reg)
3191 .addReg((++I)->getReg(), RegState::Define)
3192 .setMIFlag(MachineInstr::FrameDestroy);
3193 else
3194 BuildMI(MBB, MI, DL, TII.get(getPOPOpcode(STI)), Reg)
3195 .setMIFlag(MachineInstr::FrameDestroy);
3196 }
3197 if (X86FI->padForPush2Pop2())
3198 emitSPUpdate(MBB, MI, DL, SlotSize, /*InEpilogue=*/true);
3199
3200 return true;
3201}
3202
3203 void X86FrameLowering::determineCalleeSaves(MachineFunction &MF,
3204 BitVector &SavedRegs,
3205 RegScavenger *RS) const {
3206 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
3207
3208 // Spill the BasePtr if it's used.
3209 if (TRI->hasBasePointer(MF)) {
3210 Register BasePtr = TRI->getBaseRegister();
3211 if (STI.isTarget64BitILP32())
3212 BasePtr = getX86SubSuperRegister(BasePtr, 64);
3213 SavedRegs.set(BasePtr);
3214 }
3215}
3216
3217static bool HasNestArgument(const MachineFunction *MF) {
3218 const Function &F = MF->getFunction();
3219 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
3220 I++) {
3221 if (I->hasNestAttr() && !I->use_empty())
3222 return true;
3223 }
3224 return false;
3225}
3226
3227/// GetScratchRegister - Get a temp register for performing work in the
3228/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
3229/// and the properties of the function either one or two registers will be
3230/// needed. Set primary to true for the first register, false for the second.
3231static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64,
3232 const MachineFunction &MF, bool Primary) {
3233 CallingConv::ID CallingConvention = MF.getFunction().getCallingConv();
3234
3235 // Erlang stuff.
3236 if (CallingConvention == CallingConv::HiPE) {
3237 if (Is64Bit)
3238 return Primary ? X86::R14 : X86::R13;
3239 else
3240 return Primary ? X86::EBX : X86::EDI;
3241 }
3242
3243 if (Is64Bit) {
3244 if (IsLP64)
3245 return Primary ? X86::R11 : X86::R12;
3246 else
3247 return Primary ? X86::R11D : X86::R12D;
3248 }
3249
3250 bool IsNested = HasNestArgument(&MF);
3251
3252 if (CallingConvention == CallingConv::X86_FastCall ||
3253 CallingConvention == CallingConv::Fast ||
3254 CallingConvention == CallingConv::Tail) {
3255 if (IsNested)
3256 report_fatal_error("Segmented stacks does not support fastcall with "
3257 "nested function.");
3258 return Primary ? X86::EAX : X86::ECX;
3259 }
3260 if (IsNested)
3261 return Primary ? X86::EDX : X86::EAX;
3262 return Primary ? X86::ECX : X86::EAX;
3263}
3264
3265// The stack limit in the TCB is set to this many bytes above the actual stack
3266// limit.
3267 static const uint64_t kSplitStackAvailable = 256;
3268
3269 void X86FrameLowering::adjustForSegmentedStacks(
3270 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3271 MachineFrameInfo &MFI = MF.getFrameInfo();
3272 uint64_t StackSize;
3273 unsigned TlsReg, TlsOffset;
3274 DebugLoc DL;
3275
3276 // To support shrink-wrapping we would need to insert the new blocks
3277 // at the right place and update the branches to PrologueMBB.
3278 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3279
3280 unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3281 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3282 "Scratch register is live-in");
3283
3284 if (MF.getFunction().isVarArg())
3285 report_fatal_error("Segmented stacks do not support vararg functions.");
3286 if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
3287 !STI.isTargetWin64() && !STI.isTargetFreeBSD() &&
3288 !STI.isTargetDragonFly())
3289 report_fatal_error("Segmented stacks not supported on this platform.");
3290
3291 // Eventually StackSize will be calculated by a link-time pass, which will
3292 // also decide whether checking code needs to be injected into this particular
3293 // prologue.
3294 StackSize = MFI.getStackSize();
3295
3296 if (!MFI.needsSplitStackProlog())
3297 return;
3298
3299 MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
3300 MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
3301 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3302 bool IsNested = false;
3303
3304 // We need to know if the function has a nest argument only in 64 bit mode.
3305 if (Is64Bit)
3306 IsNested = HasNestArgument(&MF);
3307
3308 // The MOV R10, RAX needs to be in a different block, since the RET we emit in
3309 // allocMBB needs to be the last (terminating) instruction.
3310
3311 for (const auto &LI : PrologueMBB.liveins()) {
3312 allocMBB->addLiveIn(LI);
3313 checkMBB->addLiveIn(LI);
3314 }
3315
3316 if (IsNested)
3317 allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
3318
3319 MF.push_front(allocMBB);
3320 MF.push_front(checkMBB);
3321
3322 // When the frame size is less than 256 we just compare the stack
3323 // boundary directly to the value of the stack pointer, per gcc.
3324 bool CompareStackPointer = StackSize < kSplitStackAvailable;
3325
3326  // Read the limit of the current stacklet from the stack_guard location.
3327 if (Is64Bit) {
3328 if (STI.isTargetLinux()) {
3329 TlsReg = X86::FS;
3330 TlsOffset = IsLP64 ? 0x70 : 0x40;
3331 } else if (STI.isTargetDarwin()) {
3332 TlsReg = X86::GS;
3333 TlsOffset = 0x60 + 90 * 8; // See pthread_machdep.h. Steal TLS slot 90.
3334 } else if (STI.isTargetWin64()) {
3335 TlsReg = X86::GS;
3336 TlsOffset = 0x28; // pvArbitrary, reserved for application use
3337 } else if (STI.isTargetFreeBSD()) {
3338 TlsReg = X86::FS;
3339 TlsOffset = 0x18;
3340 } else if (STI.isTargetDragonFly()) {
3341 TlsReg = X86::FS;
3342 TlsOffset = 0x20; // use tls_tcb.tcb_segstack
3343 } else {
3344 report_fatal_error("Segmented stacks not supported on this platform.");
3345 }
3346
3347 if (CompareStackPointer)
3348 ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
3349 else
3350 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r),
3351 ScratchReg)
3352 .addReg(X86::RSP)
3353 .addImm(1)
3354 .addReg(0)
3355 .addImm(-StackSize)
3356 .addReg(0);
3357
3358 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm))
3359 .addReg(ScratchReg)
3360 .addReg(0)
3361 .addImm(1)
3362 .addReg(0)
3363 .addImm(TlsOffset)
3364 .addReg(TlsReg);
3365 } else {
3366 if (STI.isTargetLinux()) {
3367 TlsReg = X86::GS;
3368 TlsOffset = 0x30;
3369 } else if (STI.isTargetDarwin()) {
3370 TlsReg = X86::GS;
3371 TlsOffset = 0x48 + 90 * 4;
3372 } else if (STI.isTargetWin32()) {
3373 TlsReg = X86::FS;
3374 TlsOffset = 0x14; // pvArbitrary, reserved for application use
3375 } else if (STI.isTargetDragonFly()) {
3376 TlsReg = X86::FS;
3377 TlsOffset = 0x10; // use tls_tcb.tcb_segstack
3378 } else if (STI.isTargetFreeBSD()) {
3379 report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
3380 } else {
3381 report_fatal_error("Segmented stacks not supported on this platform.");
3382 }
3383
3384 if (CompareStackPointer)
3385 ScratchReg = X86::ESP;
3386 else
3387 BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg)
3388 .addReg(X86::ESP)
3389 .addImm(1)
3390 .addReg(0)
3391 .addImm(-StackSize)
3392 .addReg(0);
3393
3394 if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() ||
3395 STI.isTargetDragonFly()) {
3396 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3397 .addReg(ScratchReg)
3398 .addReg(0)
3399 .addImm(0)
3400 .addReg(0)
3401 .addImm(TlsOffset)
3402 .addReg(TlsReg);
3403 } else if (STI.isTargetDarwin()) {
3404
3405 // TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
3406 unsigned ScratchReg2;
3407 bool SaveScratch2;
3408 if (CompareStackPointer) {
3409 // The primary scratch register is available for holding the TLS offset.
3410 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3411 SaveScratch2 = false;
3412 } else {
3413 // Need to use a second register to hold the TLS offset
3414 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
3415
3416 // Unfortunately, with fastcc the second scratch register may hold an
3417 // argument.
3418 SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
3419 }
3420
3421 // If Scratch2 is live-in then it needs to be saved.
3422 assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
3423 "Scratch register is live-in and not saved");
3424
3425 if (SaveScratch2)
3426 BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
3427 .addReg(ScratchReg2, RegState::Kill);
3428
3429 BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
3430 .addImm(TlsOffset);
3431 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3432 .addReg(ScratchReg)
3433 .addReg(ScratchReg2)
3434 .addImm(1)
3435 .addReg(0)
3436 .addImm(0)
3437 .addReg(TlsReg);
3438
3439 if (SaveScratch2)
3440 BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
3441 }
3442 }
3443
3444 // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
3445 // It jumps to normal execution of the function body.
3446 BuildMI(checkMBB, DL, TII.get(X86::JCC_1))
3447 .addMBB(&PrologueMBB)
3448      .addImm(X86::COND_GE);
3449
3450  // On 32 bit we first push the argument size and then the frame size. On 64
3451  // bit, we pass the stack frame size in r10 and the argument size in r11.
3452 if (Is64Bit) {
3453    // Functions with nested arguments use R10, so it needs to be saved across
3454    // the call to __morestack.
3455
3456 const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
3457 const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
3458 const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
3459 const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
3460
3461 if (IsNested)
3462 BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
3463
3464 BuildMI(allocMBB, DL, TII.get(getMOVriOpcode(IsLP64, StackSize)), Reg10)
3465 .addImm(StackSize);
3466 BuildMI(allocMBB, DL,
3467            TII.get(getMOVriOpcode(IsLP64, X86FI->getArgumentStackSize())),
3468            Reg11)
3469 .addImm(X86FI->getArgumentStackSize());
3470 } else {
3471 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i))
3472 .addImm(X86FI->getArgumentStackSize());
3473 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i)).addImm(StackSize);
3474 }
3475
3476 // __morestack is in libgcc
3477  if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
3478    // Under the large code model, we cannot assume that __morestack lives
3479 // within 2^31 bytes of the call site, so we cannot use pc-relative
3480 // addressing. We cannot perform the call via a temporary register,
3481 // as the rax register may be used to store the static chain, and all
3482 // other suitable registers may be either callee-save or used for
3483 // parameter passing. We cannot use the stack at this point either
3484 // because __morestack manipulates the stack directly.
3485 //
3486 // To avoid these issues, perform an indirect call via a read-only memory
3487 // location containing the address.
3488 //
3489 // This solution is not perfect, as it assumes that the .rodata section
3490    // is laid out within 2^31 bytes of each function body, but this seems
3491    // to be sufficient for the JIT.
3492    // FIXME: Add retpoline support and remove the error here.
3493 if (STI.useIndirectThunkCalls())
3494 report_fatal_error("Emitting morestack calls on 64-bit with the large "
3495 "code model and thunks not yet implemented.");
3496 BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
3497 .addReg(X86::RIP)
3498 .addImm(0)
3499 .addReg(0)
3500 .addExternalSymbol("__morestack_addr")
3501 .addReg(0);
3502 } else {
3503 if (Is64Bit)
3504 BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
3505 .addExternalSymbol("__morestack");
3506 else
3507 BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
3508 .addExternalSymbol("__morestack");
3509 }
3510
3511 if (IsNested)
3512 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
3513 else
3514 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
3515
3516 allocMBB->addSuccessor(&PrologueMBB);
3517
3518 checkMBB->addSuccessor(allocMBB, BranchProbability::getZero());
3519 checkMBB->addSuccessor(&PrologueMBB, BranchProbability::getOne());
3520
3521#ifdef EXPENSIVE_CHECKS
3522 MF.verify();
3523#endif
3524}
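// A rough sketch of what the two blocks boil down to on x86-64 Linux (LP64),
// with StackSize and ArgSize standing in for the concrete immediates
// (illustrative, not compiler output):
//
//   checkMBB:
//     leaq  -StackSize(%rsp), %r11  # skipped if StackSize < 256; then the
//     cmpq  %fs:0x70, %r11          #   cmp uses %rsp directly
//     jge   <PrologueMBB>           # enough room on the current stacklet
//   allocMBB:
//     movq  $StackSize, %r10
//     movq  $ArgSize, %r11
//     callq __morestack
//     retq                          # __morestack re-invokes the function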
3525
3526/// Lookup an ERTS parameter in the !hipe.literals named metadata node.
3527/// HiPE provides Erlang Runtime System-internal parameters, such as PCB offsets
3528/// to fields it needs, through a named metadata node "hipe.literals" containing
3529/// name-value pairs.
3530static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD,
3531 const StringRef LiteralName) {
3532 for (int i = 0, e = HiPELiteralsMD->getNumOperands(); i != e; ++i) {
3533 MDNode *Node = HiPELiteralsMD->getOperand(i);
3534 if (Node->getNumOperands() != 2)
3535 continue;
3536 MDString *NodeName = dyn_cast<MDString>(Node->getOperand(0));
3537 ValueAsMetadata *NodeVal = dyn_cast<ValueAsMetadata>(Node->getOperand(1));
3538 if (!NodeName || !NodeVal)
3539 continue;
3540 ConstantInt *ValConst = dyn_cast_or_null<ConstantInt>(NodeVal->getValue());
3541 if (ValConst && NodeName->getString() == LiteralName) {
3542 return ValConst->getZExtValue();
3543 }
3544 }
3545
3546 report_fatal_error("HiPE literal " + LiteralName +
3547 " required but not provided");
3548}
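// For reference, the module-level metadata consumed above has roughly this
// shape (the literal names appear in this file; the values are illustrative):
//
//   !hipe.literals = !{!0, !1, !2}
//   !0 = !{!"P_NSP_LIMIT", i32 84}
//   !1 = !{!"X86_LEAF_WORDS", i32 24}
//   !2 = !{!"AMD64_LEAF_WORDS", i32 24}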
3549
3550// Return true if there are no non-ehpad successors to MBB and there are no
3551// non-meta instructions between MBBI and MBB.end().
3552static bool blockEndIsUnreachable(const MachineBasicBlock &MBB,
3553                                  MachineBasicBlock::const_iterator MBBI) {
3554  return llvm::all_of(
3555 MBB.successors(),
3556 [](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) &&
3557 std::all_of(MBBI, MBB.end(), [](const MachineInstr &MI) {
3558 return MI.isMetaInstruction();
3559 });
3560}
3561
3562/// Erlang programs may need a special prologue to handle the stack size they
3563/// might need at runtime. That is because Erlang/OTP does not implement a C
3564/// stack but uses a custom implementation of a hybrid stack/heap architecture.
3565/// (for more information see Eric Stenman's Ph.D. thesis:
3566/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
3567///
3568/// CheckStack:
3569/// temp0 = sp - MaxStack
3570/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3571/// OldStart:
3572/// ...
3573/// IncStack:
3574/// call inc_stack # doubles the stack space
3575/// temp0 = sp - MaxStack
3576/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3577void X86FrameLowering::adjustForHiPEPrologue(
3578    MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3579 MachineFrameInfo &MFI = MF.getFrameInfo();
3580 DebugLoc DL;
3581
3582 // To support shrink-wrapping we would need to insert the new blocks
3583 // at the right place and update the branches to PrologueMBB.
3584 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3585
3586 // HiPE-specific values
3587 NamedMDNode *HiPELiteralsMD =
3588 MF.getFunction().getParent()->getNamedMetadata("hipe.literals");
3589 if (!HiPELiteralsMD)
3591 "Can't generate HiPE prologue without runtime parameters");
3592 const unsigned HipeLeafWords = getHiPELiteral(
3593 HiPELiteralsMD, Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS");
3594 const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
3595 const unsigned Guaranteed = HipeLeafWords * SlotSize;
3596 unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs
3597 ? MF.getFunction().arg_size() - CCRegisteredArgs
3598 : 0;
3599 unsigned MaxStack = MFI.getStackSize() + CallerStkArity * SlotSize + SlotSize;
3600
3601 assert(STI.isTargetLinux() &&
3602 "HiPE prologue is only supported on Linux operating systems.");
3603
3604 // Compute the largest caller's frame that is needed to fit the callees'
3605 // frames. This 'MaxStack' is computed from:
3606 //
3607 // a) the fixed frame size, which is the space needed for all spilled temps,
3608 // b) outgoing on-stack parameter areas, and
3609 // c) the minimum stack space this function needs to make available for the
3610 // functions it calls (a tunable ABI property).
3611 if (MFI.hasCalls()) {
3612 unsigned MoreStackForCalls = 0;
3613
3614 for (auto &MBB : MF) {
3615 for (auto &MI : MBB) {
3616 if (!MI.isCall())
3617 continue;
3618
3619 // Get callee operand.
3620 const MachineOperand &MO = MI.getOperand(0);
3621
3622 // Only take account of global function calls (no closures etc.).
3623 if (!MO.isGlobal())
3624 continue;
3625
3626 const Function *F = dyn_cast<Function>(MO.getGlobal());
3627 if (!F)
3628 continue;
3629
3630 // Do not update 'MaxStack' for primitive and built-in functions
3631 // (encoded with names either starting with "erlang."/"bif_" or not
3632 // having a ".", such as a simple <Module>.<Function>.<Arity>, or an
3633 // "_", such as the BIF "suspend_0") as they are executed on another
3634 // stack.
3635 if (F->getName().contains("erlang.") || F->getName().contains("bif_") ||
3636 F->getName().find_first_of("._") == StringRef::npos)
3637 continue;
3638
3639 unsigned CalleeStkArity = F->arg_size() > CCRegisteredArgs
3640 ? F->arg_size() - CCRegisteredArgs
3641 : 0;
3642 if (HipeLeafWords - 1 > CalleeStkArity)
3643 MoreStackForCalls =
3644 std::max(MoreStackForCalls,
3645 (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
3646 }
3647 }
3648 MaxStack += MoreStackForCalls;
3649 }
3650
3651  // If the needed stack frame is larger than the guaranteed amount, runtime
3652  // checks and calls to the "inc_stack_0" BIF are inserted in the prologue.
3653 if (MaxStack > Guaranteed) {
3654 MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
3655 MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
3656
3657 for (const auto &LI : PrologueMBB.liveins()) {
3658 stackCheckMBB->addLiveIn(LI);
3659 incStackMBB->addLiveIn(LI);
3660 }
3661
3662 MF.push_front(incStackMBB);
3663 MF.push_front(stackCheckMBB);
3664
3665 unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
3666 unsigned LEAop, CMPop, CALLop;
3667 SPLimitOffset = getHiPELiteral(HiPELiteralsMD, "P_NSP_LIMIT");
3668 if (Is64Bit) {
3669 SPReg = X86::RSP;
3670 PReg = X86::RBP;
3671 LEAop = X86::LEA64r;
3672 CMPop = X86::CMP64rm;
3673 CALLop = X86::CALL64pcrel32;
3674 } else {
3675 SPReg = X86::ESP;
3676 PReg = X86::EBP;
3677 LEAop = X86::LEA32r;
3678 CMPop = X86::CMP32rm;
3679 CALLop = X86::CALLpcrel32;
3680 }
3681
3682 ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3683 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3684 "HiPE prologue scratch register is live-in");
3685
3686 // Create new MBB for StackCheck:
3687 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
3688 false, -MaxStack);
3689 // SPLimitOffset is in a fixed heap location (pointed by BP).
3690 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
3691 PReg, false, SPLimitOffset);
3692 BuildMI(stackCheckMBB, DL, TII.get(X86::JCC_1))
3693 .addMBB(&PrologueMBB)
3694        .addImm(X86::COND_AE);
3695
3696 // Create new MBB for IncStack:
3697 BuildMI(incStackMBB, DL, TII.get(CALLop)).addExternalSymbol("inc_stack_0");
3698 addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
3699 false, -MaxStack);
3700 addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
3701 PReg, false, SPLimitOffset);
3702 BuildMI(incStackMBB, DL, TII.get(X86::JCC_1))
3703 .addMBB(incStackMBB)
3704        .addImm(X86::COND_LE);
3705
3706 stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100});
3707 stackCheckMBB->addSuccessor(incStackMBB, {1, 100});
3708 incStackMBB->addSuccessor(&PrologueMBB, {99, 100});
3709 incStackMBB->addSuccessor(incStackMBB, {1, 100});
3710 }
3711#ifdef EXPENSIVE_CHECKS
3712 MF.verify();
3713#endif
3714}
3715
3716bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
3717                                           MachineBasicBlock::iterator MBBI,
3718                                           const DebugLoc &DL,
3719 int Offset) const {
3720 if (Offset <= 0)
3721 return false;
3722
3723 if (Offset % SlotSize)
3724 return false;
3725
3726 int NumPops = Offset / SlotSize;
3727 // This is only worth it if we have at most 2 pops.
3728 if (NumPops != 1 && NumPops != 2)
3729 return false;
3730
3731 // Handle only the trivial case where the adjustment directly follows
3732 // a call. This is the most common one, anyway.
3733 if (MBBI == MBB.begin())
3734 return false;
3735 MachineBasicBlock::iterator Prev = std::prev(MBBI);
3736 if (!Prev->isCall() || !Prev->getOperand(1).isRegMask())
3737 return false;
3738
3739 unsigned Regs[2];
3740 unsigned FoundRegs = 0;
3741
3742 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3743 const MachineOperand &RegMask = Prev->getOperand(1);
3744
3745 auto &RegClass =
3746 Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass;
3747 // Try to find up to NumPops free registers.
3748 for (auto Candidate : RegClass) {
3749 // Poor man's liveness:
3750 // Since we're immediately after a call, any register that is clobbered
3751 // by the call and not defined by it can be considered dead.
3752 if (!RegMask.clobbersPhysReg(Candidate))
3753 continue;
3754
3755 // Don't clobber reserved registers
3756 if (MRI.isReserved(Candidate))
3757 continue;
3758
3759 bool IsDef = false;
3760 for (const MachineOperand &MO : Prev->implicit_operands()) {
3761 if (MO.isReg() && MO.isDef() &&
3762 TRI->isSuperOrSubRegisterEq(MO.getReg(), Candidate)) {
3763 IsDef = true;
3764 break;
3765 }
3766 }
3767
3768 if (IsDef)
3769 continue;
3770
3771 Regs[FoundRegs++] = Candidate;
3772 if (FoundRegs == (unsigned)NumPops)
3773 break;
3774 }
3775
3776 if (FoundRegs == 0)
3777 return false;
3778
3779 // If we found only one free register, but need two, reuse the same one twice.
3780 while (FoundRegs < (unsigned)NumPops)
3781 Regs[FoundRegs++] = Regs[0];
3782
3783 for (int i = 0; i < NumPops; ++i)
3784 BuildMI(MBB, MBBI, DL, TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r),
3785 Regs[i]);
3786
3787 return true;
3788}
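// Illustrative effect on 32-bit code with Offset == 8, assuming ECX and EDX
// are clobbered by (and dead after) the preceding call:
//
//   calll foo                    calll foo
//   addl  $8, %esp      -->      popl  %ecx
//                                popl  %edx
//
// The popped values are discarded; the pops are used only because they encode
// smaller than the 'add' when the function is optimized for minimum size.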
3789
3790MachineBasicBlock::iterator X86FrameLowering::eliminateCallFramePseudoInstr(
3791    MachineFunction &MF, MachineBasicBlock &MBB,
3792    MachineBasicBlock::iterator I) const {
3793  bool reserveCallFrame = hasReservedCallFrame(MF);
3794 unsigned Opcode = I->getOpcode();
3795 bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
3796 DebugLoc DL = I->getDebugLoc(); // copy DebugLoc as I will be erased.
3797 uint64_t Amount = TII.getFrameSize(*I);
3798 uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(*I) : 0;
3799 I = MBB.erase(I);
3800 auto InsertPos = skipDebugInstructionsForward(I, MBB.end());
3801
3802 // Try to avoid emitting dead SP adjustments if the block end is unreachable,
3803 // typically because the function is marked noreturn (abort, throw,
3804 // assert_fail, etc).
3805 if (isDestroy && blockEndIsUnreachable(MBB, I))
3806 return I;
3807
3808 if (!reserveCallFrame) {
3809    // If the stack pointer can be changed after prologue, turn the
3810    // adjcallstackdown instruction into a 'sub ESP, <amt>' and the
3811    // adjcallstackup instruction into an 'add ESP, <amt>'.
3812
3813 // We need to keep the stack aligned properly. To do this, we round the
3814 // amount of space needed for the outgoing arguments up to the next
3815 // alignment boundary.
3816 Amount = alignTo(Amount, getStackAlign());
3817
3818 const Function &F = MF.getFunction();
3819 bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
3820 bool DwarfCFI = !WindowsCFI && MF.needsFrameMoves();
3821
3822 // If we have any exception handlers in this function, and we adjust
3823 // the SP before calls, we may need to indicate this to the unwinder
3824 // using GNU_ARGS_SIZE. Note that this may be necessary even when
3825 // Amount == 0, because the preceding function may have set a non-0
3826 // GNU_ARGS_SIZE.
3827 // TODO: We don't need to reset this between subsequent functions,
3828 // if it didn't change.
3829 bool HasDwarfEHHandlers = !WindowsCFI && !MF.getLandingPads().empty();
3830
3831 if (HasDwarfEHHandlers && !isDestroy &&
3832        MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
3833      BuildCFI(MBB, InsertPos, DL,
3834 MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
3835
3836 if (Amount == 0)
3837 return I;
3838
3839 // Factor out the amount that gets handled inside the sequence
3840 // (Pushes of argument for frame setup, callee pops for frame destroy)
3841 Amount -= InternalAmt;
3842
3843 // TODO: This is needed only if we require precise CFA.
3844 // If this is a callee-pop calling convention, emit a CFA adjust for
3845 // the amount the callee popped.
3846 if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
3847 BuildCFI(MBB, InsertPos, DL,
3848 MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
3849
3850 // Add Amount to SP to destroy a frame, or subtract to setup.
3851 int64_t StackAdjustment = isDestroy ? Amount : -Amount;
3852 int64_t CfaAdjustment = StackAdjustment;
3853
3854 if (StackAdjustment) {
3855      // Merge with any previous or following adjustment instruction. Note:
3856      // the instructions merged here carry no CFI of their own, so their
3857      // stack adjustments do not feed into CfaAdjustment.
3858
3859 auto CalcCfaAdjust = [&CfaAdjustment](MachineBasicBlock::iterator PI,
3860 int64_t Offset) {
3861 CfaAdjustment += Offset;
3862 };
3863 auto CalcNewOffset = [&StackAdjustment](int64_t Offset) {
3864 return StackAdjustment + Offset;
3865 };
3866 StackAdjustment =
3867 mergeSPUpdates(MBB, InsertPos, CalcCfaAdjust, CalcNewOffset, true);
3868 StackAdjustment =
3869 mergeSPUpdates(MBB, InsertPos, CalcCfaAdjust, CalcNewOffset, false);
3870
3871 if (StackAdjustment) {
3872 if (!(F.hasMinSize() &&
3873 adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
3874 BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
3875 /*InEpilogue=*/false);
3876 }
3877 }
3878
3879 if (DwarfCFI && !hasFP(MF) && CfaAdjustment) {
3880 // If we don't have FP, but need to generate unwind information,
3881 // we need to set the correct CFA offset after the stack adjustment.
3882 // How much we adjust the CFA offset depends on whether we're emitting
3883 // CFI only for EH purposes or for debugging. EH only requires the CFA
3884 // offset to be correct at each call site, while for debugging we want
3885 // it to be more precise.
3886
3887 // TODO: When not using precise CFA, we also need to adjust for the
3888 // InternalAmt here.
3889 BuildCFI(
3890 MBB, InsertPos, DL,
3891 MCCFIInstruction::createAdjustCfaOffset(nullptr, -CfaAdjustment));
3892 }
3893
3894 return I;
3895 }
3896
3897 if (InternalAmt) {
3898    MachineBasicBlock::iterator CI = I;
3899    MachineBasicBlock::iterator B = MBB.begin();
3900    while (CI != B && !std::prev(CI)->isCall())
3901 --CI;
3902 BuildStackAdjustment(MBB, CI, DL, -InternalAmt, /*InEpilogue=*/false);
3903 }
3904
3905 return I;
3906}
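// As a sketch, with no reserved call frame a 32-bit call sequence such as
//
//   ADJCALLSTACKDOWN32 16, 0, 0
//   ...
//   CALLpcrel32 @g
//   ADJCALLSTACKUP32 16, 0
//
// is lowered here to 'subl $16, %esp' before the call and 'addl $16, %esp'
// after it, with the amount rounded up to the stack alignment and reduced by
// whatever an argument push sequence or a callee-pop convention already
// handles.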
3907
3908bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
3909  assert(MBB.getParent() && "Block is not attached to a function!");
3910 const MachineFunction &MF = *MBB.getParent();
3911 if (!MBB.isLiveIn(X86::EFLAGS))
3912 return true;
3913
3914 // If stack probes have to loop inline or call, that will clobber EFLAGS.
3915 // FIXME: we could allow cases that will use emitStackProbeInlineGenericBlock.
3916  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
3917  const X86TargetLowering &TLI = *STI.getTargetLowering();
3918 if (TLI.hasInlineStackProbe(MF) || TLI.hasStackProbeSymbol(MF))
3919 return false;
3920
3921  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3922  return !TRI->hasStackRealignment(MF) && !X86FI->hasSwiftAsyncContext();
3923}
3924
3925bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
3926  assert(MBB.getParent() && "Block is not attached to a function!");
3927
3928 // Win64 has strict requirements in terms of epilogue and we are
3929 // not taking a chance at messing with them.
3930 // I.e., unless this block is already an exit block, we can't use
3931 // it as an epilogue.
3932 if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock())
3933 return false;
3934
3935 // Swift async context epilogue has a BTR instruction that clobbers parts of
3936 // EFLAGS.
3937 const MachineFunction &MF = *MBB.getParent();
3938  if (MF.getInfo<X86MachineFunctionInfo>()->hasSwiftAsyncContext())
3939    return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
3940
3941 if (canUseLEAForSPInEpilogue(*MBB.getParent()))
3942 return true;
3943
3944 // If we cannot use LEA to adjust SP, we may need to use ADD, which
3945 // clobbers the EFLAGS. Check that we do not need to preserve it,
3946 // otherwise, conservatively assume this is not
3947 // safe to insert the epilogue here.
3948  return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
3949}
3950
3951bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
3952  // If we may need to emit frameless compact unwind information, give
3953 // up as this is currently broken: PR25614.
3954 bool CompactUnwind =
3955      MF.getContext().getObjectFileInfo()->getCompactUnwindSection() != nullptr;
3956  return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF) ||
3957 !CompactUnwind) &&
3958 // The lowering of segmented stack and HiPE only support entry
3959 // blocks as prologue blocks: PR26107. This limitation may be
3960 // lifted if we fix:
3961 // - adjustForSegmentedStacks
3962 // - adjustForHiPEPrologue
3963         MF.getFunction().getCallingConv() != CallingConv::HiPE &&
3964         !MF.shouldSplitStack();
3965}
3966
3967MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
3968    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
3969    const DebugLoc &DL, bool RestoreSP) const {
3970 assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env");
3971 assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32");
3972 assert(STI.is32Bit() && !Uses64BitFramePtr &&
3973 "restoring EBP/ESI on non-32-bit target");
3974
3975 MachineFunction &MF = *MBB.getParent();
3976 Register FramePtr = TRI->getFrameRegister(MF);
3977 Register BasePtr = TRI->getBaseRegister();
3978 WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo();
3979  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3980  MachineFrameInfo &MFI = MF.getFrameInfo();
3981
3982 // FIXME: Don't set FrameSetup flag in catchret case.
3983
3984 int FI = FuncInfo.EHRegNodeFrameIndex;
3985 int EHRegSize = MFI.getObjectSize(FI);
3986
3987 if (RestoreSP) {
3988 // MOV32rm -EHRegSize(%ebp), %esp
3989 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP),
3990 X86::EBP, true, -EHRegSize)
3991        .setMIFlag(MachineInstr::FrameSetup);
3992  }
3993
3994 Register UsedReg;
3995 int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg).getFixed();
3996 int EndOffset = -EHRegOffset - EHRegSize;
3997 FuncInfo.EHRegNodeEndOffset = EndOffset;
3998
3999 if (UsedReg == FramePtr) {
4000 // ADD $offset, %ebp
4001 unsigned ADDri = getADDriOpcode(false);
4002 BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr)
4003        .addReg(FramePtr)
4004        .addImm(EndOffset)
4005        .setMIFlag(MachineInstr::FrameSetup)
4006        ->getOperand(3)
4007        .setIsDead();
4008 assert(EndOffset >= 0 &&
4009 "end of registration object above normal EBP position!");
4010 } else if (UsedReg == BasePtr) {
4011 // LEA offset(%ebp), %esi
4012 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr),
4013 FramePtr, false, EndOffset)
4014        .setMIFlag(MachineInstr::FrameSetup);
4015    // MOV32rm SavedEBPOffset(%esi), %ebp
4016 assert(X86FI->getHasSEHFramePtrSave());
4017 int Offset =
4018 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
4019 .getFixed();
4020 assert(UsedReg == BasePtr);
4021 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr),
4022 UsedReg, true, Offset)
4023        .setMIFlag(MachineInstr::FrameSetup);
4024  } else {
4025 llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr");
4026 }
4027 return MBBI;
4028}
4029
4030int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
4031  return TRI->getSlotSize();
4032}
4033
4034Register
4035X86FrameLowering::getInitialCFARegister(const MachineFunction &MF) const {
4036  return StackPtr;
4037}
4038
4039TargetFrameLowering::DwarfFrameBase
4040X86FrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
4041  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
4042  Register FrameRegister = RI->getFrameRegister(MF);
4043 if (getInitialCFARegister(MF) == FrameRegister &&
4044      MF.getInfo<X86MachineFunctionInfo>()->hasCFIAdjustCfa()) {
4045    DwarfFrameBase FrameBase;
4046 FrameBase.Kind = DwarfFrameBase::CFA;
4047 FrameBase.Location.Offset =
4048        -MF.getFrameInfo().getStackSize() - getInitialCFAOffset(MF);
4049    return FrameBase;
4050 }
4051
4052 return DwarfFrameBase{DwarfFrameBase::Register, {FrameRegister}};
4053}
4054
4055namespace {
4056// Struct used by orderFrameObjects to help sort the stack objects.
4057struct X86FrameSortingObject {
4058 bool IsValid = false; // true if we care about this Object.
4059 unsigned ObjectIndex = 0; // Index of Object into MFI list.
4060 unsigned ObjectSize = 0; // Size of Object in bytes.
4061 Align ObjectAlignment = Align(1); // Alignment of Object in bytes.
4062 unsigned ObjectNumUses = 0; // Object static number of uses.
4063};
4064
4065// The comparison function we use for std::sort to order our local
4066// stack symbols. The current algorithm is to use an estimated
4067// "density". This takes into consideration the size and number of
4068// uses each object has in order to roughly minimize code size.
4069// So, for example, an object of size 16B that is referenced 5 times
4070// will get higher priority than 4 4B objects referenced 1 time each.
4071// It's not perfect and we may be able to squeeze a few more bytes out of
4072// it (for example : 0(esp) requires fewer bytes, symbols allocated at the
4073// fringe end can have special consideration, given their size is less
4074// important, etc.), but the algorithmic complexity grows too much to be
4075// worth the extra gains we get. This gets us pretty close.
4076// The final order leaves us with objects with highest priority going
4077// at the end of our list.
4078struct X86FrameSortingComparator {
4079 inline bool operator()(const X86FrameSortingObject &A,
4080 const X86FrameSortingObject &B) const {
4081 uint64_t DensityAScaled, DensityBScaled;
4082
4083 // For consistency in our comparison, all invalid objects are placed
4084 // at the end. This also allows us to stop walking when we hit the
4085 // first invalid item after it's all sorted.
4086 if (!A.IsValid)
4087 return false;
4088 if (!B.IsValid)
4089 return true;
4090
4091 // The density is calculated by doing :
4092 // (double)DensityA = A.ObjectNumUses / A.ObjectSize
4093 // (double)DensityB = B.ObjectNumUses / B.ObjectSize
4094 // Since this approach may cause inconsistencies in
4095 // the floating point <, >, == comparisons, depending on the floating
4096 // point model with which the compiler was built, we're going
4097 // to scale both sides by multiplying with
4098 // A.ObjectSize * B.ObjectSize. This ends up factoring away
4099 // the division and, with it, the need for any floating point
4100 // arithmetic.
4101 DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) *
4102 static_cast<uint64_t>(B.ObjectSize);
4103 DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) *
4104 static_cast<uint64_t>(A.ObjectSize);
4105
4106 // If the two densities are equal, prioritize highest alignment
4107 // objects. This allows for similar alignment objects
4108 // to be packed together (given the same density).
4109 // There's room for improvement here, also, since we can pack
4110 // similar alignment (different density) objects next to each
4111 // other to save padding. This will also require further
4112 // complexity/iterations, and the overall gain isn't worth it,
4113 // in general. Something to keep in mind, though.
4114 if (DensityAScaled == DensityBScaled)
4115 return A.ObjectAlignment < B.ObjectAlignment;
4116
4117 return DensityAScaled < DensityBScaled;
4118 }
4119};
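// Worked example: for A = (16 bytes, 5 uses) and B = (4 bytes, 1 use),
// DensityAScaled = 5 * 4 = 20 and DensityBScaled = 1 * 16 = 16, agreeing with
// the exact densities 5/16 > 1/4, so A compares greater and ends up later in
// the list, i.e. with higher priority.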
4120} // namespace
4121
4122// Order the symbols in the local stack.
4123// We want to place the local stack objects in some sort of sensible order.
4124// The heuristic we use is to try and pack them according to static number
4125// of uses and size of object in order to minimize code size.
4127 const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
4128 const MachineFrameInfo &MFI = MF.getFrameInfo();
4129
4130 // Don't waste time if there's nothing to do.
4131 if (ObjectsToAllocate.empty())
4132 return;
4133
4134 // Create an array of all MFI objects. We won't need all of these
4135 // objects, but we're going to create a full array of them to make
4136 // it easier to index into when we're counting "uses" down below.
4137 // We want to be able to easily/cheaply access an object by simply
4138 // indexing into it, instead of having to search for it every time.
4139 std::vector<X86FrameSortingObject> SortingObjects(MFI.getObjectIndexEnd());
4140
4141 // Walk the objects we care about and mark them as such in our working
4142 // struct.
4143 for (auto &Obj : ObjectsToAllocate) {
4144 SortingObjects[Obj].IsValid = true;
4145 SortingObjects[Obj].ObjectIndex = Obj;
4146 SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlign(Obj);
4147 // Set the size.
4148 int ObjectSize = MFI.getObjectSize(Obj);
4149 if (ObjectSize == 0)
4150 // Variable size. Just use 4.
4151 SortingObjects[Obj].ObjectSize = 4;
4152 else
4153 SortingObjects[Obj].ObjectSize = ObjectSize;
4154 }
4155
4156 // Count the number of uses for each object.
4157 for (auto &MBB : MF) {
4158 for (auto &MI : MBB) {
4159 if (MI.isDebugInstr())
4160 continue;
4161 for (const MachineOperand &MO : MI.operands()) {
4162 // Check to see if it's a local stack symbol.
4163 if (!MO.isFI())
4164 continue;
4165 int Index = MO.getIndex();
4166 // Check to see if it falls within our range, and is tagged
4167 // to require ordering.
4168 if (Index >= 0 && Index < MFI.getObjectIndexEnd() &&
4169 SortingObjects[Index].IsValid)
4170 SortingObjects[Index].ObjectNumUses++;
4171 }
4172 }
4173 }
4174
4175 // Sort the objects using X86FrameSortingAlgorithm (see its comment for
4176 // info).
4177 llvm::stable_sort(SortingObjects, X86FrameSortingComparator());
4178
4179  // Now modify the original list to represent the final order that we want.
4180  // The order will depend on whether we're going to access the objects from
4181  // the stack pointer or the frame pointer. For SP, the objects we want at
4182  // smaller offsets should end up at the end of the list.
4183  // For FP, it should be flipped.
4184 int i = 0;
4185 for (auto &Obj : SortingObjects) {
4186 // All invalid items are sorted at the end, so it's safe to stop.
4187 if (!Obj.IsValid)
4188 break;
4189 ObjectsToAllocate[i++] = Obj.ObjectIndex;
4190 }
4191
4192 // Flip it if we're accessing off of the FP.
4193 if (!TRI->hasStackRealignment(MF) && hasFP(MF))
4194 std::reverse(ObjectsToAllocate.begin(), ObjectsToAllocate.end());
4195}
4196
4197unsigned
4198X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const {
4199  // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue.
4200 unsigned Offset = 16;
4201 // RBP is immediately pushed.
4202 Offset += SlotSize;
4203 // All callee-saved registers are then pushed.
4204 Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
4205 // Every funclet allocates enough stack space for the largest outgoing call.
4206 Offset += getWinEHFuncletFrameSize(MF);
4207 return Offset;
4208}
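// Worked example with illustrative numbers: two callee-saved GPRs
// (CalleeSavedFrameSize == 16) and a 32-byte funclet frame give a parent
// frame offset of 16 + 8 + 16 + 32 == 72 bytes.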
4209
4210void X86FrameLowering::processFunctionBeforeFrameFinalized(
4211    MachineFunction &MF, RegScavenger *RS) const {
4212 // Mark the function as not having WinCFI. We will set it back to true in
4213 // emitPrologue if it gets called and emits CFI.
4214 MF.setHasWinCFI(false);
4215
4216 MachineFrameInfo &MFI = MF.getFrameInfo();
4217 // If the frame is big enough that we might need to scavenge a register to
4218 // handle huge offsets, reserve a stack slot for that now.
4219 if (!isInt<32>(MFI.estimateStackSize(MF))) {
4220 int FI = MFI.CreateStackObject(SlotSize, Align(SlotSize), false);
4221    RS->addScavengingFrameIndex(FI);
4222  }
4223
4224 // If we are using Windows x64 CFI, ensure that the stack is always 8 byte
4225 // aligned. The format doesn't support misaligned stack adjustments.
4226  if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
4227    MFI.ensureMaxAlignment(Align(SlotSize));
4228
4229 // If this function isn't doing Win64-style C++ EH, we don't need to do
4230 // anything.
4231 if (STI.is64Bit() && MF.hasEHFunclets() &&
4232      classifyEHPersonality(MF.getFunction().getPersonalityFn()) ==
4233          EHPersonality::MSVC_CXX) {
4234    adjustFrameForMsvcCxxEh(MF);
4235 }
4236}
4237
4238void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const {
4239 // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset
4240 // relative to RSP after the prologue. Find the offset of the last fixed
4241 // object, so that we can allocate a slot immediately following it. If there
4242 // were no fixed objects, use offset -SlotSize, which is immediately after the
4243 // return address. Fixed objects have negative frame indices.
4244 MachineFrameInfo &MFI = MF.getFrameInfo();
4245 WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
4246 int64_t MinFixedObjOffset = -SlotSize;
4247 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I)
4248 MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I));
4249
4250 for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
4251 for (WinEHHandlerType &H : TBME.HandlerArray) {
4252 int FrameIndex = H.CatchObj.FrameIndex;
4253 if ((FrameIndex != INT_MAX) && MFI.getObjectOffset(FrameIndex) == 0) {
4254 // Ensure alignment.
4255 unsigned Align = MFI.getObjectAlign(FrameIndex).value();
4256 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Align;
4257 MinFixedObjOffset -= MFI.getObjectSize(FrameIndex);
4258 MFI.setObjectOffset(FrameIndex, MinFixedObjOffset);
4259 }
4260 }
4261 }
4262
4263 // Ensure alignment.
4264 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8;
4265 int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
4266 int UnwindHelpFI =
4267 MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*IsImmutable=*/false);
4268 EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
4269
4270 // Store -2 into UnwindHelp on function entry. We have to scan forwards past
4271 // other frame setup instructions.
4272 MachineBasicBlock &MBB = MF.front();
4273 auto MBBI = MBB.begin();
4274 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
4275 ++MBBI;
4276
4277  DebugLoc DL = MBB.findDebugLoc(MBBI);
4278  addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)),
4279 UnwindHelpFI)
4280 .addImm(-2);
4281}
4282
4283void X86FrameLowering::processFunctionAfterFrameFinalized(
4284    MachineFunction &MF, RegScavenger *RS) const {
4285 auto *X86FI = MF.getInfo<X86MachineFunctionInfo>();
4286
4287 if (STI.is32Bit() && MF.hasEHFunclets())
4288    restoreWinEHStackPointersInParent(MF);
4289  // We have emitted prolog and epilog. Don't need stack pointer saving
4290 // instruction any more.
4291 if (MachineInstr *MI = X86FI->getStackPtrSaveMI()) {
4292 MI->eraseFromParent();
4293 X86FI->setStackPtrSaveMI(nullptr);
4294 }
4295}
4296
4297void X86FrameLowering::restoreWinEHStackPointersInParent(
4298    MachineFunction &MF) const {
4299 // 32-bit functions have to restore stack pointers when control is transferred
4300 // back to the parent function. These blocks are identified as eh pads that
4301 // are not funclet entries.
4302 bool IsSEH = isAsynchronousEHPersonality(
4303      classifyEHPersonality(MF.getFunction().getPersonalityFn()));
4304  for (MachineBasicBlock &MBB : MF) {
4305 bool NeedsRestore = MBB.isEHPad() && !MBB.isEHFuncletEntry();
4306 if (NeedsRestore)
4307      restoreWin32EHStackPointers(MBB, MBB.begin(), DebugLoc(),
4308                                  /*RestoreSP=*/IsSEH);
4309 }
4310}
4311
4312// Compute the alignment gap between current SP after spilling FP/BP and the
4313// next properly aligned stack offset.
4315 const TargetRegisterClass *RC,
4316 unsigned NumSpilledRegs) {
4317  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
4318  unsigned AllocSize = TRI->getSpillSize(*RC) * NumSpilledRegs;
4319 Align StackAlign = MF.getSubtarget().getFrameLowering()->getStackAlign();
4320 unsigned AlignedSize = alignTo(AllocSize, StackAlign);
4321 return AlignedSize - AllocSize;
4322}
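// Worked example, assuming the usual 16-byte stack alignment on x86-64:
// spilling one 8-byte register gives AllocSize == 8 and AlignedSize == 16,
// so the gap is 8 bytes; spilling both FP and BP gives AllocSize == 16 and
// a gap of 0.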
4323
4324void X86FrameLowering::spillFPBPUsingSP(MachineFunction &MF,
4325                                         MachineBasicBlock::iterator BeforeMI,
4326                                         Register FP, Register BP,
4327 int SPAdjust) const {
4328 assert(FP.isValid() || BP.isValid());
4329
4330 MachineBasicBlock *MBB = BeforeMI->getParent();
4331 DebugLoc DL = BeforeMI->getDebugLoc();
4332
4333 // Spill FP.
4334 if (FP.isValid()) {
4335 BuildMI(*MBB, BeforeMI, DL,
4336 TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
4337 .addReg(FP);
4338 }
4339
4340 // Spill BP.
4341 if (BP.isValid()) {
4342 BuildMI(*MBB, BeforeMI, DL,
4343 TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
4344 .addReg(BP);
4345 }
4346
4347 // Make sure SP is aligned.
4348 if (SPAdjust)
4349 emitSPUpdate(*MBB, BeforeMI, DL, -SPAdjust, false);
4350
4351 // Emit unwinding information.
4352 if (FP.isValid() && needsDwarfCFI(MF)) {
4353 // Emit .cfi_remember_state to remember old frame.
4354 unsigned CFIIndex =
4355        MF.addFrameInst(MCCFIInstruction::createRememberState(nullptr));
4356    BuildMI(*MBB, BeforeMI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
4357 .addCFIIndex(CFIIndex);
4358
4359 // Setup new CFA value with DW_CFA_def_cfa_expression:
4360 // DW_OP_breg7+offset, DW_OP_deref, DW_OP_consts 16, DW_OP_plus
4361 SmallString<64> CfaExpr;
4362 uint8_t buffer[16];
4363 int Offset = SPAdjust;
4364 if (BP.isValid())
4365 Offset += TRI->getSpillSize(*TRI->getMinimalPhysRegClass(BP));
4366 // If BeforeMI is a frame setup instruction, we need to adjust the position
4367 // and offset of the new cfi instruction.
4368 if (TII.isFrameSetup(*BeforeMI)) {
4369 Offset += alignTo(TII.getFrameSize(*BeforeMI), getStackAlign());
4370 BeforeMI = std::next(BeforeMI);
4371 }
4372 Register StackPtr = TRI->getStackRegister();
4373 if (STI.isTarget64BitILP32())
4374      StackPtr = Register(getX86SubSuperRegister(StackPtr, 64));
4375    unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackPtr, true);
4376 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfStackPtr));
4377 CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
4378 CfaExpr.push_back(dwarf::DW_OP_deref);
4379 CfaExpr.push_back(dwarf::DW_OP_consts);
4380 CfaExpr.append(buffer, buffer + encodeSLEB128(SlotSize * 2, buffer));
4381 CfaExpr.push_back((uint8_t)dwarf::DW_OP_plus);
4382
4383 SmallString<64> DefCfaExpr;
4384 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
4385 DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
4386 DefCfaExpr.append(CfaExpr.str());
4387 BuildCFI(*MBB, BeforeMI, DL,
4388 MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
4389             MachineInstr::FrameSetup);
4390  }
4391}
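// Rough shape of the save sequence emitted above on x86-64 when both FP and
// BP are spilled (%rbx shown as the base pointer; registers and the SP
// adjustment are illustrative):
//
//   pushq %rbp
//   pushq %rbx
//   subq  $SPAdjust, %rsp      # only when an alignment gap is needed
//   .cfi_remember_state
//   .cfi_escape ...            # the DW_CFA_def_cfa_expression built above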
4392
4393void X86FrameLowering::restoreFPBPUsingSP(MachineFunction &MF,
4394                                          MachineBasicBlock::iterator AfterMI,
4395                                          Register FP, Register BP,
4396 int SPAdjust) const {
4397 assert(FP.isValid() || BP.isValid());
4398
4399 // Adjust SP so it points to spilled FP or BP.
4400 MachineBasicBlock *MBB = AfterMI->getParent();
4401 MachineBasicBlock::iterator Pos = std::next(AfterMI);
4402 DebugLoc DL = AfterMI->getDebugLoc();
4403 if (SPAdjust)
4404 emitSPUpdate(*MBB, Pos, DL, SPAdjust, false);
4405
4406 // Restore BP.
4407 if (BP.isValid()) {
4408 BuildMI(*MBB, Pos, DL,
4409 TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())), BP);
4410 }
4411
4412 // Restore FP.
4413 if (FP.isValid()) {
4414 BuildMI(*MBB, Pos, DL,
4415 TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())), FP);
4416
4417 // Emit unwinding information.
4418 if (needsDwarfCFI(MF)) {
4419 // Restore original frame with .cfi_restore_state.
4420 unsigned CFIIndex =
4421          MF.addFrameInst(MCCFIInstruction::createRestoreState(nullptr));
4422      BuildMI(*MBB, Pos, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
4423 .addCFIIndex(CFIIndex);
4424 }
4425 }
4426}
4427
4428void X86FrameLowering::saveAndRestoreFPBPUsingSP(
4429    MachineFunction &MF, MachineBasicBlock::iterator BeforeMI,
4430    MachineBasicBlock::iterator AfterMI, bool SpillFP, bool SpillBP) const {
4431 assert(SpillFP || SpillBP);
4432
4433 Register FP, BP;
4434 const TargetRegisterClass *RC;
4435 unsigned NumRegs = 0;
4436
4437 if (SpillFP) {
4438 FP = TRI->getFrameRegister(MF);
4439 if (STI.isTarget64BitILP32())
4440      FP = Register(getX86SubSuperRegister(FP, 64));
4441    RC = TRI->getMinimalPhysRegClass(FP);
4442 ++NumRegs;
4443 }
4444 if (SpillBP) {
4445 BP = TRI->getBaseRegister();
4446 if (STI.isTarget64BitILP32())
4447 BP = Register(getX86SubSuperRegister(BP, 64));
4448 RC = TRI->getMinimalPhysRegClass(BP);
4449 ++NumRegs;
4450 }
4451 int SPAdjust = computeFPBPAlignmentGap(MF, RC, NumRegs);
4452
4453 spillFPBPUsingSP(MF, BeforeMI, FP, BP, SPAdjust);
4454 restoreFPBPUsingSP(MF, AfterMI, FP, BP, SPAdjust);
4455}
4456
4457bool X86FrameLowering::skipSpillFPBP(
4458    MachineFunction &MF, MachineBasicBlock::reverse_iterator &MI) const {
4459  if (MI->getOpcode() == X86::LCMPXCHG16B_SAVE_RBX) {
4460 // The pseudo instruction LCMPXCHG16B_SAVE_RBX is generated in the form
4461 // SaveRbx = COPY RBX
4462 // SaveRbx = LCMPXCHG16B_SAVE_RBX ..., SaveRbx, implicit-def rbx
4463 // And later LCMPXCHG16B_SAVE_RBX is expanded to restore RBX from SaveRbx.
4464 // We should skip this instruction sequence.
4465 int FI;
4466 Register Reg;
4467 while (!(MI->getOpcode() == TargetOpcode::COPY &&
4468 MI->getOperand(1).getReg() == X86::RBX) &&
4469 !((Reg = TII.isStoreToStackSlot(*MI, FI)) && Reg == X86::RBX))
4470 ++MI;
4471 return true;
4472 }
4473 return false;
4474}
4475
4476static bool isFPBPAccess(const MachineInstr &MI, Register FP, Register BP,
4477                         const TargetRegisterInfo *TRI, bool &AccessFP,
4478 bool &AccessBP) {
4479 AccessFP = AccessBP = false;
4480 if (FP) {
4481 if (MI.findRegisterUseOperandIdx(FP, TRI, false) != -1 ||
4482 MI.findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
4483 AccessFP = true;
4484 }
4485 if (BP) {
4486 if (MI.findRegisterUseOperandIdx(BP, TRI, false) != -1 ||
4487 MI.findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
4488 AccessBP = true;
4489 }
4490 return AccessFP || AccessBP;
4491}
4492
4493// An invoke has been lowered to a normal function call by this point. We try
4494// to figure out whether MI comes from an invoke.
4495// Do we have any better method?
4496static bool isInvoke(const MachineInstr &MI, bool InsideEHLabels) {
4497 if (!MI.isCall())
4498 return false;
4499 if (InsideEHLabels)
4500 return true;
4501
4502 const MachineBasicBlock *MBB = MI.getParent();
4503 if (!MBB->hasEHPadSuccessor())
4504 return false;
4505
4506 // Check if there is another call instruction from MI to the end of MBB.
4507  MachineBasicBlock::const_iterator MBBI(MI), ME = MBB->end();
4508  for (++MBBI; MBBI != ME; ++MBBI)
4509 if (MBBI->isCall())
4510 return false;
4511 return true;
4512}
4513
4514/// Given the live range of FP or BP (DefMI, KillMI), check if there is any
4515/// interfering stack access in the range, usually generated by register spills.
4516void X86FrameLowering::checkInterferedAccess(
4518 MachineBasicBlock::reverse_iterator KillMI, bool SpillFP,
4519 bool SpillBP) const {
4520 if (DefMI == KillMI)
4521 return;
4522 if (TRI->hasBasePointer(MF)) {
4523 if (!SpillBP)
4524 return;
4525 } else {
4526 if (!SpillFP)
4527 return;
4528 }
4529
4530 auto MI = KillMI;
4531 while (MI != DefMI) {
4532 if (any_of(MI->operands(),
4533 [](const MachineOperand &MO) { return MO.isFI(); }))
4534 MF.getContext().reportError(SMLoc(),
4535 "Interference usage of base pointer/frame "
4536 "pointer.");
4537 MI++;
4538 }
4539}
4540
4541/// If a function uses the base pointer and the base pointer is clobbered by
4542/// inline asm, RA doesn't detect this case, and after the inline asm the base
4543/// pointer contains a garbage value.
4544/// For example, if a 32-bit x86 function uses the base pointer esi, and esi is
4545/// clobbered by the following inline asm
4546///    asm("rep movsb" : "+D"(ptr), "+S"(x), "+c"(c)::"memory");
4547/// we need to save esi before the asm and restore it afterwards.
4548///
4549/// The problem can also occur with the frame pointer, if there is a function
4550/// call and the callee uses a different calling convention and clobbers fp.
4551///
4552/// Because normal frame objects (spill slots) are accessed through the fp/bp
4553/// registers, we can't spill fp/bp to normal spill slots.
4554///
4555/// FIXME: There are 2 possible enhancements:
4556/// 1. In many cases there are physical registers not clobbered by the inline
4557///    asm; we could use one of them as the base pointer, or use a virtual
4558///    register as base pointer and let RA allocate a physical register for it.
4559/// 2. If no other instruction accesses the stack through fp/bp between the
4560///    inline asm and the epilog, and there is no CFI requirement for a correct
4561///    fp, we can skip the save and restore operations.
4562void X86FrameLowering::spillFPBP(MachineFunction &MF) const {
4563  Register FP, BP;
4564  const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
4565  if (TFI.hasFP(MF))
4566 FP = TRI->getFrameRegister(MF);
4567 if (TRI->hasBasePointer(MF))
4568 BP = TRI->getBaseRegister();
4569
4570  // Currently only inline asm and function calls can clobber fp/bp, so we
4571  // can do a quick test and return early.
4572 if (!MF.hasInlineAsm()) {
4573    X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
4574    if (!X86FI->getFPClobberedByCall())
4575 FP = 0;
4576 if (!X86FI->getBPClobberedByCall())
4577 BP = 0;
4578 }
4579 if (!FP && !BP)
4580 return;
4581
4582 for (MachineBasicBlock &MBB : MF) {
4583 bool InsideEHLabels = false;
4584 auto MI = MBB.rbegin(), ME = MBB.rend();
4585 auto TermMI = MBB.getFirstTerminator();
4586 if (TermMI == MBB.begin())
4587 continue;
4588 MI = *(std::prev(TermMI));
4589
4590 while (MI != ME) {
4591 // Skip frame setup/destroy instructions.
4592 // Skip Invoke (call inside try block) instructions.
4593 // Skip instructions handled by target.
4594 if (MI->getFlag(MachineInstr::MIFlag::FrameSetup) ||
4595          MI->getFlag(MachineInstr::MIFlag::FrameDestroy) ||
4596          isInvoke(*MI, InsideEHLabels) || skipSpillFPBP(MF, MI)) {
4597 ++MI;
4598 continue;
4599 }
4600
4601 if (MI->getOpcode() == TargetOpcode::EH_LABEL) {
4602 InsideEHLabels = !InsideEHLabels;
4603 ++MI;
4604 continue;
4605 }
4606
4607 bool AccessFP, AccessBP;
4608 // Check if fp or bp is used in MI.
4609 if (!isFPBPAccess(*MI, FP, BP, TRI, AccessFP, AccessBP)) {
4610 ++MI;
4611 continue;
4612 }
4613
4614 // Look for the range [DefMI, KillMI] in which fp or bp is defined and
4615 // used.
4616 bool FPLive = false, BPLive = false;
4617 bool SpillFP = false, SpillBP = false;
4618 auto DefMI = MI, KillMI = MI;
4619 do {
4620 SpillFP |= AccessFP;
4621 SpillBP |= AccessBP;
4622
4623 // Maintain FPLive and BPLive.
4624 if (FPLive && MI->findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
4625 FPLive = false;
4626 if (FP && MI->findRegisterUseOperandIdx(FP, TRI, false) != -1)
4627 FPLive = true;
4628 if (BPLive && MI->findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
4629 BPLive = false;
4630 if (BP && MI->findRegisterUseOperandIdx(BP, TRI, false) != -1)
4631 BPLive = true;
4632
4633 DefMI = MI++;
4634 } while ((MI != ME) &&
4635 (FPLive || BPLive ||
4636 isFPBPAccess(*MI, FP, BP, TRI, AccessFP, AccessBP)));
4637
4638 // Don't need to save/restore if FP is accessed through llvm.frameaddress.
4639 if (FPLive && !SpillBP)
4640 continue;
4641
4642 // If the bp is clobbered by a call, we should save and restore outside of
4643 // the frame setup instructions.
4644 if (KillMI->isCall() && DefMI != ME) {
4645 auto FrameSetup = std::next(DefMI);
4646        // Look for a frame setup instruction toward the start of the BB.
4647        // If we reach another call instruction first, there is no frame setup
4648        // instruction for the current call instruction.
4649 while (FrameSetup != ME && !TII.isFrameSetup(*FrameSetup) &&
4650 !FrameSetup->isCall())
4651 ++FrameSetup;
4652 // If a frame setup instruction is found, we need to find out the
4653 // corresponding frame destroy instruction.
4654 if (FrameSetup != ME && TII.isFrameSetup(*FrameSetup) &&
4655 (TII.getFrameSize(*FrameSetup) ||
4656 TII.getFrameAdjustment(*FrameSetup))) {
4657 while (!TII.isFrameInstr(*KillMI))
4658 --KillMI;
4659 DefMI = FrameSetup;
4660 MI = DefMI;
4661 ++MI;
4662 }
4663 }
4664
4665 checkInterferedAccess(MF, DefMI, KillMI, SpillFP, SpillBP);
4666
4667 // Call target function to spill and restore FP and BP registers.
4668 saveAndRestoreFPBPUsingSP(MF, &(*DefMI), &(*KillMI), SpillFP, SpillBP);
4669 }
4670 }
4671}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static const uint64_t kSplitStackAvailable
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
static cl::opt< int > PageSize("imp-null-check-page-size", cl::desc("The page size of the target in bytes"), cl::init(4096), cl::Hidden)
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static bool isTailCallOpcode(unsigned Opc)
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define H(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static constexpr MCPhysReg FPReg
static constexpr MCPhysReg SPReg
This file declares the machine register scavenger class.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
static bool is64Bit(const char *name)
static unsigned calculateSetFPREG(uint64_t SPAdjust)
static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary)
GetScratchRegister - Get a temp register for performing work in the segmented stack and the Erlang/Hi...
static unsigned getADDriOpcode(bool IsLP64)
static unsigned getPUSH2Opcode(const X86Subtarget &ST)
static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm)
static unsigned getLEArOpcode(bool IsLP64)
static unsigned getSUBriOpcode(bool IsLP64)
static bool flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB)
Check if the flags need to be preserved before the terminators.
static bool isFPBPAccess(const MachineInstr &MI, Register FP, Register BP, const TargetRegisterInfo *TRI, bool &AccessFP, bool &AccessBP)
static bool isOpcodeRep(unsigned Opcode)
Return true if an opcode is part of the REP group of instructions.
static unsigned getANDriOpcode(bool IsLP64, int64_t Imm)
static bool isEAXLiveIn(MachineBasicBlock &MBB)
static int computeFPBPAlignmentGap(MachineFunction &MF, const TargetRegisterClass *RC, unsigned NumSpilledRegs)
static unsigned getADDrrOpcode(bool IsLP64)
static bool HasNestArgument(const MachineFunction *MF)
static unsigned getPOPOpcode(const X86Subtarget &ST)
static bool isInvoke(const MachineInstr &MI, bool InsideEHLabels)
static unsigned getPOP2Opcode(const X86Subtarget &ST)
static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD, const StringRef LiteralName)
Lookup an ERTS parameter in the !hipe.literals named metadata node.
static bool blockEndIsUnreachable(const MachineBasicBlock &MBB, MachineBasicBlock::const_iterator MBBI)
static unsigned getSUBrrOpcode(bool IsLP64)
static unsigned getPUSHOpcode(const X86Subtarget &ST)
constexpr uint64_t MaxSPChunk
static const unsigned FramePtr
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
reverse_iterator rend() const
Definition ArrayRef.h:139
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:142
reverse_iterator rbegin() const
Definition ArrayRef.h:138
BitVector & reset()
Definition BitVector.h:411
BitVector & set()
Definition BitVector.h:370
iterator_range< const_set_bits_iterator > set_bits() const
Definition BitVector.h:159
static BranchProbability getOne()
static BranchProbability getZero()
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
A debug info location.
Definition DebugLoc.h:124
unsigned size() const
Definition DenseMap.h:108
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
bool hasPersonalityFn() const
Check whether this function has a personality function.
Definition Function.h:903
Constant * getPersonalityFn() const
Get the personality function associated with this function.
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
size_t arg_size() const
Definition Function.h:899
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
Definition Function.h:681
const Argument * const_arg_iterator
Definition Function.h:73
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition Function.h:227
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:727
Module * getParent()
Get the module that this global value is contained inside of...
bool usesWindowsCFI() const
Definition MCAsmInfo.h:652
static MCCFIInstruction createDefCfaRegister(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_def_cfa_register modifies a rule for computing CFA.
Definition MCDwarf.h:592
static MCCFIInstruction createGnuArgsSize(MCSymbol *L, int64_t Size, SMLoc Loc={})
A special wrapper for .cfi_escape that indicates GNU_ARGS_SIZE.
Definition MCDwarf.h:703
static MCCFIInstruction createRestore(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_restore says that the rule for Register is now the same as it was at the beginning of the functi...
Definition MCDwarf.h:666
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition MCDwarf.h:585
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition MCDwarf.h:627
static MCCFIInstruction createRememberState(MCSymbol *L, SMLoc Loc={})
.cfi_remember_state Save all current rules for all registers.
Definition MCDwarf.h:686
OpType getOperation() const
Definition MCDwarf.h:720
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition MCDwarf.h:600
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition MCDwarf.h:697
static MCCFIInstruction createAdjustCfaOffset(MCSymbol *L, int64_t Adjustment, SMLoc Loc={})
.cfi_adjust_cfa_offset Same as .cfi_def_cfa_offset, but Offset is a relative value that is added/subt...
Definition MCDwarf.h:608
static MCCFIInstruction createRestoreState(MCSymbol *L, SMLoc Loc={})
.cfi_restore_state Restore the previously saved state.
Definition MCDwarf.h:691
const MCObjectFileInfo * getObjectFileInfo() const
Definition MCContext.h:416
const MCRegisterInfo * getRegisterInfo() const
Definition MCContext.h:414
LLVM_ABI void reportError(SMLoc L, const Twine &Msg)
MCSection * getCompactUnwindSection() const
MCRegAliasIterator enumerates all registers aliasing Reg.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
Metadata node.
Definition Metadata.h:1077
A single uniqued string.
Definition Metadata.h:720
LLVM_ABI StringRef getString() const
Definition Metadata.cpp:617
Machine Value Type.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
MachineInstrBundleIterator< const MachineInstr > const_iterator
iterator_range< livein_iterator > liveins() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Live
Register is known to be (at least partially) live.
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
LLVM_ABI bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
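computeRegisterLiveness answers a local question ("is Reg live just before this point?") without global liveness data, so it can return LQR_Unknown when its neighborhood scan is inconclusive. A hedged sketch of the conservative way to consume it, the kind of query a frame-lowering pass makes for EFLAGS before folding a stack adjustment into arithmetic (helper name is illustrative):

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/MCRegister.h"

static bool regMayBeLiveBefore(const llvm::MachineBasicBlock &MBB,
                               llvm::MachineBasicBlock::const_iterator Before,
                               const llvm::TargetRegisterInfo *TRI,
                               llvm::MCRegister Reg) {
  using namespace llvm;
  // Only LQR_Dead proves the register is free; LQR_Live and LQR_Unknown
  // must both be treated as "possibly live".
  return MBB.computeRegisterLiveness(TRI, Reg, Before) !=
         MachineBasicBlock::LQR_Dead;
}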
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool needsSplitStackProlog() const
Return true if this function requires a split stack prolog, even if it uses no stack space.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool adjustsStack() const
Return true if this function adjusts the stack – e.g., when calling another function.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
LLVM_ABI void ensureMaxAlignment(Align Alignment)
Make sure the function is at least Align bytes aligned.
bool hasCalls() const
Return true if the current function has any function calls.
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
void setObjectOffset(int ObjectIdx, int64_t SPOffset)
Set the stack frame offset of the specified object.
uint64_t getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
bool hasOpaqueSPAdjustment() const
Returns true if the function contains opaque dynamic stack adjustments.
void setCVBytesOfCalleeSavedRegisters(unsigned S)
LLVM_ABI int CreateSpillStackObject(uint64_t Size, Align Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
LLVM_ABI uint64_t estimateStackSize(const MachineFunction &MF) const
Estimate and return the size of the stack frame.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a...
const std::vector< CalleeSavedInfo > & getCalleeSavedInfo() const
Returns a reference to call saved info vector for the current function.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
bool hasCopyImplyingStackAdjustment() const
Returns true if the function contains operations which will lower down to instructions which manipula...
bool hasStackObjects() const
Return true if there are any stack objects in this function.
LLVM_ABI int CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset, bool IsImmutable=false)
Create a spill slot at a fixed location on the stack.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
void setStackSize(uint64_t Size)
Set the size of the stack.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
void setOffsetAdjustment(int64_t Adj)
Set the correction for frame offsets.
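Most MachineFrameInfo objects come in two flavors: statically sized slots whose offsets are assigned later by prolog/epilog insertion, and fixed objects pinned at a known offset from the incoming stack pointer. A small sketch with made-up sizes, not values this file computes:

#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Support/Alignment.h"

static int createFrameObjects(llvm::MachineFunction &MF, unsigned SlotSize) {
  using namespace llvm;
  MachineFrameInfo &MFI = MF.getFrameInfo();
  // A register spill slot: size and alignment now, offset decided later.
  int SpillFI = MFI.CreateSpillStackObject(SlotSize, Align(SlotSize));
  // A fixed object at a known offset from the incoming SP, e.g. an
  // argument the caller already pushed (offset value is illustrative).
  int ArgFI = MFI.CreateFixedObject(SlotSize, /*SPOffset=*/8,
                                    /*IsImmutable=*/true);
  (void)ArgFI;
  return SpillFI;
}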
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const std::vector< MCCFIInstruction > & getFrameInstructions() const
Returns a reference to a list of cfi instructions in the function's prologue.
bool hasInlineAsm() const
Returns true if the function contains any inline assembly.
void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair, unsigned SubReg=0)
Create a substitution from one <instr,operand> value to a different, new value.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
bool needsFrameMoves() const
True if this function needs frame moves for debug or exceptions.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_front(MachineBasicBlock *MBB)
const char * createExternalSymbolName(StringRef Name)
Allocate a string and populate it with the given external symbol name.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
bool verify(Pass *p=nullptr, const char *Banner=nullptr, raw_ostream *OS=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier; useful to invoke from a debugger.
Function & getFunction()
Return the LLVM function that this machine code represents.
const std::vector< LandingPadInfo > & getLandingPads() const
Return a reference to the landing pad info for the current function.
BasicBlockListType::iterator iterator
bool shouldSplitStack() const
Should we be emitting segmented stack stuff for the function.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
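These builder methods chain off BuildMI to assemble an instruction one operand at a time. As an illustration, a sketch of emitting a call to a named stack-probe routine; both the opcode parameter and the "__chkstk" symbol name are assumptions (they are target- and OS-dependent), not values taken from this file:

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

static void emitProbeCall(llvm::MachineFunction &MF,
                          llvm::MachineBasicBlock &MBB,
                          llvm::MachineBasicBlock::iterator MBBI,
                          const llvm::DebugLoc &DL,
                          const llvm::TargetInstrInfo &TII,
                          unsigned CallOpc) {
  using namespace llvm;
  // The MachineFunction owns the storage behind external-symbol operands.
  const char *Sym = MF.createExternalSymbolName("__chkstk");
  BuildMI(MBB, MBBI, DL, TII.get(CallOpc))
      .addExternalSymbol(Sym)
      .setMIFlag(MachineInstr::FrameSetup);
}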
Representation of each machine instruction.
unsigned getNumOperands() const
Returns the total number of operands.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI unsigned getDebugInstrNum()
Fetch the instruction number of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
@ MOVolatile
The memory access is volatile.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
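clobbersPhysReg is how call-crossing register questions are answered: a call's regmask operand encodes every physical register the callee may clobber. A minimal sketch of scanning a call for such a clobber:

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/MC/MCRegister.h"

static bool callClobbersReg(const llvm::MachineInstr &Call,
                            llvm::MCRegister Reg) {
  using namespace llvm;
  for (const MachineOperand &MO : Call.operands())
    if (MO.isRegMask() &&
        MachineOperand::clobbersPhysReg(MO.getRegMask(), Reg))
      return true; // Reg is not preserved across this call.
  return false;
}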
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
LLVM_ABI bool isLiveIn(Register Reg) const
NamedMDNode * getNamedMetadata(StringRef Name) const
Return the first NamedMDNode in the module with the specified name.
Definition Module.cpp:296
unsigned getCodeViewFlag() const
Returns the CodeView Version by checking module flags.
Definition Module.cpp:597
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:303
iterator end() const
Definition ArrayRef.h:348
iterator begin() const
Definition ArrayRef.h:347
A tuple of MDNodes.
Definition Metadata.h:1749
LLVM_ABI MDNode * getOperand(unsigned i) const
LLVM_ABI unsigned getNumOperands() const
void addScavengingFrameIndex(int FI)
Add a scavenging frame index.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isValid() const
Definition Register.h:107
SlotIndex - An opaque wrapper around machine indexes.
Definition SlotIndexes.h:66
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:31
int64_t getFixed() const
Returns the fixed component of the stack.
Definition TypeSize.h:47
static StackOffset getFixed(int64_t Fixed)
Definition TypeSize.h:40
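StackOffset lets frame-index queries report a byte offset alongside a base register without committing to scalable components, which x86 never uses. A simplified sketch of the arithmetic behind an SP-relative reference, ignoring realignment and base-pointer cases for brevity (the helper name is illustrative):

#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Support/TypeSize.h"

static llvm::StackOffset spRelativeOffset(const llvm::MachineFunction &MF,
                                          int FI) {
  using namespace llvm;
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  // getObjectOffset is relative to the incoming SP; adding the frame size
  // rebases it on the SP value after the prologue's allocation.
  int64_t Off = MFI.getObjectOffset(FI) + int64_t(MFI.getStackSize());
  return StackOffset::getFixed(Off);
}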
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
static constexpr size_t npos
Definition StringRef.h:57
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
int getOffsetOfLocalArea() const
getOffsetOfLocalArea - This method returns the offset of the local area from the stack pointer on ent...
TargetFrameLowering(StackDirection D, Align StackAl, int LAO, Align TransAl=Align(1), bool StackReal=true)
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
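getStackAlign pairs naturally with llvm::alignTo (listed further below) to round a raw frame size up to the ABI stack alignment. A one-line sketch:

#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Support/Alignment.h"
#include <cstdint>

static uint64_t alignedFrameSize(const llvm::TargetFrameLowering &TFL,
                                 uint64_t RawSize) {
  // E.g. RawSize = 40 with a 16-byte stack alignment yields 48.
  return llvm::alignTo(RawSize, TFL.getStackAlign());
}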
const Triple & getTargetTriple() const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
SwiftAsyncFramePointerMode SwiftAsyncFramePointer
Control when and how the Swift async frame pointer bit should be set.
LLVM_ABI bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual Register getFrameRegister(const MachineFunction &MF) const =0
Debug information queries.
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
bool isUEFI() const
Tests whether the OS is UEFI.
Definition Triple.h:674
bool isOSWindows() const
Tests whether the OS is Windows.
Definition Triple.h:679
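Frame lowering branches on the target triple to select Windows-style unwind handling. A hedged sketch of that kind of predicate; grouping UEFI with Windows mirrors their shared x64 unwind conventions, and the helper name is made up:

#include "llvm/TargetParser/Triple.h"

static bool usesWindowsStyleUnwind(const llvm::Triple &TT) {
  return TT.isOSWindows() || TT.isUEFI();
}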
Value wrapper in the Metadata hierarchy.
Definition Metadata.h:457
Value * getValue() const
Definition Metadata.h:497
bool has128ByteRedZone(const MachineFunction &MF) const
Return true if the function has a redzone (accessible bytes past the frame of the top of stack functi...
void spillFPBP(MachineFunction &MF) const override
If a function uses base pointer and the base pointer is clobbered by inline asm, RA doesn't detect th...
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override
canSimplifyCallFramePseudos - If there is a reserved call frame, the call frame pseudos can be simpli...
bool needsFrameIndexResolution(const MachineFunction &MF) const override
X86FrameLowering(const X86Subtarget &STI, MaybeAlign StackAlignOverride)
const X86RegisterInfo * TRI
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
bool hasFPImpl(const MachineFunction &MF) const override
hasFPImpl - Return true if the specified function should have a dedicated frame pointer register.
MachineBasicBlock::iterator restoreWin32EHStackPointers(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool RestoreSP=false) const
Sets up EBP and optionally ESI based on the incoming EBP value.
int getInitialCFAOffset(const MachineFunction &MF) const override
Return the initial CFA offset value, i.e. the one valid at the beginning of the function (before any stack operations).
bool canUseAsPrologue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as a prologue for the target.
bool hasReservedCallFrame(const MachineFunction &MF) const override
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
void emitStackProbe(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog, std::optional< MachineFunction::DebugInstrOperandPair > InstrNum=std::nullopt) const
Emit target stack probe code.
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool IsPrologue) const
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
int64_t mergeSPAdd(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, int64_t AddOffset, bool doMergeWithPrevious) const
Equivalent to: mergeSPUpdates(MBB, MBBI, [AddOffset](int64_t Offset) { return AddOffset + Offset; }...
StackOffset getFrameIndexReferenceSP(const MachineFunction &MF, int FI, Register &SPReg, int Adjustment) const
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
bool enableShrinkWrapping(const MachineFunction &MF) const override
Returns true if the target will correctly handle shrink wrapping.
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - This method should return the base register and offset used to reference a f...
void inlineStackProbe(MachineFunction &MF, MachineBasicBlock &PrologMBB) const override
Replace a StackProbe inline-stub with the actual probe code inline.
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
const X86InstrInfo & TII
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &DL, int64_t NumBytes, bool InEpilogue) const
Emit a series of instructions to increment / decrement the stack pointer by a constant value.
bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as an epilogue for the target.
bool Is64Bit
Is64Bit implies that x86_64 instructions are available.
Register getInitialCFARegister(const MachineFunction &MF) const override
Return the initial CFA register value, i.e. the one valid at the beginning of the function (before any stack operations).
bool Uses64BitFramePtr
True if the 64-bit frame or stack pointer should be used.
unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const override
void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const override
Adjust the prologue to have the function use segmented stacks.
DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const override
Return the frame base information to be encoded in the DWARF subprogram debug info.
void emitCalleeSavedFrameMovesFullCFA(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const override
Emits Dwarf Info specifying offsets of callee saved registers and frame pointer.
int getWin64EHFrameIndexRef(const MachineFunction &MF, int FI, Register &SPReg) const
bool canUseLEAForSPInEpilogue(const MachineFunction &MF) const
Check that LEA can be used on SP in an epilogue sequence for MF.
bool stackProbeFunctionModifiesSP() const override
Does the stack probe function call return with a modified stack pointer?
void orderFrameObjects(const MachineFunction &MF, SmallVectorImpl< int > &ObjectsToAllocate) const override
Order the symbols in the local stack.
void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const MCCFIInstruction &CFIInst, MachineInstr::MIFlag Flag=MachineInstr::NoFlags) const
Wraps up getting a CFI index and building a MachineInstr for it.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
void processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameIndicesReplaced - This method is called immediately before MO_FrameIndex op...
StackOffset getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, Register &FrameReg, bool IgnoreSPUpdates) const override
Same as getFrameIndexReference, except that the stack pointer (as opposed to the frame pointer) will ...
void restoreWinEHStackPointersInParent(MachineFunction &MF) const
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
void adjustForHiPEPrologue(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const override
Erlang programs may need a special prologue to handle the stack size they might need at runtime.
const X86Subtarget & STI
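One detail behind the emitSPUpdate helper listed above is that x86 add/sub immediates are 32-bit signed, so a very large adjustment must be applied in pieces. A simplified, self-contained sketch of just that chunking idea, not the method's actual implementation:

#include <algorithm>
#include <cstdint>
#include <limits>

// Invoke Emit once per ADD/SUB-sized piece of NumBytes.
static void forEachSPChunk(int64_t NumBytes, void (*Emit)(int32_t)) {
  const int64_t MaxImm = std::numeric_limits<int32_t>::max();
  while (NumBytes != 0) {
    int64_t Chunk = NumBytes > 0 ? std::min(NumBytes, MaxImm)
                                 : std::max(NumBytes, -MaxImm);
    Emit(static_cast<int32_t>(Chunk)); // one SP += Chunk instruction
    NumBytes -= Chunk;
  }
}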
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
bool isCandidateForPush2Pop2(Register Reg) const
void setRestoreBasePointer(const MachineFunction *MF)
DenseMap< int, unsigned > & getWinEHXMMSlotInfo()
MachineInstr * getStackPtrSaveMI() const
AMXProgModelEnum getAMXProgModel() const
void setStackPtrSaveMI(MachineInstr *MI)
void setCalleeSavedFrameSize(unsigned bytes)
const X86TargetLowering * getTargetLowering() const override
bool isTargetWindowsCoreCLR() const
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
uint16_t StackAdjustment(const RuntimeFunction &RF)
StackAdjustment - calculated stack adjustment in words.
Definition ARMWinEH.h:200
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ HiPE
Used by the High-Performance Erlang Compiler (HiPE).
Definition CallingConv.h:53
@ X86_INTR
x86 hardware interrupt context.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ X86_FastCall
'fast' analog of X86_StdCall.
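Several of these conventions trigger special prologue paths (HiPE via adjustForHiPEPrologue, X86_INTR for interrupt frames). A sketch of the dispatch a frame-lowering pass performs; the helper name is illustrative:

#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"

static bool needsSpecialPrologue(const llvm::Function &F) {
  switch (F.getCallingConv()) {
  case llvm::CallingConv::HiPE:     // Erlang stack-limit checks.
  case llvm::CallingConv::X86_INTR: // Hardware interrupt frame.
    return true;
  default:
    return false;
  }
}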
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ MO_GOTPCREL
MO_GOTPCREL - On a symbol operand this indicates that the immediate is offset to the GOT entry for th...
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
void stable_sort(R &&Range)
Definition STLExtras.h:2038
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1705
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
MCRegister getX86SubSuperRegister(MCRegister Reg, unsigned Size, bool High=false)
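getX86SubSuperRegister maps between the width variants of one architectural register; Size is in bits. A tiny usage sketch, assuming the in-tree X86 target headers (where the function is declared) are on the include path:

#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/MC/MCRegister.h"

static llvm::MCRegister widenTo64(llvm::MCRegister Reg) {
  // E.g. X86::AX (16-bit) yields X86::RAX (64-bit).
  return llvm::getX86SubSuperRegister(Reg, /*Size=*/64);
}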
@ DwarfCFI
DWARF-like instruction based exceptions.
Definition CodeGen.h:55
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is congruent to Skew modulo Align.
Definition MathExtras.h:557
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:759
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1712
static bool isFuncletReturnInstr(const MachineInstr &MI)
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
@ DeploymentBased
Determine whether to set the bit statically or dynamically based on the deployment target.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
LLVM_ABI EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
IterT skipDebugInstructionsBackward(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It until it points to a non-debug instruction or to Begin and return the resulting iterator...
unsigned getUndefRegState(bool B)
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
bool isAsynchronousEHPersonality(EHPersonality Pers)
Returns true if this personality function catches asynchronous exceptions.
unsigned encodeSLEB128(int64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a SLEB128 value to an output stream.
Definition LEB128.h:24
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1941
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
void computeAndAddLiveIns(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB)
Convenience function combining computeLiveIns() and addLiveIns().
unsigned encodeULEB128(uint64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a ULEB128 value to an output stream.
Definition LEB128.h:81
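createGnuArgsSize (listed above) is itself a wrapper over this kind of encoding: CFI escapes are raw DWARF bytes, with ULEB128/SLEB128 used for their operands. A hedged sketch of assembling such an escape by hand, shown for GNU_ARGS_SIZE purely to illustrate the byte layout:

#include "llvm/ADT/SmallString.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/raw_ostream.h"

static llvm::MCCFIInstruction makeGnuArgsSizeEscape(uint64_t Size) {
  using namespace llvm;
  SmallString<8> Expr;
  raw_svector_ostream OS(Expr);
  OS << char(dwarf::DW_CFA_GNU_args_size); // opcode byte
  encodeULEB128(Size, OS);                 // ULEB128-encoded operand
  // createEscape copies the bytes, so the local buffer may go out of scope.
  return MCCFIInstruction::createEscape(/*L=*/nullptr, OS.str());
}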
static const MachineInstrBuilder & addRegOffset(const MachineInstrBuilder &MIB, Register Reg, bool isKill, int Offset)
addRegOffset - This function is used to add a memory reference of the form [Reg + Offset],...
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-ins for a set of MBBs until the computation converges.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Pair of physical register and lane mask.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
union llvm::TargetFrameLowering::DwarfFrameBase::(anonymous union) Location
enum llvm::TargetFrameLowering::DwarfFrameBase::FrameBaseKind Kind
SmallVector< WinEHTryBlockMapEntry, 4 > TryBlockMap
SmallVector< WinEHHandlerType, 1 > HandlerArray