LLVM 22.0.0git
AArch64PrologueEpilogue.cpp
Go to the documentation of this file.
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
12#include "AArch64Subtarget.h"
14#include "llvm/ADT/Statistic.h"
17#include "llvm/MC/MCContext.h"
18
19#define DEBUG_TYPE "frame-info"
20
21STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
22
23namespace llvm {
24
25static bool matchLibcall(const TargetLowering &TLI, const MachineOperand &MO,
26 RTLIB::Libcall LC) {
27 return MO.isSymbol() &&
28 StringRef(TLI.getLibcallName(LC)) == MO.getSymbolName();
29}
30
  // A call to obtain VG is only needed when the function changes streaming
  // mode and SVE is unavailable (so VG cannot be computed directly with CNTD
  // — see isVGInstruction below).
  return AFI->hasStreamingModeChanges() &&
         !MF.getSubtarget<AArch64Subtarget>().hasSVE();
}
35
  unsigned Opc = MBBI->getOpcode();
  // CNTD materializes the value of VG directly (usable when SVE is present).
  if (Opc == AArch64::CNTD_XPiI)
    return true;

  // The remaining patterns only appear when VG must be obtained via a call.
  if (!requiresGetVGCall())
    return false;

  // A BL belongs to the VG-computing sequence only if it targets the SME ABI
  // "get current VG" routine.
  if (Opc == AArch64::BL)
    return matchLibcall(TLI, MBBI->getOperand(0), RTLIB::SMEABI_GET_CURRENT_VG);

  // COPYs around the call (e.g. preserving X0 across it) are treated as part
  // of the sequence as well.
  return Opc == TargetOpcode::COPY;
}
50
51// Convenience function to determine whether I is part of the ZPR callee saves.
53 switch (I->getOpcode()) {
54 default:
55 return false;
56 case AArch64::LD1B_2Z_IMM:
57 case AArch64::ST1B_2Z_IMM:
58 case AArch64::STR_ZXI:
59 case AArch64::LDR_ZXI:
60 case AArch64::PTRUE_C_B:
61 return I->getFlag(MachineInstr::FrameSetup) ||
63 case AArch64::SEH_SaveZReg:
64 return true;
65 }
66}
67
68// Convenience function to determine whether I is part of the PPR callee saves.
70 switch (I->getOpcode()) {
71 default:
72 return false;
73 case AArch64::STR_PXI:
74 case AArch64::LDR_PXI:
75 return I->getFlag(MachineInstr::FrameSetup) ||
77 case AArch64::SEH_SavePReg:
78 return true;
79 }
80}
81
82// Convenience function to determine whether I is part of the SVE callee saves.
86
    : MF(MF), MBB(MBB), MFI(MF.getFrameInfo()),
      Subtarget(MF.getSubtarget<AArch64Subtarget>()), AFL(AFL),
      RegInfo(*Subtarget.getRegisterInfo()) {
  TII = Subtarget.getInstrInfo();
  AFI = MF.getInfo<AArch64FunctionInfo>();

  // Cache frequently-queried frame properties.
  HasFP = AFL.hasFP(MF);
  NeedsWinCFI = AFL.needsWinCFI(MF);

  // Select the SVE stack layout; SVELayout keeps its default value unless the
  // SVE callee saves go above the frame record or SVE objects are split.
  if (AFL.hasSVECalleeSavesAboveFrameRecord(MF)) {
    // Hazard padding cannot be combined with this layout (the diagnostic
    // suggests this layout is Windows-specific — see error message).
    if (AFI->hasStackHazardSlotIndex())
      reportFatalUsageError("SME hazard padding is not supported on Windows");
    SVELayout = SVEStackLayout::CalleeSavesAboveFrameRecord;
  } else if (AFI->hasSplitSVEObjects()) {
    SVELayout = SVEStackLayout::Split;
  }
}
107
// Convert the first/last callee-save save/restore at MBBI into a
// pre/post-indexed form, folding the SP adjustment of CSStackSizeInc bytes
// into the memory instruction itself (e.g. STPXi -> STPXpre for saves,
// LDPXi -> LDPXpost for restores). If the increment cannot be encoded in the
// indexed form, a separate SP arithmetic instruction is emitted instead.
// Returns an iterator to the converted (or newly emitted) instruction.
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int CSStackSizeInc,
    bool EmitCFI, MachineInstr::MIFlag FrameFlag, int CFAOffset) const {
  unsigned NewOpc;

  // If the function contains streaming mode changes, we expect instructions
  // to calculate the value of VG before spilling. Move past these instructions
  // if necessary.
  if (AFL.requiresSaveVG(MF)) {
    auto &TLI = *Subtarget.getTargetLowering();
    while (isVGInstruction(MBBI, TLI))
      ++MBBI;
  }

  // Map the unscaled-offset opcode to its pre-increment (stores) or
  // post-increment (loads) counterpart.
  switch (MBBI->getOpcode()) {
  default:
    llvm_unreachable("Unexpected callee-save save/restore opcode!");
  case AArch64::STPXi:
    NewOpc = AArch64::STPXpre;
    break;
  case AArch64::STPDi:
    NewOpc = AArch64::STPDpre;
    break;
  case AArch64::STPQi:
    NewOpc = AArch64::STPQpre;
    break;
  case AArch64::STRXui:
    NewOpc = AArch64::STRXpre;
    break;
  case AArch64::STRDui:
    NewOpc = AArch64::STRDpre;
    break;
  case AArch64::STRQui:
    NewOpc = AArch64::STRQpre;
    break;
  case AArch64::LDPXi:
    NewOpc = AArch64::LDPXpost;
    break;
  case AArch64::LDPDi:
    NewOpc = AArch64::LDPDpost;
    break;
  case AArch64::LDPQi:
    NewOpc = AArch64::LDPQpost;
    break;
  case AArch64::LDRXui:
    NewOpc = AArch64::LDRXpost;
    break;
  case AArch64::LDRDui:
    NewOpc = AArch64::LDRDpost;
    break;
  case AArch64::LDRQui:
    NewOpc = AArch64::LDRQpost;
    break;
  }
  // Query the scale and immediate range of the new opcode so we can tell
  // whether CSStackSizeInc is encodable as its (scaled) offset.
  TypeSize Scale = TypeSize::getFixed(1), Width = TypeSize::getFixed(0);
  int64_t MinOffset, MaxOffset;
  bool Success = TII->getMemOpInfo(NewOpc, Scale, Width, MinOffset, MaxOffset);
  (void)Success;
  assert(Success && "unknown load/store opcode");

  // If the first store isn't right where we want SP then we can't fold the
  // update in so create a normal arithmetic instruction instead.
  if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
      CSStackSizeInc < MinOffset * (int64_t)Scale.getFixedValue() ||
      CSStackSizeInc > MaxOffset * (int64_t)Scale.getFixedValue()) {
    // If we are destroying the frame, make sure we add the increment after the
    // last frame operation.
    if (FrameFlag == MachineInstr::FrameDestroy) {
      ++MBBI;
      // Also skip the SEH instruction, if needed
      if (NeedsWinCFI && AArch64InstrInfo::isSEHInstruction(*MBBI))
        ++MBBI;
    }
    emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                    StackOffset::getFixed(CSStackSizeInc), TII, FrameFlag,
                    false, NeedsWinCFI, &HasWinCFI, EmitCFI,
                    StackOffset::getFixed(CFAOffset));

    return std::prev(MBBI);
  }

  // Get rid of the SEH code associated with the old instruction.
  if (NeedsWinCFI) {
    auto SEH = std::next(MBBI);
    if (AArch64InstrInfo::isSEHInstruction(*SEH))
      SEH->eraseFromParent();
  }

  // Build the pre/post-indexed replacement; it additionally defines SP.
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  MIB.addReg(AArch64::SP, RegState::Define);

  // Copy all operands other than the immediate offset.
  unsigned OpndIdx = 0;
  for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
       ++OpndIdx)
    MIB.add(MBBI->getOperand(OpndIdx));

  assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
         "Unexpected immediate offset in first/last callee-save save/restore "
         "instruction!");
  assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  // The indexed forms take a scaled immediate, so the byte increment must be
  // an exact multiple of the scale.
  assert(CSStackSizeInc % Scale == 0);
  MIB.addImm(CSStackSizeInc / (int)Scale);

  MIB.setMIFlags(MBBI->getFlags());
  MIB.setMemRefs(MBBI->memoperands());

  // Generate a new SEH code that corresponds to the new instruction.
  if (NeedsWinCFI) {
    HasWinCFI = true;
    AFL.insertSEH(*MIB, *TII, FrameFlag);
  }

  if (EmitCFI)
    CFIInstBuilder(MBB, MBBI, FrameFlag)
        .buildDefCFAOffset(CFAOffset - CSStackSizeInc);

  // Erase the original instruction; std::prev of the erase result is the
  // newly built replacement.
  return std::prev(MBB.erase(MBBI));
}
229
// Fix up the SEH opcode associated with the save/restore instruction.
// The SEH save pseudos below carry a stack offset as their trailing operand;
// it is rebased by LocalStackSize when the local-area allocation is folded
// into the callee-save SP adjustment.
    unsigned LocalStackSize) {
  MachineOperand *ImmOpnd = nullptr;
  unsigned ImmIdx = MBBI->getNumOperands() - 1;
  switch (MBBI->getOpcode()) {
  default:
    llvm_unreachable("Fix the offset in the SEH instruction");
  case AArch64::SEH_SaveFPLR:
  case AArch64::SEH_SaveRegP:
  case AArch64::SEH_SaveReg:
  case AArch64::SEH_SaveFRegP:
  case AArch64::SEH_SaveFReg:
  case AArch64::SEH_SaveAnyRegI:
  case AArch64::SEH_SaveAnyRegIP:
  case AArch64::SEH_SaveAnyRegQP:
  case AArch64::SEH_SaveAnyRegQPX:
    ImmOpnd = &MBBI->getOperand(ImmIdx);
    break;
  }
  if (ImmOpnd)
    ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
}
253
// Rewrite the immediate offset of the callee-save save/restore instruction
// MI to account for LocalStackSize extra bytes allocated by a combined SP
// bump (the original offsets were computed before the locals were folded in).
// SEH pseudos are not modified directly here; the matching SEH pseudo is
// adjusted via fixupSEHOpcode at the end.
    MachineInstr &MI, uint64_t LocalStackSize) const {
  if (AArch64InstrInfo::isSEHInstruction(MI))
    return;

  unsigned Opc = MI.getOpcode();
  // Byte size of one register slot — the unit the immediate is scaled by.
  unsigned Scale;
  switch (Opc) {
  case AArch64::STPXi:
  case AArch64::STRXui:
  case AArch64::STPDi:
  case AArch64::STRDui:
  case AArch64::LDPXi:
  case AArch64::LDRXui:
  case AArch64::LDPDi:
  case AArch64::LDRDui:
    Scale = 8;
    break;
  case AArch64::STPQi:
  case AArch64::STRQui:
  case AArch64::LDPQi:
  case AArch64::LDRQui:
    Scale = 16;
    break;
  default:
    llvm_unreachable("Unexpected callee-save save/restore opcode!");
  }

  unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
  assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  // Last operand is immediate offset that needs fixing.
  MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
  // All generated opcodes have scaled offsets.
  assert(LocalStackSize % Scale == 0);
  OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);

  // On Windows the SEH pseudo (asserted to immediately follow MI) records an
  // unscaled byte offset that must be adjusted in the same way.
  if (NeedsWinCFI) {
    HasWinCFI = true;
    auto MBBI = std::next(MachineBasicBlock::iterator(MI));
    assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
    assert(AArch64InstrInfo::isSEHInstruction(*MBBI) &&
           "Expecting a SEH instruction");
    fixupSEHOpcode(MBBI, LocalStackSize);
  }
}
300
// Decide whether the callee-save SP adjustment and the local-area SP
// adjustment can be merged into a single stack bump of StackBumpBytes.
    uint64_t StackBumpBytes) const {
  // Combining is not done for homogeneous prologs/epilogs.
  if (AFL.homogeneousPrologEpilog(MF))
    return false;

  // No local area — there is nothing to combine with.
  if (AFI->getLocalStackSize() == 0)
    return false;

  // For WinCFI, if optimizing for size, prefer to not combine the stack bump
  // (to force a stp with predecrement) to match the packed unwind format,
  // provided that there actually are any callee saved registers to merge the
  // decrement with.
  // This is potentially marginally slower, but allows using the packed
  // unwind format for functions that both have a local area and callee saved
  // registers. Using the packed unwind format notably reduces the size of
  // the unwind info.
  if (AFL.needsWinCFI(MF) && AFI->getCalleeSavedStackSize() > 0 &&
      MF.getFunction().hasOptSize())
    return false;

  // 512 is the maximum immediate for stp/ldp that will be used for
  // callee-save save/restores
  if (StackBumpBytes >= 512 ||
      AFL.windowsRequiresStackProbe(MF, StackBumpBytes))
    return false;

  // Dynamic allocas and stack realignment rule out a single combined bump.
  if (MFI.hasVarSizedObjects())
    return false;

  if (RegInfo.hasStackRealignment(MF))
    return false;

  // This isn't strictly necessary, but it simplifies things a bit since the
  // current RedZone handling code assumes the SP is adjusted by the
  // callee-save save/restore code.
  if (AFL.canUseRedZone(MF))
    return false;

  // When there is an SVE area on the stack, always allocate the
  // callee-saves and spills/locals separately.
  if (AFI->hasSVEStackSize())
    return false;

  return true;
}
346
348 StackOffset PPRCalleeSavesSize =
349 StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize());
350 StackOffset ZPRCalleeSavesSize =
351 StackOffset::getScalable(AFI->getZPRCalleeSavedStackSize());
352 StackOffset PPRLocalsSize = AFL.getPPRStackSize(MF) - PPRCalleeSavesSize;
353 StackOffset ZPRLocalsSize = AFL.getZPRStackSize(MF) - ZPRCalleeSavesSize;
355 return {{PPRCalleeSavesSize, PPRLocalsSize},
356 {ZPRCalleeSavesSize, ZPRLocalsSize}};
357 // For simplicity, attribute all locals to ZPRs when split SVE is disabled.
358 return {{PPRCalleeSavesSize, StackOffset{}},
359 {ZPRCalleeSavesSize, PPRLocalsSize + ZPRLocalsSize}};
360}
361
363 SVEFrameSizes const &SVE) {
364 StackOffset AfterZPRs = SVE.ZPR.LocalsSize;
365 StackOffset BeforePPRs = SVE.ZPR.CalleeSavesSize + SVE.PPR.CalleeSavesSize;
366 StackOffset AfterPPRs = {};
368 BeforePPRs = SVE.PPR.CalleeSavesSize;
369 // If there are no ZPR CSRs, place all local allocations after the ZPRs.
370 if (SVE.ZPR.CalleeSavesSize)
371 AfterPPRs += SVE.PPR.LocalsSize + SVE.ZPR.CalleeSavesSize;
372 else
373 AfterZPRs += SVE.PPR.LocalsSize; // Group allocation of locals.
374 }
375 return {BeforePPRs, AfterPPRs, AfterZPRs};
376}
377
383
386 StackOffset PPRCalleeSavesSize,
387 StackOffset ZPRCalleeSavesSize,
388 bool IsEpilogue) {
391 IsEpilogue ? MBB.begin() : MBB.getFirstTerminator();
392 auto AdjustI = [&](auto MBBI) { return IsEpilogue ? std::prev(MBBI) : MBBI; };
393 // Process the SVE CS to find the starts/ends of the ZPR and PPR areas.
394 if (PPRCalleeSavesSize) {
395 PPRsI = AdjustI(PPRsI);
396 assert(isPartOfPPRCalleeSaves(*PPRsI) && "Unexpected instruction");
397 while (PPRsI != End && isPartOfPPRCalleeSaves(AdjustI(PPRsI)))
398 IsEpilogue ? (--PPRsI) : (++PPRsI);
399 }
400 MachineBasicBlock::iterator ZPRsI = PPRsI;
401 if (ZPRCalleeSavesSize) {
402 ZPRsI = AdjustI(ZPRsI);
403 assert(isPartOfZPRCalleeSaves(*ZPRsI) && "Unexpected instruction");
404 while (ZPRsI != End && isPartOfZPRCalleeSaves(AdjustI(ZPRsI)))
405 IsEpilogue ? (--ZPRsI) : (++ZPRsI);
406 }
407 if (IsEpilogue)
408 return {{PPRsI, MBBI}, {ZPRsI, PPRsI}};
409 return {{MBBI, PPRsI}, {PPRsI, ZPRsI}};
410}
411
  // Cache the per-function unwind-info and funclet properties consulted
  // throughout prologue emission.
  EmitCFI = AFI->needsDwarfUnwindInfo(MF);
  EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
  IsFunclet = MBB.isEHFuncletEntry();
  HomPrologEpilog = AFL.homogeneousPrologEpilog(MF);

#ifndef NDEBUG
  // In debug builds, record the registers live into the existing prologue so
  // verifyPrologueClobbers() can later assert that the code we insert does
  // not clobber any of them.
  collectBlockLiveins();
#endif
}
425
426#ifndef NDEBUG
427/// Collect live registers from the end of \p MI's parent up to (including) \p
428/// MI in \p LiveRegs.
431
432 MachineBasicBlock &MBB = *MI.getParent();
433 LiveRegs.addLiveOuts(MBB);
434 for (const MachineInstr &MI :
435 reverse(make_range(MI.getIterator(), MBB.instr_end())))
436 LiveRegs.stepBackward(MI);
437}
438
439void AArch64PrologueEmitter::collectBlockLiveins() {
440 // Collect live register from the end of MBB up to the start of the existing
441 // frame setup instructions.
442 PrologueEndI = MBB.begin();
443 while (PrologueEndI != MBB.end() &&
444 PrologueEndI->getFlag(MachineInstr::FrameSetup))
445 ++PrologueEndI;
446
447 if (PrologueEndI != MBB.end()) {
448 getLivePhysRegsUpTo(*PrologueEndI, RegInfo, LiveRegs);
449 // Ignore registers used for stack management for now.
450 LiveRegs.removeReg(AArch64::SP);
451 LiveRegs.removeReg(AArch64::X19);
452 LiveRegs.removeReg(AArch64::FP);
453 LiveRegs.removeReg(AArch64::LR);
454
455 // X0 will be clobbered by a call to __arm_get_current_vg in the prologue.
456 // This is necessary to spill VG if required where SVE is unavailable, but
457 // X0 is preserved around this call.
458 if (requiresGetVGCall())
459 LiveRegs.removeReg(AArch64::X0);
460 }
461}
462
463void AArch64PrologueEmitter::verifyPrologueClobbers() const {
464 if (PrologueEndI == MBB.end())
465 return;
466 // Check if any of the newly instructions clobber any of the live registers.
467 for (MachineInstr &MI :
468 make_range(MBB.instr_begin(), PrologueEndI->getIterator())) {
469 for (auto &Op : MI.operands())
470 if (Op.isReg() && Op.isDef())
471 assert(!LiveRegs.contains(Op.getReg()) &&
472 "live register clobbered by inserted prologue instructions");
473 }
474}
475#endif
476
// Record the size of the local stack area (total frame size minus the
// prologue's callee-save area) and decide whether the callee-save and local
// allocations can share a single SP bump.
void AArch64PrologueEmitter::determineLocalsStackSize(
    uint64_t StackSize, uint64_t PrologueSaveSize) {
  AFI->setLocalStackSize(StackSize - PrologueSaveSize);
  // Must come after setLocalStackSize(): shouldCombineCSRLocalStackBump()
  // reads the local stack size back through AFI.
  CombineSPBump = shouldCombineCSRLocalStackBump(StackSize);
}
482
483// Return the maximum possible number of bytes for `Size` due to the
484// architectural limit on the size of a SVE register.
485static int64_t upperBound(StackOffset Size) {
486 static const int64_t MAX_BYTES_PER_SCALABLE_BYTE = 16;
487 return Size.getScalable() * MAX_BYTES_PER_SCALABLE_BYTE + Size.getFixed();
488}
489
490void AArch64PrologueEmitter::allocateStackSpace(
491 MachineBasicBlock::iterator MBBI, int64_t RealignmentPadding,
492 StackOffset AllocSize, bool EmitCFI, StackOffset InitialOffset,
493 bool FollowupAllocs) {
494
495 if (!AllocSize)
496 return;
497
498 DebugLoc DL;
499 const int64_t MaxAlign = MFI.getMaxAlign().value();
500 const uint64_t AndMask = ~(MaxAlign - 1);
501
503 Register TargetReg = RealignmentPadding
504 ? AFL.findScratchNonCalleeSaveRegister(&MBB)
505 : AArch64::SP;
506 // SUB Xd/SP, SP, AllocSize
507 emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, TII,
509 EmitCFI, InitialOffset);
510
511 if (RealignmentPadding) {
512 // AND SP, X9, 0b11111...0000
513 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
514 .addReg(TargetReg, RegState::Kill)
517 AFI->setStackRealigned(true);
518
519 // No need for SEH instructions here; if we're realigning the stack,
520 // we've set a frame pointer and already finished the SEH prologue.
522 }
523 return;
524 }
525
526 //
527 // Stack probing allocation.
528 //
529
530 // Fixed length allocation. If we don't need to re-align the stack and don't
531 // have SVE objects, we can use a more efficient sequence for stack probing.
532 if (AllocSize.getScalable() == 0 && RealignmentPadding == 0) {
533 Register ScratchReg = AFL.findScratchNonCalleeSaveRegister(&MBB);
534 assert(ScratchReg != AArch64::NoRegister);
535 BuildMI(MBB, MBBI, DL, TII->get(AArch64::PROBED_STACKALLOC))
536 .addDef(ScratchReg)
537 .addImm(AllocSize.getFixed())
538 .addImm(InitialOffset.getFixed())
539 .addImm(InitialOffset.getScalable());
540 // The fixed allocation may leave unprobed bytes at the top of the
541 // stack. If we have subsequent allocation (e.g. if we have variable-sized
542 // objects), we need to issue an extra probe, so these allocations start in
543 // a known state.
544 if (FollowupAllocs) {
545 // STR XZR, [SP]
546 BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
547 .addReg(AArch64::XZR)
548 .addReg(AArch64::SP)
549 .addImm(0)
551 }
552
553 return;
554 }
555
556 // Variable length allocation.
557
558 // If the (unknown) allocation size cannot exceed the probe size, decrement
559 // the stack pointer right away.
560 int64_t ProbeSize = AFI->getStackProbeSize();
561 if (upperBound(AllocSize) + RealignmentPadding <= ProbeSize) {
562 Register ScratchReg = RealignmentPadding
563 ? AFL.findScratchNonCalleeSaveRegister(&MBB)
564 : AArch64::SP;
565 assert(ScratchReg != AArch64::NoRegister);
566 // SUB Xd, SP, AllocSize
567 emitFrameOffset(MBB, MBBI, DL, ScratchReg, AArch64::SP, -AllocSize, TII,
569 EmitCFI, InitialOffset);
570 if (RealignmentPadding) {
571 // AND SP, Xn, 0b11111...0000
572 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
573 .addReg(ScratchReg, RegState::Kill)
576 AFI->setStackRealigned(true);
577 }
578 if (FollowupAllocs || upperBound(AllocSize) + RealignmentPadding >
580 // STR XZR, [SP]
581 BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
582 .addReg(AArch64::XZR)
583 .addReg(AArch64::SP)
584 .addImm(0)
586 }
587 return;
588 }
589
590 // Emit a variable-length allocation probing loop.
591 // TODO: As an optimisation, the loop can be "unrolled" into a few parts,
592 // each of them guaranteed to adjust the stack by less than the probe size.
593 Register TargetReg = AFL.findScratchNonCalleeSaveRegister(&MBB);
594 assert(TargetReg != AArch64::NoRegister);
595 // SUB Xd, SP, AllocSize
596 emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, TII,
598 EmitCFI, InitialOffset);
599 if (RealignmentPadding) {
600 // AND Xn, Xn, 0b11111...0000
601 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), TargetReg)
602 .addReg(TargetReg, RegState::Kill)
605 }
606
607 BuildMI(MBB, MBBI, DL, TII->get(AArch64::PROBED_STACKALLOC_VAR))
608 .addReg(TargetReg);
609 if (EmitCFI) {
610 // Set the CFA register back to SP.
611 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
612 .buildDefCFARegister(AArch64::SP);
613 }
614 if (RealignmentPadding)
615 AFI->setStackRealigned(true);
616}
617
619 const MachineBasicBlock::iterator PrologueBeginI = MBB.begin();
620 const MachineBasicBlock::iterator EndI = MBB.end();
621
622 // At this point, we're going to decide whether or not the function uses a
623 // redzone. In most cases, the function doesn't have a redzone so let's
624 // assume that's false and set it to true in the case that there's a redzone.
625 AFI->setHasRedZone(false);
626
627 // Debug location must be unknown since the first debug location is used
628 // to determine the end of the prologue.
629 DebugLoc DL;
630
631 // In some cases, particularly with CallingConv::SwiftTail, it is possible to
632 // have a tail-call where the caller only needs to adjust the stack pointer in
633 // the epilogue. In this case, we still need to emit a SEH prologue sequence.
634 // See `seh-minimal-prologue-epilogue.ll` test cases.
635 if (AFI->getArgumentStackToRestore())
636 HasWinCFI = true;
637
638 if (AFI->shouldSignReturnAddress(MF)) {
639 // If pac-ret+leaf is in effect, PAUTH_PROLOGUE pseudo instructions
640 // are inserted by emitPacRetPlusLeafHardening().
641 if (!AFL.shouldSignReturnAddressEverywhere(MF)) {
642 BuildMI(MBB, PrologueBeginI, DL, TII->get(AArch64::PAUTH_PROLOGUE))
644 }
645 // AArch64PointerAuth pass will insert SEH_PACSignLR
647 }
648
649 if (AFI->needsShadowCallStackPrologueEpilogue(MF)) {
650 emitShadowCallStackPrologue(PrologueBeginI, DL);
652 }
653
654 if (EmitCFI && AFI->isMTETagged())
655 BuildMI(MBB, PrologueBeginI, DL, TII->get(AArch64::EMITMTETAGGED))
657
658 // We signal the presence of a Swift extended frame to external tools by
659 // storing FP with 0b0001 in bits 63:60. In normal userland operation a simple
660 // ORR is sufficient, it is assumed a Swift kernel would initialize the TBI
661 // bits so that is still true.
662 if (HasFP && AFI->hasSwiftAsyncContext())
663 emitSwiftAsyncContextFramePointer(PrologueBeginI, DL);
664
665 // All calls are tail calls in GHC calling conv, and functions have no
666 // prologue/epilogue.
667 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
668 return;
669
670 // Set tagged base pointer to the requested stack slot. Ideally it should
671 // match SP value after prologue.
672 if (std::optional<int> TBPI = AFI->getTaggedBasePointerIndex())
673 AFI->setTaggedBasePointerOffset(-MFI.getObjectOffset(*TBPI));
674 else
675 AFI->setTaggedBasePointerOffset(MFI.getStackSize());
676
677 // getStackSize() includes all the locals in its size calculation. We don't
678 // include these locals when computing the stack size of a funclet, as they
679 // are allocated in the parent's stack frame and accessed via the frame
680 // pointer from the funclet. We only save the callee saved registers in the
681 // funclet, which are really the callee saved registers of the parent
682 // function, including the funclet.
683 int64_t NumBytes =
684 IsFunclet ? AFL.getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
685 if (!AFI->hasStackFrame() && !AFL.windowsRequiresStackProbe(MF, NumBytes))
686 return emitEmptyStackFramePrologue(NumBytes, PrologueBeginI, DL);
687
688 bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
689 unsigned FixedObject = AFL.getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
690
691 auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
692 // All of the remaining stack allocations are for locals.
693 determineLocalsStackSize(NumBytes, PrologueSaveSize);
694
695 auto [PPR, ZPR] = getSVEStackFrameSizes();
696 SVEStackAllocations SVEAllocs = getSVEStackAllocations({PPR, ZPR});
697
698 MachineBasicBlock::iterator FirstGPRSaveI = PrologueBeginI;
700 assert(!SVEAllocs.AfterPPRs &&
701 "unexpected SVE allocs after PPRs with CalleeSavesAboveFrameRecord");
702 // If we're doing SVE saves first, we need to immediately allocate space
703 // for fixed objects, then space for the SVE callee saves.
704 //
705 // Windows unwind requires that the scalable size is a multiple of 16;
706 // that's handled when the callee-saved size is computed.
707 auto SaveSize = SVEAllocs.BeforePPRs + StackOffset::getFixed(FixedObject);
708 allocateStackSpace(PrologueBeginI, 0, SaveSize, false, StackOffset{},
709 /*FollowupAllocs=*/true);
710 NumBytes -= FixedObject;
711
712 // Now allocate space for the GPR callee saves.
713 MachineBasicBlock::iterator MBBI = PrologueBeginI;
714 while (MBBI != EndI && isPartOfSVECalleeSaves(MBBI))
715 ++MBBI;
717 MBBI, DL, -AFI->getCalleeSavedStackSize(), EmitAsyncCFI);
718 NumBytes -= AFI->getCalleeSavedStackSize();
719 } else if (CombineSPBump) {
720 assert(!AFL.getSVEStackSize(MF) && "Cannot combine SP bump with SVE");
721 emitFrameOffset(MBB, PrologueBeginI, DL, AArch64::SP, AArch64::SP,
722 StackOffset::getFixed(-NumBytes), TII,
724 EmitAsyncCFI);
725 NumBytes = 0;
726 } else if (HomPrologEpilog) {
727 // Stack has been already adjusted.
728 NumBytes -= PrologueSaveSize;
729 } else if (PrologueSaveSize != 0) {
731 PrologueBeginI, DL, -PrologueSaveSize, EmitAsyncCFI);
732 NumBytes -= PrologueSaveSize;
733 }
734 assert(NumBytes >= 0 && "Negative stack allocation size!?");
735
736 // Move past the saves of the callee-saved registers, fixing up the offsets
737 // and pre-inc if we decided to combine the callee-save and local stack
738 // pointer bump above.
739 auto &TLI = *Subtarget.getTargetLowering();
740
741 MachineBasicBlock::iterator AfterGPRSavesI = FirstGPRSaveI;
742 while (AfterGPRSavesI != EndI &&
743 AfterGPRSavesI->getFlag(MachineInstr::FrameSetup) &&
744 !isPartOfSVECalleeSaves(AfterGPRSavesI)) {
745 if (CombineSPBump &&
746 // Only fix-up frame-setup load/store instructions.
747 (!AFL.requiresSaveVG(MF) || !isVGInstruction(AfterGPRSavesI, TLI)))
748 fixupCalleeSaveRestoreStackOffset(*AfterGPRSavesI,
749 AFI->getLocalStackSize());
750 ++AfterGPRSavesI;
751 }
752
753 // For funclets the FP belongs to the containing function. Only set up FP if
754 // we actually need to.
755 if (!IsFunclet && HasFP)
756 emitFramePointerSetup(AfterGPRSavesI, DL, FixedObject);
757
758 // Now emit the moves for whatever callee saved regs we have (including FP,
759 // LR if those are saved). Frame instructions for SVE register are emitted
760 // later, after the instruction which actually save SVE regs.
761 if (EmitAsyncCFI)
762 emitCalleeSavedGPRLocations(AfterGPRSavesI);
763
764 // Alignment is required for the parent frame, not the funclet
765 const bool NeedsRealignment =
766 NumBytes && !IsFunclet && RegInfo.hasStackRealignment(MF);
767 const int64_t RealignmentPadding =
768 (NeedsRealignment && MFI.getMaxAlign() > Align(16))
769 ? MFI.getMaxAlign().value() - 16
770 : 0;
771
772 if (AFL.windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding))
773 emitWindowsStackProbe(AfterGPRSavesI, DL, NumBytes, RealignmentPadding);
774
775 StackOffset NonSVELocalsSize = StackOffset::getFixed(NumBytes);
776 SVEAllocs.AfterZPRs += NonSVELocalsSize;
777
778 StackOffset CFAOffset =
779 StackOffset::getFixed(MFI.getStackSize()) - NonSVELocalsSize;
780 MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;
781 // Allocate space for the callee saves and PPR locals (if any).
783 auto [PPRRange, ZPRRange] =
784 partitionSVECS(MBB, AfterGPRSavesI, PPR.CalleeSavesSize,
785 ZPR.CalleeSavesSize, /*IsEpilogue=*/false);
786 AfterSVESavesI = ZPRRange.End;
787 if (EmitAsyncCFI)
788 emitCalleeSavedSVELocations(AfterSVESavesI);
789
790 allocateStackSpace(PPRRange.Begin, 0, SVEAllocs.BeforePPRs,
791 EmitAsyncCFI && !HasFP, CFAOffset,
792 MFI.hasVarSizedObjects() || SVEAllocs.AfterPPRs ||
793 SVEAllocs.AfterZPRs);
794 CFAOffset += SVEAllocs.BeforePPRs;
795 assert(PPRRange.End == ZPRRange.Begin &&
796 "Expected ZPR callee saves after PPR locals");
797 allocateStackSpace(PPRRange.End, 0, SVEAllocs.AfterPPRs,
798 EmitAsyncCFI && !HasFP, CFAOffset,
799 MFI.hasVarSizedObjects() || SVEAllocs.AfterZPRs);
800 CFAOffset += SVEAllocs.AfterPPRs;
801 } else {
803 // Note: With CalleeSavesAboveFrameRecord, the SVE CS (BeforePPRs) have
804 // already been allocated. PPR locals (included in AfterPPRs) are not
805 // supported (note: this is asserted above).
806 CFAOffset += SVEAllocs.BeforePPRs;
807 }
808
809 // Allocate space for the rest of the frame including ZPR locals. Align the
810 // stack as necessary.
811 assert(!(AFL.canUseRedZone(MF) && NeedsRealignment) &&
812 "Cannot use redzone with stack realignment");
813 if (!AFL.canUseRedZone(MF)) {
814 // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have the
815 // correct value here, as NumBytes also includes padding bytes, which
816 // shouldn't be counted here.
817 allocateStackSpace(AfterSVESavesI, RealignmentPadding, SVEAllocs.AfterZPRs,
818 EmitAsyncCFI && !HasFP, CFAOffset,
819 MFI.hasVarSizedObjects());
820 }
821
822 // If we need a base pointer, set it up here. It's whatever the value of the
823 // stack pointer is at this point. Any variable size objects will be
824 // allocated after this, so we can still use the base pointer to reference
825 // locals.
826 //
827 // FIXME: Clarify FrameSetup flags here.
828 // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
829 // needed.
830 // For funclets the BP belongs to the containing function.
831 if (!IsFunclet && RegInfo.hasBasePointer(MF)) {
832 TII->copyPhysReg(MBB, AfterSVESavesI, DL, RegInfo.getBaseRegister(),
833 AArch64::SP, false);
834 if (NeedsWinCFI) {
835 HasWinCFI = true;
836 BuildMI(MBB, AfterSVESavesI, DL, TII->get(AArch64::SEH_Nop))
838 }
839 }
840
841 // The very last FrameSetup instruction indicates the end of prologue. Emit a
842 // SEH opcode indicating the prologue end.
843 if (NeedsWinCFI && HasWinCFI) {
844 BuildMI(MBB, AfterSVESavesI, DL, TII->get(AArch64::SEH_PrologEnd))
846 }
847
848 // SEH funclets are passed the frame pointer in X1. If the parent
849 // function uses the base register, then the base register is used
850 // directly, and is not retrieved from X1.
851 if (IsFunclet && F.hasPersonalityFn()) {
852 EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
854 BuildMI(MBB, AfterSVESavesI, DL, TII->get(TargetOpcode::COPY),
855 AArch64::FP)
856 .addReg(AArch64::X1)
858 MBB.addLiveIn(AArch64::X1);
859 }
860 }
861
862 if (EmitCFI && !EmitAsyncCFI) {
863 if (HasFP) {
864 emitDefineCFAWithFP(AfterSVESavesI, FixedObject);
865 } else {
866 StackOffset TotalSize =
867 AFL.getSVEStackSize(MF) +
868 StackOffset::getFixed((int64_t)MFI.getStackSize());
869 CFIInstBuilder CFIBuilder(MBB, AfterSVESavesI, MachineInstr::FrameSetup);
870 CFIBuilder.insertCFIInst(
871 createDefCFA(RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP,
872 TotalSize, /*LastAdjustmentWasScalable=*/false));
873 }
874 emitCalleeSavedGPRLocations(AfterSVESavesI);
875 emitCalleeSavedSVELocations(AfterSVESavesI);
876 }
877}
878
879void AArch64PrologueEmitter::emitShadowCallStackPrologue(
881 // Shadow call stack prolog: str x30, [x18], #8
882 BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXpost))
883 .addReg(AArch64::X18, RegState::Define)
884 .addReg(AArch64::LR)
885 .addReg(AArch64::X18)
886 .addImm(8)
888
889 // This instruction also makes x18 live-in to the entry block.
890 MBB.addLiveIn(AArch64::X18);
891
892 if (NeedsWinCFI)
893 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
895
896 if (EmitCFI) {
897 // Emit a CFI instruction that causes 8 to be subtracted from the value of
898 // x18 when unwinding past this frame.
899 static const char CFIInst[] = {
900 dwarf::DW_CFA_val_expression,
901 18, // register
902 2, // length
903 static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
904 static_cast<char>(-8) & 0x7f, // addend (sleb128)
905 };
906 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
907 .buildEscape(StringRef(CFIInst, sizeof(CFIInst)));
908 }
909}
910
// Tag the frame pointer (x29) to signal a Swift async extended frame.
// Either OR in a dynamically-loaded flag value (x16) or a hard-coded bit,
// depending on the SwiftAsyncFramePointer target option.
// NOTE(review): this is a doxygen capture — the switch case labels and the
// trailing .setMIFlag(...) continuation lines are missing from this listing
// (source line numbers skip); code kept byte-identical.
911void AArch64PrologueEmitter::emitSwiftAsyncContextFramePointer(
913 switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
915 if (Subtarget.swiftAsyncContextIsDynamicallySet()) {
916 // The special symbol below is absolute and has a *value* that can be
917 // combined with the frame pointer to signal an extended frame.
918 BuildMI(MBB, MBBI, DL, TII->get(AArch64::LOADgot), AArch64::X16)
919 .addExternalSymbol("swift_async_extendedFramePointerFlags",
921 if (NeedsWinCFI) {
922 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
924 HasWinCFI = true;
925 }
// Combine the loaded flags into FP (shifted by 32 for ILP32 so the tag
// lands in the high half of the 64-bit register).
926 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::FP)
927 .addUse(AArch64::FP)
928 .addUse(AArch64::X16)
929 .addImm(Subtarget.isTargetILP32() ? 32 : 0);
930 if (NeedsWinCFI) {
931 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
933 HasWinCFI = true;
934 }
935 break;
936 }
937 [[fallthrough]];
938
940 // ORR x29, x29, #0x1000_0000_0000_0000
// 0x1100 is the AArch64 logical-immediate encoding of the bit-60 mask above.
941 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXri), AArch64::FP)
942 .addUse(AArch64::FP)
943 .addImm(0x1100)
945 if (NeedsWinCFI) {
946 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
948 HasWinCFI = true;
949 }
950 break;
951
953 break;
954 }
955}
956
// Prologue for a function with no frame pointer and no SVE stack: either use
// the red zone (stack <= 128 bytes, no allocation needed) or emit a single SP
// adjustment for the locals.
// NOTE(review): doxygen capture — trailing .setMIFlag(...) lines are missing
// from this listing; code kept byte-identical.
957void AArch64PrologueEmitter::emitEmptyStackFramePrologue(
958 int64_t NumBytes, MachineBasicBlock::iterator MBBI,
959 const DebugLoc &DL) const {
960 assert(!HasFP && "unexpected function without stack frame but with FP");
961 assert(!AFL.getSVEStackSize(MF) &&
962 "unexpected function without stack frame but with SVE objects");
963 // All of the stack allocation is for locals.
964 AFI->setLocalStackSize(NumBytes);
965 if (!NumBytes) {
// Nothing to allocate; just close the (possibly already opened) SEH prolog.
966 if (NeedsWinCFI && HasWinCFI) {
967 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
969 }
970 return;
971 }
972 // REDZONE: If the stack size is less than 128 bytes, we don't need
973 // to actually allocate.
974 if (AFL.canUseRedZone(MF)) {
975 AFI->setHasRedZone(true);
976 ++NumRedZoneFunctions;
977 } else {
978 emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
979 StackOffset::getFixed(-NumBytes), TII,
981 if (EmitCFI) {
982 // Label used to tie together the PROLOG_LABEL and the MachineMoves.
983 MCSymbol *FrameLabel = MF.getContext().createTempSymbol();
984 // Encode the stack size of the leaf function.
985 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
986 .buildDefCFAOffset(NumBytes, FrameLabel);
987 }
988 }
989
990 if (NeedsWinCFI) {
991 HasWinCFI = true;
992 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
994 }
995}
996
// Establish the frame pointer: optionally store the Swift async context just
// below the frame record first, then set FP = SP + FPOffset (or append the
// offset to a homogeneous HOM_Prolog pseudo), and finally switch the CFA to
// be FP-based when emitting async-accurate CFI.
// NOTE(review): doxygen capture — the parameter line and several
// .setMIFlag(...) continuations are missing; code kept byte-identical.
997void AArch64PrologueEmitter::emitFramePointerSetup(
999 unsigned FixedObject) {
1000 int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset();
1001 if (CombineSPBump)
1002 FPOffset += AFI->getLocalStackSize();
1003
1004 if (AFI->hasSwiftAsyncContext()) {
1005 // Before we update the live FP we have to ensure there's a valid (or
1006 // null) asynchronous context in its slot just before FP in the frame
1007 // record, so store it now.
1008 const auto &Attrs = MF.getFunction().getAttributes();
1009 bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
1010 if (HaveInitialContext)
1011 MBB.addLiveIn(AArch64::X22);
// Store either the incoming context (x22) or null (xzr) into the slot.
1012 Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;
1013 BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
1014 .addUse(Reg)
1015 .addUse(AArch64::SP)
1016 .addImm(FPOffset - 8)
1018 if (NeedsWinCFI) {
1019 // WinCFI and arm64e, where StoreSwiftAsyncContext is expanded
1020 // to multiple instructions, should be mutually-exclusive.
1021 assert(Subtarget.getTargetTriple().getArchName() != "arm64e");
1022 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1024 HasWinCFI = true;
1025 }
1026 }
1027
1028 if (HomPrologEpilog) {
// Homogeneous prolog: the HOM_Prolog pseudo immediately precedes MBBI;
// communicate the FP offset to it as an extra immediate operand.
1029 auto Prolog = MBBI;
1030 --Prolog;
1031 assert(Prolog->getOpcode() == AArch64::HOM_Prolog);
1032 Prolog->addOperand(MachineOperand::CreateImm(FPOffset));
1033 } else {
1034 // Issue sub fp, sp, FPOffset or
1035 // mov fp,sp when FPOffset is zero.
1036 // Note: All stores of callee-saved registers are marked as "FrameSetup".
1037 // This code marks the instruction(s) that set the FP also.
1038 emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
1039 StackOffset::getFixed(FPOffset), TII,
1041 if (NeedsWinCFI && HasWinCFI) {
1042 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
1044 // After setting up the FP, the rest of the prolog doesn't need to be
1045 // included in the SEH unwind info.
1046 NeedsWinCFI = false;
1047 }
1048 }
1049 if (EmitAsyncCFI)
1050 emitDefineCFAWithFP(MBBI, FixedObject);
1051}
1052
1053// Define the current CFA rule to use the provided FP.
1054void AArch64PrologueEmitter::emitDefineCFAWithFP(
1055 MachineBasicBlock::iterator MBBI, unsigned FixedObject) const {
1056 const int OffsetToFirstCalleeSaveFromFP =
1057 AFI->getCalleeSaveBaseToFrameRecordOffset() -
1058 AFI->getCalleeSavedStackSize();
1059 Register FramePtr = RegInfo.getFrameRegister(MF);
1060 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
1061 .buildDefCFA(FramePtr, FixedObject - OffsetToFirstCalleeSaveFromFP);
1062}
1063
// Emit a Windows stack probe: move the allocation size (in 16-byte units)
// into x15, call __chkstk, subtract the probed amount from SP, and optionally
// realign the stack. x15 is spilled to a scratch register first if it is
// live-in (e.g. used for 'nest').
// NOTE(review): doxygen capture — register-state flag operands, SEH opcode
// immediates and .setMIFlag(...) continuations are missing from this listing
// (source line numbers skip); code kept byte-identical.
1064void AArch64PrologueEmitter::emitWindowsStackProbe(
1065 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int64_t &NumBytes,
1066 int64_t RealignmentPadding) const {
1067 if (AFI->getSVECalleeSavedStackSize())
1068 report_fatal_error("SVE callee saves not yet supported with stack probing");
1069
1070 // Find an available register to spill the value of X15 to, if X15 is being
1071 // used already for nest.
1072 unsigned X15Scratch = AArch64::NoRegister;
1073 if (llvm::any_of(MBB.liveins(),
1074 [this](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
1075 return RegInfo.isSuperOrSubRegisterEq(AArch64::X15,
1076 LiveIn.PhysReg);
1077 })) {
1078 X15Scratch = AFL.findScratchNonCalleeSaveRegister(&MBB, /*HasCall=*/true);
1079 assert(X15Scratch != AArch64::NoRegister &&
1080 (X15Scratch < AArch64::X15 || X15Scratch > AArch64::X17));
1081#ifndef NDEBUG
1082 LiveRegs.removeReg(AArch64::X15); // ignore X15 since we restore it
1083#endif
// Save X15 into the scratch register via ORR (mov) so it can be restored
// after the probe sequence.
1084 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), X15Scratch)
1085 .addReg(AArch64::XZR)
1086 .addReg(AArch64::X15, RegState::Undef)
1087 .addReg(AArch64::X15, RegState::Implicit)
1089 }
1090
// __chkstk takes the allocation size in 16-byte units in x15.
1091 uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4;
1092 if (NeedsWinCFI) {
1093 HasWinCFI = true;
1094 // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
1095 // exceed this amount. We need to move at most 2^24 - 1 into x15.
1096 // This is at most two instructions, MOVZ followed by MOVK.
1097 // TODO: Fix to use multiple stack alloc unwind codes for stacks
1098 // exceeding 256MB in size.
1099 if (NumBytes >= (1 << 28))
1100 report_fatal_error("Stack size cannot exceed 256MB for stack "
1101 "unwinding purposes");
1102
1103 uint32_t LowNumWords = NumWords & 0xFFFF;
1104 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15)
1105 .addImm(LowNumWords)
1108 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1110 if ((NumWords & 0xFFFF0000) != 0) {
1111 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
1112 .addReg(AArch64::X15)
1113 .addImm((NumWords & 0xFFFF0000) >> 16) // High half
1116 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1118 }
1119 } else {
// No WinCFI constraints: a single pseudo materializes the full value.
1120 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
1121 .addImm(NumWords)
1123 }
1124
// Call the probe function; for the large code model the address must be
// materialized into x16 first.
1125 const char *ChkStk = Subtarget.getChkStkName();
1126 switch (MF.getTarget().getCodeModel()) {
1127 case CodeModel::Tiny:
1128 case CodeModel::Small:
1129 case CodeModel::Medium:
1130 case CodeModel::Kernel:
1131 BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
1132 .addExternalSymbol(ChkStk)
1133 .addReg(AArch64::X15, RegState::Implicit)
1134 .addReg(AArch64::X16,
1136 .addReg(AArch64::X17,
1138 .addReg(AArch64::NZCV,
1141 if (NeedsWinCFI) {
1142 HasWinCFI = true;
1143 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1145 }
1146 break;
1147 case CodeModel::Large:
1148 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
1149 .addReg(AArch64::X16, RegState::Define)
1150 .addExternalSymbol(ChkStk)
1151 .addExternalSymbol(ChkStk)
1153 if (NeedsWinCFI) {
1154 HasWinCFI = true;
1155 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1157 }
1158
1160 .addReg(AArch64::X16, RegState::Kill)
1162 .addReg(AArch64::X16,
1164 .addReg(AArch64::X17,
1166 .addReg(AArch64::NZCV,
1169 if (NeedsWinCFI) {
1170 HasWinCFI = true;
1171 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1173 }
1174 break;
1175 }
1176
// SP -= x15 * 16 (extended-register subtract with LSL #4).
1177 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
1178 .addReg(AArch64::SP, RegState::Kill)
1179 .addReg(AArch64::X15, RegState::Kill)
1182 if (NeedsWinCFI) {
1183 HasWinCFI = true;
1184 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
1185 .addImm(NumBytes)
1187 }
1188 NumBytes = 0;
1189
1190 if (RealignmentPadding > 0) {
// Compute SP + padding into x15 (large paddings need the value in x16
// first), then mask SP down to the required alignment.
1191 if (RealignmentPadding >= 4096) {
1192 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm))
1193 .addReg(AArch64::X16, RegState::Define)
1194 .addImm(RealignmentPadding)
1196 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXrx64), AArch64::X15)
1197 .addReg(AArch64::SP)
1198 .addReg(AArch64::X16, RegState::Kill)
1201 } else {
1202 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15)
1203 .addReg(AArch64::SP)
1204 .addImm(RealignmentPadding)
1205 .addImm(0)
1207 }
1208
1209 uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
1210 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
1211 .addReg(AArch64::X15, RegState::Kill)
1213 AFI->setStackRealigned(true);
1214
1215 // No need for SEH instructions here; if we're realigning the stack,
1216 // we've set a frame pointer and already finished the SEH prologue.
1218 }
1219 if (X15Scratch != AArch64::NoRegister) {
// Restore X15 from the scratch register saved above.
1220 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), AArch64::X15)
1221 .addReg(AArch64::XZR)
1222 .addReg(X15Scratch, RegState::Undef)
1223 .addReg(X15Scratch, RegState::Implicit)
1225 }
1226}
1227
// Emit CFI offset directives for all non-scalable (GPR/FPR) callee saves,
// recording where each saved register lives relative to the CFA.
// NOTE(review): doxygen capture — the MBBI parameter line is missing from
// this listing; code kept byte-identical.
1228void AArch64PrologueEmitter::emitCalleeSavedGPRLocations(
1230 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1231 if (CSI.empty())
1232 return;
1233
1234 CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
1235 for (const auto &Info : CSI) {
1236 unsigned FrameIdx = Info.getFrameIdx();
// Scalable (SVE) saves are handled by emitCalleeSavedSVELocations.
1237 if (MFI.hasScalableStackID(FrameIdx))
1238 continue;
1239
1240 assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
1241 int64_t Offset = MFI.getObjectOffset(FrameIdx) - AFL.getOffsetOfLocalArea();
1242 CFIBuilder.buildOffset(Info.getReg(), Offset);
1243 }
1244}
1245
// Emit CFI directives describing where each scalable (SVE) callee save lives.
// Offsets are expressed in VG-scaled terms; if VG itself is saved, its slot
// offset is located first so register expressions can reference it.
// NOTE(review): doxygen capture — the MBBI parameter line and part of the
// split-SVE condition (line 1282) are missing; code kept byte-identical.
1246void AArch64PrologueEmitter::emitCalleeSavedSVELocations(
1248 // Add callee saved registers to move list.
1249 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1250 if (CSI.empty())
1251 return;
1252
1253 CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
1254
1255 std::optional<int64_t> IncomingVGOffsetFromDefCFA;
1256 if (AFL.requiresSaveVG(MF)) {
// Locate the slot where the incoming VG was saved (searched from the back,
// as VG is expected near the end of the CSI list).
1257 auto IncomingVG = *find_if(
1258 reverse(CSI), [](auto &Info) { return Info.getReg() == AArch64::VG; });
1259 IncomingVGOffsetFromDefCFA = MFI.getObjectOffset(IncomingVG.getFrameIdx()) -
1260 AFL.getOffsetOfLocalArea();
1261 }
1262
1263 StackOffset PPRStackSize = AFL.getPPRStackSize(MF);
1264 for (const auto &Info : CSI) {
1265 int FI = Info.getFrameIdx();
1266 if (!MFI.hasScalableStackID(FI))
1267 continue;
1268
1269 // Not all unwinders may know about SVE registers, so assume the lowest
1270 // common denominator.
1271 assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
1272 MCRegister Reg = Info.getReg();
1273 if (!RegInfo.regNeedsCFI(Reg, Reg))
1274 continue;
1275
1276 StackOffset Offset =
1277 StackOffset::getScalable(MFI.getObjectOffset(FI)) -
1278 StackOffset::getFixed(AFI->getCalleeSavedStackSize(MFI));
1279
1280 // The scalable vectors are below (lower address) the scalable predicates
1281 // with split SVE objects, so we must subtract the size of the predicates.
1283 MFI.getStackID(FI) == TargetStackID::ScalableVector)
1284 Offset -= PPRStackSize;
1285
1286 CFIBuilder.insertCFIInst(
1287 createCFAOffset(RegInfo, Reg, Offset, IncomingVGOffsetFromDefCFA));
1288 }
1289}
1290
// Returns true if MI is a funclet (catch/cleanup) return pseudo.
// NOTE(review): the function's signature line (source line 1291) is missing
// from this doxygen capture; the body below is kept byte-identical.
1292 switch (MI.getOpcode()) {
1293 default:
1294 return false;
1295 case AArch64::CATCHRET:
1296 case AArch64::CLEANUPRET:
1297 return true;
1298 }
1299}
1300
// Tail of the AArch64EpilogueEmitter constructor: cache whether CFI must be
// emitted, whether homogeneous prolog/epilog applies, and initialize the SEH
// epilogue-start marker to "none" (MBB.end()).
// NOTE(review): the constructor's signature/init-list lines are missing from
// this doxygen capture; code kept byte-identical.
1305 EmitCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
1306 HomPrologEpilog = AFL.homogeneousPrologEpilog(MF, &MBB);
1307 SEHEpilogueStartI = MBB.end();
1308}
1309
// Move SP to a (negative, partly scalable) offset below FP. Any fixed part of
// the offset is applied into a temporary base register first so the scalable
// portion of the stack is never briefly deallocated.
// NOTE(review): doxygen capture — the StackOffset parameter line and
// MIFlag/continuation lines are missing; code kept byte-identical.
1310void AArch64EpilogueEmitter::moveSPBelowFP(MachineBasicBlock::iterator MBBI,
1312 // Other combinations could be supported, but are not currently needed.
1313 assert(Offset.getScalable() < 0 && Offset.getFixed() <= 0 &&
1314 "expected negative offset (with optional fixed portion)");
1315 Register Base = AArch64::FP;
1316 if (int64_t FixedOffset = Offset.getFixed()) {
1317 // If we have a negative fixed offset, we need to first subtract it in a
1318 // temporary register first (to avoid briefly deallocating the scalable
1319 // portion of the offset).
1320 Base = MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
1321 emitFrameOffset(MBB, MBBI, DL, Base, AArch64::FP,
1322 StackOffset::getFixed(FixedOffset), TII,
1324 }
1325 emitFrameOffset(MBB, MBBI, DL, AArch64::SP, Base,
1326 StackOffset::getScalable(Offset.getScalable()), TII,
1328}
1329
// Main epilogue emission: undo the prologue's stack adjustments (GPR/FPR
// callee-save pops, SVE deallocation, local-stack restore, argument pops) and
// emit matching CFI / WinCFI. Handles funclets, homogeneous epilogues,
// combined SP bumps, and split ZPR/PPR SVE layouts.
// NOTE(review): the function signature line (source line 1330) and numerous
// continuation lines (MIFlags, extra emitFrameOffset arguments, condition
// tails) are missing from this doxygen capture; code kept byte-identical.
1331 MachineBasicBlock::iterator EpilogueEndI = MBB.getLastNonDebugInstr();
1332 if (MBB.end() != EpilogueEndI) {
1333 DL = EpilogueEndI->getDebugLoc();
1334 IsFunclet = isFuncletReturnInstr(*EpilogueEndI);
1335 }
1336
1337 int64_t NumBytes =
1338 IsFunclet ? AFL.getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
1339
1340 // All calls are tail calls in GHC calling conv, and functions have no
1341 // prologue/epilogue.
1342 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
1343 return;
1344
1345 // How much of the stack used by incoming arguments this function is expected
1346 // to restore in this particular epilogue.
1347 int64_t ArgumentStackToRestore = AFL.getArgumentStackToRestore(MF, MBB);
1348 bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv(),
1349 MF.getFunction().isVarArg());
1350 unsigned FixedObject = AFL.getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
1351
1352 int64_t AfterCSRPopSize = ArgumentStackToRestore;
1353 auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
1354 // We cannot rely on the local stack size set in emitPrologue if the function
1355 // has funclets, as funclets have different local stack size requirements, and
1356 // the current value set in emitPrologue may be that of the containing
1357 // function.
1358 if (MF.hasEHFunclets())
1359 AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
1360
1361 if (HomPrologEpilog) {
// Homogeneous epilogue: only the local-stack adjustment is emitted here;
// the HOM_Epilog pseudo restores the callee saves itself.
1363 auto FirstHomogenousEpilogI = MBB.getFirstTerminator();
1364 if (FirstHomogenousEpilogI != MBB.begin()) {
1365 auto HomogeneousEpilog = std::prev(FirstHomogenousEpilogI);
1366 if (HomogeneousEpilog->getOpcode() == AArch64::HOM_Epilog)
1367 FirstHomogenousEpilogI = HomogeneousEpilog;
1368 }
1369
1370 // Adjust local stack
1371 emitFrameOffset(MBB, FirstHomogenousEpilogI, DL, AArch64::SP, AArch64::SP,
1372 StackOffset::getFixed(AFI->getLocalStackSize()), TII,
1374
1375 // SP has been already adjusted while restoring callee save regs.
1376 // We've bailed-out the case with adjusting SP for arguments.
1377 assert(AfterCSRPopSize == 0);
1378 return;
1379 }
1380
1381 bool CombineSPBump = shouldCombineCSRLocalStackBump(NumBytes);
1382
1383 unsigned ProloguePopSize = PrologueSaveSize;
1385 // With CalleeSavesAboveFrameRecord ProloguePopSize is the amount of stack
1386 // that needs to be popped until we reach the start of the SVE save area.
1387 // The "FixedObject" stack occurs after the SVE area and must be popped
1388 // later.
1389 ProloguePopSize -= FixedObject;
1390 AfterCSRPopSize += FixedObject;
1391 }
1392
1393 // Assume we can't combine the last pop with the sp restore.
1394 if (!CombineSPBump && ProloguePopSize != 0) {
// Walk back over CFI/SEH (and other skipped) instructions to find the last
// real callee-save pop before the terminator.
1395 MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
1396 while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
1397 AArch64InstrInfo::isSEHInstruction(*Pop) ||
1400 Pop = std::prev(Pop);
1401 // Converting the last ldp to a post-index ldp is valid only if the last
1402 // ldp's offset is 0.
1403 const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
1404 // If the offset is 0 and the AfterCSR pop is not actually trying to
1405 // allocate more stack for arguments (in space that an untimely interrupt
1406 // may clobber), convert it to a post-index ldp.
1407 if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
1409 Pop, DL, ProloguePopSize, EmitCFI, MachineInstr::FrameDestroy,
1410 ProloguePopSize);
1412 MachineBasicBlock::iterator AfterLastPop = std::next(Pop);
1413 if (AArch64InstrInfo::isSEHInstruction(*AfterLastPop))
1414 ++AfterLastPop;
1415 // If not, and CalleeSavesAboveFrameRecord is enabled, deallocate
1416 // callee-save non-SVE registers to move the stack pointer to the start of
1417 // the SVE area.
1418 emitFrameOffset(MBB, AfterLastPop, DL, AArch64::SP, AArch64::SP,
1419 StackOffset::getFixed(ProloguePopSize), TII,
1421 &HasWinCFI);
1422 } else {
1423 // Otherwise, make sure to emit an add after the last ldp.
1424 // We're doing this by transferring the size to be restored from the
1425 // adjustment *before* the CSR pops to the adjustment *after* the CSR
1426 // pops.
1427 AfterCSRPopSize += ProloguePopSize;
1428 }
1429 }
1430
1431 // Move past the restores of the callee-saved registers.
1432 // If we plan on combining the sp bump of the local stack size and the callee
1433 // save stack size, we might need to adjust the CSR save and restore offsets.
1434 MachineBasicBlock::iterator FirstGPRRestoreI = MBB.getFirstTerminator();
1435 MachineBasicBlock::iterator Begin = MBB.begin();
1436 while (FirstGPRRestoreI != Begin) {
1437 --FirstGPRRestoreI;
1438 if (!FirstGPRRestoreI->getFlag(MachineInstr::FrameDestroy) ||
1440 isPartOfSVECalleeSaves(FirstGPRRestoreI))) {
1441 ++FirstGPRRestoreI;
1442 break;
1443 } else if (CombineSPBump)
1444 fixupCalleeSaveRestoreStackOffset(*FirstGPRRestoreI,
1445 AFI->getLocalStackSize());
1446 }
1447
1448 if (NeedsWinCFI) {
1449 // Note that there are cases where we insert SEH opcodes in the
1450 // epilogue when we had no SEH opcodes in the prologue. For
1451 // example, when there is no stack frame but there are stack
1452 // arguments. Insert the SEH_EpilogStart and remove it later if it
1453 // we didn't emit any SEH opcodes to avoid generating WinCFI for
1454 // functions that don't need it.
1455 BuildMI(MBB, FirstGPRRestoreI, DL, TII->get(AArch64::SEH_EpilogStart))
1457 SEHEpilogueStartI = FirstGPRRestoreI;
1458 --SEHEpilogueStartI;
1459 }
1460
1461 // Determine the ranges of SVE callee-saves. This is done before emitting any
1462 // code at the end of the epilogue (for Swift async), which can get in the way
1463 // of finding SVE callee-saves with CalleeSavesAboveFrameRecord.
1464 auto [PPR, ZPR] = getSVEStackFrameSizes();
1465 auto [PPRRange, ZPRRange] = partitionSVECS(
1466 MBB,
1468 ? MBB.getFirstTerminator()
1469 : FirstGPRRestoreI,
1470 PPR.CalleeSavesSize, ZPR.CalleeSavesSize, /*IsEpilogue=*/true);
1471
1472 if (HasFP && AFI->hasSwiftAsyncContext())
1473 emitSwiftAsyncContextFramePointer(EpilogueEndI, DL);
1474
1475 // If there is a single SP update, insert it before the ret and we're done.
1476 if (CombineSPBump) {
1477 assert(!AFI->hasSVEStackSize() && "Cannot combine SP bump with SVE");
1478
1479 // When we are about to restore the CSRs, the CFA register is SP again.
1480 if (EmitCFI && HasFP)
1482 .buildDefCFA(AArch64::SP, NumBytes);
1483
1484 emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
1485 StackOffset::getFixed(NumBytes + AfterCSRPopSize), TII,
1487 EmitCFI, StackOffset::getFixed(NumBytes));
1488 return;
1489 }
1490
1491 NumBytes -= PrologueSaveSize;
1492 assert(NumBytes >= 0 && "Negative stack allocation size!?");
1493
1494 StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize;
1495 SVEStackAllocations SVEAllocs = getSVEStackAllocations({PPR, ZPR});
1496
1497 // Deallocate the SVE area.
1499 assert(!SVEAllocs.AfterPPRs &&
1500 "unexpected SVE allocs after PPRs with CalleeSavesAboveFrameRecord");
1501 // If the callee-save area is before FP, restoring the FP implicitly
1502 // deallocates non-callee-save SVE allocations. Otherwise, deallocate them
1503 // explicitly.
1504 if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) {
1505 emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1507 false, NeedsWinCFI, &HasWinCFI);
1508 }
1509
1510 // Deallocate callee-save SVE registers.
1511 emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP,
1513 false, NeedsWinCFI, &HasWinCFI);
1514 } else if (AFI->hasSVEStackSize()) {
1515 // If we have stack realignment or variable-sized objects we must use the FP
1516 // to restore SVE callee saves (as there is an unknown amount of
1517 // data/padding between the SP and SVE CS area).
1518 Register BaseForSVEDealloc =
1519 (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP
1520 : AArch64::SP;
1521 if (SVECalleeSavesSize && BaseForSVEDealloc == AArch64::FP) {
1522 if (ZPR.CalleeSavesSize || SVELayout != SVEStackLayout::Split) {
1523 // The offset from the frame-pointer to the start of the ZPR saves.
1524 StackOffset FPOffsetZPR =
1525 -SVECalleeSavesSize - PPR.LocalsSize -
1526 StackOffset::getFixed(AFI->getCalleeSaveBaseToFrameRecordOffset());
1527 // Deallocate the stack space space by moving the SP to the start of the
1528 // ZPR/PPR callee-save area.
1529 moveSPBelowFP(ZPRRange.Begin, FPOffsetZPR);
1530 }
1531 // With split SVE, the predicates are stored in a separate area above the
1532 // ZPR saves, so we must adjust the stack to the start of the PPRs.
1533 if (PPR.CalleeSavesSize && SVELayout == SVEStackLayout::Split) {
1534 // The offset from the frame-pointer to the start of the PPR saves.
1535 StackOffset FPOffsetPPR = -PPR.CalleeSavesSize;
1536 // Move to the start of the PPR area.
1537 assert(!FPOffsetPPR.getFixed() && "expected only scalable offset");
1538 emitFrameOffset(MBB, ZPRRange.End, DL, AArch64::SP, AArch64::FP,
1539 FPOffsetPPR, TII, MachineInstr::FrameDestroy);
1540 }
1541 } else if (BaseForSVEDealloc == AArch64::SP) {
1542 auto NonSVELocals = StackOffset::getFixed(NumBytes);
1543 auto CFAOffset = NonSVELocals + StackOffset::getFixed(PrologueSaveSize) +
1544 SVEAllocs.totalSize();
1545
1546 if (SVECalleeSavesSize || SVELayout == SVEStackLayout::Split) {
1547 // Deallocate non-SVE locals now. This is needed to reach the SVE callee
1548 // saves, but may also allow combining stack hazard bumps for split SVE.
1549 SVEAllocs.AfterZPRs += NonSVELocals;
1550 NumBytes -= NonSVELocals.getFixed();
1551 }
1552 // To deallocate the SVE stack adjust by the allocations in reverse.
1553 emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP,
1555 false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1556 CFAOffset);
1557 CFAOffset -= SVEAllocs.AfterZPRs;
1558 assert(PPRRange.Begin == ZPRRange.End &&
1559 "Expected PPR restores after ZPR");
1560 emitFrameOffset(MBB, PPRRange.Begin, DL, AArch64::SP, AArch64::SP,
1562 false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1563 CFAOffset);
1564 CFAOffset -= SVEAllocs.AfterPPRs;
1565 emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP,
1567 false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1568 CFAOffset);
1569 }
1570
1571 if (EmitCFI)
1572 emitCalleeSavedSVERestores(
1573 SVELayout == SVEStackLayout::Split ? ZPRRange.End : PPRRange.End);
1574 }
1575
1576 if (!HasFP) {
1577 bool RedZone = AFL.canUseRedZone(MF);
1578 // If this was a redzone leaf function, we don't need to restore the
1579 // stack pointer (but we may need to pop stack args for fastcc).
1580 if (RedZone && AfterCSRPopSize == 0)
1581 return;
1582
1583 // Pop the local variables off the stack. If there are no callee-saved
1584 // registers, it means we are actually positioned at the terminator and can
1585 // combine stack increment for the locals and the stack increment for
1586 // callee-popped arguments into (possibly) a single instruction and be done.
1587 bool NoCalleeSaveRestore = PrologueSaveSize == 0;
1588 int64_t StackRestoreBytes = RedZone ? 0 : NumBytes;
1589 if (NoCalleeSaveRestore)
1590 StackRestoreBytes += AfterCSRPopSize;
1591
1593 MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1594 StackOffset::getFixed(StackRestoreBytes), TII,
1596 StackOffset::getFixed((RedZone ? 0 : NumBytes) + PrologueSaveSize));
1597
1598 // If we were able to combine the local stack pop with the argument pop,
1599 // then we're done.
1600 if (NoCalleeSaveRestore || AfterCSRPopSize == 0)
1601 return;
1602
1603 NumBytes = 0;
1604 }
1605
1606 // Restore the original stack pointer.
1607 // FIXME: Rather than doing the math here, we should instead just use
1608 // non-post-indexed loads for the restores if we aren't actually going to
1609 // be able to save any instructions.
1610 if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
1612 MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::FP,
1613 StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()),
1615 } else if (NumBytes)
1616 emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1617 StackOffset::getFixed(NumBytes), TII,
1619
1620 // When we are about to restore the CSRs, the CFA register is SP again.
1621 if (EmitCFI && HasFP)
1623 .buildDefCFA(AArch64::SP, PrologueSaveSize);
1624
1625 // This must be placed after the callee-save restore code because that code
1626 // assumes the SP is at the same location as it was after the callee-save save
1627 // code in the prologue.
1628 if (AfterCSRPopSize) {
1629 assert(AfterCSRPopSize > 0 && "attempting to reallocate arg stack that an "
1630 "interrupt may have clobbered");
1631
1633 MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
1635 false, NeedsWinCFI, &HasWinCFI, EmitCFI,
1636 StackOffset::getFixed(AfterCSRPopSize - ArgumentStackToRestore));
1637 }
1638}
1639
// Decide whether the callee-save pop and the local-stack SP bump may be
// merged into one adjustment in the epilogue. Refuses when the last real
// FrameDestroy instruction is an MTE tag store, since merging the SP bump
// into those instructions is usually more profitable.
// NOTE(review): doxygen capture — the first part of the delegating call to
// the common shouldCombineCSRLocalStackBump (line 1642) is missing; code
// kept byte-identical.
1640bool AArch64EpilogueEmitter::shouldCombineCSRLocalStackBump(
1641 uint64_t StackBumpBytes) const {
1643 StackBumpBytes))
1644 return false;
1645 if (MBB.empty())
1646 return true;
1647
1648 // Disable combined SP bump if the last instruction is an MTE tag store. It
1649 // is almost always better to merge SP adjustment into those instructions.
1650 MachineBasicBlock::iterator LastI = MBB.getFirstTerminator();
1651 MachineBasicBlock::iterator Begin = MBB.begin();
// Walk backwards over transient instructions to the last "real" one.
1652 while (LastI != Begin) {
1653 --LastI;
1654 if (LastI->isTransient())
1655 continue;
1656 if (!LastI->getFlag(MachineInstr::FrameDestroy))
1657 break;
1658 }
1659 switch (LastI->getOpcode()) {
1660 case AArch64::STGloop:
1661 case AArch64::STZGloop:
1662 case AArch64::STGi:
1663 case AArch64::STZGi:
1664 case AArch64::ST2Gi:
1665 case AArch64::STZ2Gi:
1666 return false;
1667 default:
1668 return true;
1669 }
1670 llvm_unreachable("unreachable");
1671}
1672
// Epilogue counterpart of the Swift async FP tagging: clear the extended-frame
// bit (bit 60) from x29 before returning.
// NOTE(review): doxygen capture — the switch case labels and .setMIFlag(...)
// lines are missing from this listing; code kept byte-identical.
1673void AArch64EpilogueEmitter::emitSwiftAsyncContextFramePointer(
1675 switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
1677 // Avoid the reload as it is GOT relative, and instead fall back to the
1678 // hardcoded value below. This allows a mismatch between the OS and
1679 // application without immediately terminating on the difference.
1680 [[fallthrough]];
1682 // We need to reset FP to its untagged state on return. Bit 60 is
1683 // currently used to show the presence of an extended frame.
1684
1685 // BIC x29, x29, #0x1000_0000_0000_0000
// 0x10fe is the AArch64 logical-immediate encoding of the inverted mask.
1686 BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::ANDXri),
1687 AArch64::FP)
1688 .addUse(AArch64::FP)
1689 .addImm(0x10fe)
1691 if (NeedsWinCFI) {
1692 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1694 HasWinCFI = true;
1695 }
1696 break;
1697
1699 break;
1700 }
1701}
1702
// Pop the return address off the shadow call stack (pre-indexed load that
// also decrements x18) and, when async unwind info is required, emit a CFI
// restore for x18.
// NOTE(review): doxygen capture — the DebugLoc parameter line and
// .setMIFlag(...)/SEH continuation lines are missing; code kept
// byte-identical.
1703void AArch64EpilogueEmitter::emitShadowCallStackEpilogue(
1705 // Shadow call stack epilog: ldr x30, [x18, #-8]!
1706 BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXpre))
1707 .addReg(AArch64::X18, RegState::Define)
1708 .addReg(AArch64::LR, RegState::Define)
1709 .addReg(AArch64::X18)
1710 .addImm(-8)
1712
1713 if (NeedsWinCFI)
1714 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1716
1717 if (AFI->needsAsyncDwarfUnwindInfo(MF))
1718 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameDestroy)
1719 .buildRestore(AArch64::X18);
1720}
1721
1722void AArch64EpilogueEmitter::emitCalleeSavedRestores(
1723 MachineBasicBlock::iterator MBBI, bool SVE) const {
1724 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1725 if (CSI.empty())
1726 return;
1727
1728 CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameDestroy);
1729
1730 for (const auto &Info : CSI) {
1731 if (SVE != MFI.hasScalableStackID(Info.getFrameIdx()))
1732 continue;
1733
1734 MCRegister Reg = Info.getReg();
1735 if (SVE && !RegInfo.regNeedsCFI(Reg, Reg))
1736 continue;
1737
1738 CFIBuilder.buildRestore(Info.getReg());
1739 }
1740}
1741
// Final epilogue steps: shadow-call-stack pop, GPR CFI restores, return
// address signing (PAUTH_EPILOGUE), and closing (or discarding) the SEH
// epilogue markers depending on whether any WinCFI opcodes were emitted.
// NOTE(review): doxygen capture — several continuation lines (e.g. MIFlag
// setters and an early return after the shadow-call-stack pop) are missing
// from this listing; code kept byte-identical.
1742void AArch64EpilogueEmitter::finalizeEpilogue() const {
1743 if (AFI->needsShadowCallStackPrologueEpilogue(MF)) {
1744 emitShadowCallStackEpilogue(MBB.getFirstTerminator(), DL);
1746 }
1747 if (EmitCFI)
1748 emitCalleeSavedGPRRestores(MBB.getFirstTerminator());
1749 if (AFI->shouldSignReturnAddress(MF)) {
1750 // If pac-ret+leaf is in effect, PAUTH_EPILOGUE pseudo instructions
1751 // are inserted by emitPacRetPlusLeafHardening().
1752 if (!AFL.shouldSignReturnAddressEverywhere(MF)) {
1753 BuildMI(MBB, MBB.getFirstTerminator(), DL,
1754 TII->get(AArch64::PAUTH_EPILOGUE))
1756 }
1757 // AArch64PointerAuth pass will insert SEH_PACSignLR
1759 }
1760 if (HasWinCFI) {
1761 BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd))
1763 if (!MF.hasWinCFI())
1764 MF.setHasWinCFI(true);
1765 }
1766 if (NeedsWinCFI) {
// If no WinCFI opcodes were actually emitted, drop the EpilogStart marker
// so functions that don't need unwind opcodes don't get any.
1767 assert(SEHEpilogueStartI != MBB.end());
1768 if (!HasWinCFI)
1769 MBB.erase(SEHEpilogueStartI);
1770 }
1771}
1772
1773} // namespace llvm
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file contains the declaration of the AArch64PrologueEmitter and AArch64EpilogueEmitter classes,...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
Analysis containing CSE Info
Definition CSEInfo.cpp:27
This file contains constants used for implementing Dwarf debug support.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
if(PassOpts->AAPipeline)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
static Function * getFunction(FunctionType *Ty, const Twine &Name, Module *M)
static const unsigned FramePtr
AArch64EpilogueEmitter(MachineFunction &MF, MachineBasicBlock &MBB, const AArch64FrameLowering &AFL)
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
AArch64PrologueEmitter(MachineFunction &MF, MachineBasicBlock &MBB, const AArch64FrameLowering &AFL)
MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int CSStackSizeInc, bool EmitCFI, MachineInstr::MIFlag FrameFlag=MachineInstr::FrameSetup, int CFAOffset=0) const
bool isVGInstruction(MachineBasicBlock::iterator MBBI, const TargetLowering &TLI) const
AArch64PrologueEpilogueCommon(MachineFunction &MF, MachineBasicBlock &MBB, const AArch64FrameLowering &AFL)
void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI, uint64_t LocalStackSize) const
bool shouldCombineCSRLocalStackBump(uint64_t StackBumpBytes) const
SVEStackAllocations getSVEStackAllocations(SVEFrameSizes const &)
const AArch64TargetLowering * getTargetLowering() const override
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this functions.
Helper class for creating CFI instructions and inserting them into MIR.
void buildDefCFAOffset(int64_t Offset, MCSymbol *Label=nullptr) const
void insertCFIInst(const MCCFIInstruction &CFIInst) const
void buildDefCFA(MCRegister Reg, int64_t Offset) const
A debug info location.
Definition DebugLoc.h:123
A set of physical registers with utility functions to track liveness when walking backward/forward th...
MachineInstrBundleIterator< MachineInstr > iterator
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
int64_t getImm() const
bool isSymbol() const
isSymbol - Tests if this is a MO_ExternalSymbol operand.
static MachineOperand CreateImm(int64_t Val)
const char * getSymbolName() const
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
int64_t getFixed() const
Returns the fixed component of the stack.
Definition TypeSize.h:46
int64_t getScalable() const
Returns the scalable component of the stack.
Definition TypeSize.h:49
static StackOffset getFixed(int64_t Fixed)
Definition TypeSize.h:39
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
const unsigned StackProbeMaxUnprobedStack
Maximum allowed number of unprobed bytes above SP at an ABI boundary.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg, unsigned Reg, const StackOffset &Offset, bool LastAdjustmentWasScalable=true)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
static bool matchLibcall(const TargetLowering &TLI, const MachineOperand &MO, RTLIB::Libcall LC)
static bool isPartOfSVECalleeSaves(MachineBasicBlock::iterator I)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
static bool isFuncletReturnInstr(const MachineInstr &MI)
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
static void getLivePhysRegsUpTo(MachineInstr &MI, const TargetRegisterInfo &TRI, LivePhysRegs &LiveRegs)
Collect live registers from the end of MI's parent up to (including) MI in LiveRegs.
@ DeploymentBased
Determine whether to set the bit statically or dynamically based on the deployment target.
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag=MachineInstr::NoFlags, bool SetNZCV=false, bool NeedsWinCFI=false, bool *HasWinCFI=nullptr, bool EmitCFAOffset=false, StackOffset InitialOffset={}, unsigned FrameReg=AArch64::SP)
emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg plus Offset.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
LLVM_ABI EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
static bool isPartOfPPRCalleeSaves(MachineBasicBlock::iterator I)
@ Success
The lock was released successfully.
static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI, unsigned LocalStackSize)
MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg, const StackOffset &OffsetFromDefCFA, std::optional< int64_t > IncomingVGOffsetFromDefCFA)
DWARFExpression::Operation Op
bool isAsynchronousEHPersonality(EHPersonality Pers)
Returns true if this personality function catches asynchronous exceptions.
static SVEPartitions partitionSVECS(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, StackOffset PPRCalleeSavesSize, StackOffset ZPRCalleeSavesSize, bool IsEpilogue)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1770
static bool isPartOfZPRCalleeSaves(MachineBasicBlock::iterator I)
static int64_t upperBound(StackOffset Size)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
MachineBasicBlock::iterator End
struct llvm::SVEPartitions::@327166152017175235362202041204223104077330276266 ZPR
struct llvm::SVEPartitions::@327166152017175235362202041204223104077330276266 PPR
MachineBasicBlock::iterator Begin