1//===- AArch64PrologueEpilogue.cpp ------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
12#include "AArch64Subtarget.h"
14#include "llvm/ADT/Statistic.h"
17#include "llvm/MC/MCContext.h"
18
19#define DEBUG_TYPE "frame-info"
20
21STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
22
23namespace llvm {
24
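// Returns true if the operand is the external symbol whose name matches the
// given runtime library call in the target's libcall table.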
25static bool matchLibcall(const TargetLowering &TLI, const MachineOperand &MO,
26 RTLIB::Libcall LC) {
27 return MO.isSymbol() &&
28 StringRef(TLI.getLibcallName(LC)) == MO.getSymbolName();
29}
30
32 return AFI->hasStreamingModeChanges() &&
33 !MF.getSubtarget<AArch64Subtarget>().hasSVE();
34}
35
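// Returns true if the instruction is part of the sequence that materializes
// the current value of VG before the callee saves are spilled: a CNTD, or
// (when SVE is unavailable) the call to the SME get-current-VG libcall and the
// COPY of its result.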
38 unsigned Opc = MBBI->getOpcode();
39 if (Opc == AArch64::CNTD_XPiI)
40 return true;
41
42 if (!requiresGetVGCall())
43 return false;
44
45 if (Opc == AArch64::BL)
46 return matchLibcall(TLI, MBBI->getOperand(0), RTLIB::SMEABI_GET_CURRENT_VG);
47
48 return Opc == TargetOpcode::COPY;
49}
50
51// Convenience function to determine whether I is part of the ZPR callee saves.
53 switch (I->getOpcode()) {
54 default:
55 return false;
56 case AArch64::LD1B_2Z_IMM:
57 case AArch64::ST1B_2Z_IMM:
58 case AArch64::STR_ZXI:
59 case AArch64::LDR_ZXI:
60 case AArch64::PTRUE_C_B:
 61 return I->getFlag(MachineInstr::FrameSetup) ||
 62 I->getFlag(MachineInstr::FrameDestroy);
63 case AArch64::SEH_SaveZReg:
64 return true;
65 }
66}
67
68// Convenience function to determine whether I is part of the PPR callee saves.
70 switch (I->getOpcode()) {
71 default:
72 return false;
73 case AArch64::STR_PXI:
74 case AArch64::LDR_PXI:
 75 return I->getFlag(MachineInstr::FrameSetup) ||
 76 I->getFlag(MachineInstr::FrameDestroy);
77 case AArch64::SEH_SavePReg:
78 return true;
79 }
80}
81
82// Convenience function to determine whether I is part of the SVE callee saves.
86
90 : MF(MF), MBB(MBB), MFI(MF.getFrameInfo()),
91 Subtarget(MF.getSubtarget<AArch64Subtarget>()), AFL(AFL),
92 RegInfo(*Subtarget.getRegisterInfo()) {
93 TII = Subtarget.getInstrInfo();
94 AFI = MF.getInfo<AArch64FunctionInfo>();
95
96 HasFP = AFL.hasFP(MF);
97 NeedsWinCFI = AFL.needsWinCFI(MF);
98
99 if (AFL.hasSVECalleeSavesAboveFrameRecord(MF)) {
100 if (AFI->hasStackHazardSlotIndex())
101 reportFatalUsageError("SME hazard padding is not supported on Windows");
102 SVELayout = SVEStackLayout::CalleeSavesAboveFrameRecord;
103 } else if (AFI->hasSplitSVEObjects()) {
104 SVELayout = SVEStackLayout::Split;
105 }
106}
107
110 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int CSStackSizeInc,
111 bool EmitCFI, MachineInstr::MIFlag FrameFlag, int CFAOffset) const {
112 unsigned NewOpc;
113
114 // If the function contains streaming mode changes, we expect instructions
115 // to calculate the value of VG before spilling. Move past these instructions
116 // if necessary.
117 if (AFL.requiresSaveVG(MF)) {
118 auto &TLI = *Subtarget.getTargetLowering();
119 while (isVGInstruction(MBBI, TLI))
120 ++MBBI;
121 }
122
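  // Map the callee-save store (or restore) opcode to its pre/post-indexed
  // form so the SP adjustment can be folded into the memory instruction.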
123 switch (MBBI->getOpcode()) {
124 default:
125 llvm_unreachable("Unexpected callee-save save/restore opcode!");
126 case AArch64::STPXi:
127 NewOpc = AArch64::STPXpre;
128 break;
129 case AArch64::STPDi:
130 NewOpc = AArch64::STPDpre;
131 break;
132 case AArch64::STPQi:
133 NewOpc = AArch64::STPQpre;
134 break;
135 case AArch64::STRXui:
136 NewOpc = AArch64::STRXpre;
137 break;
138 case AArch64::STRDui:
139 NewOpc = AArch64::STRDpre;
140 break;
141 case AArch64::STRQui:
142 NewOpc = AArch64::STRQpre;
143 break;
144 case AArch64::LDPXi:
145 NewOpc = AArch64::LDPXpost;
146 break;
147 case AArch64::LDPDi:
148 NewOpc = AArch64::LDPDpost;
149 break;
150 case AArch64::LDPQi:
151 NewOpc = AArch64::LDPQpost;
152 break;
153 case AArch64::LDRXui:
154 NewOpc = AArch64::LDRXpost;
155 break;
156 case AArch64::LDRDui:
157 NewOpc = AArch64::LDRDpost;
158 break;
159 case AArch64::LDRQui:
160 NewOpc = AArch64::LDRQpost;
161 break;
162 }
163 TypeSize Scale = TypeSize::getFixed(1), Width = TypeSize::getFixed(0);
164 int64_t MinOffset, MaxOffset;
165 bool Success = TII->getMemOpInfo(NewOpc, Scale, Width, MinOffset, MaxOffset);
166 (void)Success;
167 assert(Success && "unknown load/store opcode");
168
 169 // If the first store isn't right where we want SP, then we can't fold the
 170 // update in, so create a normal arithmetic instruction instead.
171 if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
172 CSStackSizeInc < MinOffset * (int64_t)Scale.getFixedValue() ||
173 CSStackSizeInc > MaxOffset * (int64_t)Scale.getFixedValue()) {
174 // If we are destroying the frame, make sure we add the increment after the
175 // last frame operation.
176 if (FrameFlag == MachineInstr::FrameDestroy) {
177 ++MBBI;
178 // Also skip the SEH instruction, if needed
179 if (NeedsWinCFI && AArch64InstrInfo::isSEHInstruction(*MBBI))
180 ++MBBI;
181 }
182 emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
183 StackOffset::getFixed(CSStackSizeInc), TII, FrameFlag,
184 false, NeedsWinCFI, &HasWinCFI, EmitCFI,
185 StackOffset::getFixed(CFAOffset));
186
187 return std::prev(MBBI);
188 }
189
190 // Get rid of the SEH code associated with the old instruction.
191 if (NeedsWinCFI) {
192 auto SEH = std::next(MBBI);
193 if (AArch64InstrInfo::isSEHInstruction(*SEH))
194 SEH->eraseFromParent();
195 }
196
197 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
198 MIB.addReg(AArch64::SP, RegState::Define);
199
200 // Copy all operands other than the immediate offset.
201 unsigned OpndIdx = 0;
202 for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
203 ++OpndIdx)
204 MIB.add(MBBI->getOperand(OpndIdx));
205
206 assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
207 "Unexpected immediate offset in first/last callee-save save/restore "
208 "instruction!");
209 assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
210 "Unexpected base register in callee-save save/restore instruction!");
211 assert(CSStackSizeInc % Scale == 0);
212 MIB.addImm(CSStackSizeInc / (int)Scale);
213
214 MIB.setMIFlags(MBBI->getFlags());
215 MIB.setMemRefs(MBBI->memoperands());
216
217 // Generate a new SEH code that corresponds to the new instruction.
218 if (NeedsWinCFI) {
219 HasWinCFI = true;
220 AFL.insertSEH(*MIB, *TII, FrameFlag);
221 }
222
223 if (EmitCFI)
224 CFIInstBuilder(MBB, MBBI, FrameFlag)
225 .buildDefCFAOffset(CFAOffset - CSStackSizeInc);
226
227 return std::prev(MBB.erase(MBBI));
228}
229
230// Fix up the SEH opcode associated with the save/restore instruction.
231static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
 232 unsigned LocalStackSize) {
233 MachineOperand *ImmOpnd = nullptr;
234 unsigned ImmIdx = MBBI->getNumOperands() - 1;
235 switch (MBBI->getOpcode()) {
236 default:
237 llvm_unreachable("Fix the offset in the SEH instruction");
238 case AArch64::SEH_SaveFPLR:
239 case AArch64::SEH_SaveRegP:
240 case AArch64::SEH_SaveReg:
241 case AArch64::SEH_SaveFRegP:
242 case AArch64::SEH_SaveFReg:
243 case AArch64::SEH_SaveAnyRegI:
244 case AArch64::SEH_SaveAnyRegIP:
245 case AArch64::SEH_SaveAnyRegQP:
246 case AArch64::SEH_SaveAnyRegQPX:
247 ImmOpnd = &MBBI->getOperand(ImmIdx);
248 break;
249 }
250 if (ImmOpnd)
251 ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
252}
253
255 MachineInstr &MI, uint64_t LocalStackSize) const {
256 if (AArch64InstrInfo::isSEHInstruction(MI))
257 return;
258
259 unsigned Opc = MI.getOpcode();
260 unsigned Scale;
261 switch (Opc) {
262 case AArch64::STPXi:
263 case AArch64::STRXui:
264 case AArch64::STPDi:
265 case AArch64::STRDui:
266 case AArch64::LDPXi:
267 case AArch64::LDRXui:
268 case AArch64::LDPDi:
269 case AArch64::LDRDui:
270 Scale = 8;
271 break;
272 case AArch64::STPQi:
273 case AArch64::STRQui:
274 case AArch64::LDPQi:
275 case AArch64::LDRQui:
276 Scale = 16;
277 break;
278 default:
279 llvm_unreachable("Unexpected callee-save save/restore opcode!");
280 }
281
282 unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
283 assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
284 "Unexpected base register in callee-save save/restore instruction!");
285 // Last operand is immediate offset that needs fixing.
286 MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
287 // All generated opcodes have scaled offsets.
288 assert(LocalStackSize % Scale == 0);
289 OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);
290
291 if (NeedsWinCFI) {
292 HasWinCFI = true;
293 auto MBBI = std::next(MachineBasicBlock::iterator(MI));
294 assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
295 assert(AArch64InstrInfo::isSEHInstruction(*MBBI) &&
296 "Expecting a SEH instruction");
297 fixupSEHOpcode(MBBI, LocalStackSize);
298 }
299}
300
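// Returns true if the SP adjustment for the callee-save spills/restores can be
// combined with the SP adjustment for the local stack area into a single bump.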
302 uint64_t StackBumpBytes) const {
303 if (AFL.homogeneousPrologEpilog(MF))
304 return false;
305
306 if (AFI->getLocalStackSize() == 0)
307 return false;
308
 309 // For WinCFI, if optimizing for size, prefer not to combine the stack bump
 310 // (forcing an stp with pre-decrement) so that the packed unwind format can
 311 // be used, provided that there actually are callee-saved registers to merge
 312 // the decrement with.
313 // This is potentially marginally slower, but allows using the packed
314 // unwind format for functions that both have a local area and callee saved
315 // registers. Using the packed unwind format notably reduces the size of
316 // the unwind info.
317 if (AFL.needsWinCFI(MF) && AFI->getCalleeSavedStackSize() > 0 &&
318 MF.getFunction().hasOptSize())
319 return false;
320
321 // 512 is the maximum immediate for stp/ldp that will be used for
322 // callee-save save/restores
323 if (StackBumpBytes >= 512 ||
324 AFL.windowsRequiresStackProbe(MF, StackBumpBytes))
325 return false;
326
327 if (MFI.hasVarSizedObjects())
328 return false;
329
330 if (RegInfo.hasStackRealignment(MF))
331 return false;
332
333 // This isn't strictly necessary, but it simplifies things a bit since the
334 // current RedZone handling code assumes the SP is adjusted by the
335 // callee-save save/restore code.
336 if (AFL.canUseRedZone(MF))
337 return false;
338
339 // When there is an SVE area on the stack, always allocate the
340 // callee-saves and spills/locals separately.
341 if (AFI->hasSVEStackSize())
342 return false;
343
344 return true;
345}
346
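// Computes the callee-save and locals sizes for the PPR and ZPR areas of the
// SVE stack.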
348 StackOffset PPRCalleeSavesSize =
349 StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize());
350 StackOffset ZPRCalleeSavesSize =
351 StackOffset::getScalable(AFI->getZPRCalleeSavedStackSize());
352 StackOffset PPRLocalsSize = AFL.getPPRStackSize(MF) - PPRCalleeSavesSize;
353 StackOffset ZPRLocalsSize = AFL.getZPRStackSize(MF) - ZPRCalleeSavesSize;
 354 if (SVELayout == SVEStackLayout::Split)
 355 return {{PPRCalleeSavesSize, PPRLocalsSize},
356 {ZPRCalleeSavesSize, ZPRLocalsSize}};
357 // For simplicity, attribute all locals to ZPRs when split SVE is disabled.
358 return {{PPRCalleeSavesSize, StackOffset{}},
359 {ZPRCalleeSavesSize, PPRLocalsSize + ZPRLocalsSize}};
360}
361
363 SVEFrameSizes const &SVE) {
364 StackOffset AfterZPRs = SVE.ZPR.LocalsSize;
365 StackOffset BeforePPRs = SVE.ZPR.CalleeSavesSize + SVE.PPR.CalleeSavesSize;
366 StackOffset AfterPPRs = {};
 367 if (SVELayout == SVEStackLayout::Split) {
 368 BeforePPRs = SVE.PPR.CalleeSavesSize;
369 // If there are no ZPR CSRs, place all local allocations after the ZPRs.
370 if (SVE.ZPR.CalleeSavesSize)
371 AfterPPRs += SVE.PPR.LocalsSize + SVE.ZPR.CalleeSavesSize;
372 else
373 AfterZPRs += SVE.PPR.LocalsSize; // Group allocation of locals.
374 }
375 return {BeforePPRs, AfterPPRs, AfterZPRs};
376}
377
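// Walks the callee-save instructions at the start of the prologue (or the end
// of the epilogue) and partitions them into the PPR and ZPR save/restore
// ranges.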
383
386 StackOffset PPRCalleeSavesSize,
387 StackOffset ZPRCalleeSavesSize,
388 bool IsEpilogue) {
 389 MachineBasicBlock::iterator PPRsI = MBBI;
 390 MachineBasicBlock::iterator End =
 391 IsEpilogue ? MBB.begin() : MBB.getFirstTerminator();
392 auto AdjustI = [&](auto MBBI) { return IsEpilogue ? std::prev(MBBI) : MBBI; };
393 // Process the SVE CS to find the starts/ends of the ZPR and PPR areas.
394 if (PPRCalleeSavesSize) {
395 PPRsI = AdjustI(PPRsI);
396 assert(isPartOfPPRCalleeSaves(*PPRsI) && "Unexpected instruction");
397 while (PPRsI != End && isPartOfPPRCalleeSaves(AdjustI(PPRsI)))
398 IsEpilogue ? (--PPRsI) : (++PPRsI);
399 }
400 MachineBasicBlock::iterator ZPRsI = PPRsI;
401 if (ZPRCalleeSavesSize) {
402 ZPRsI = AdjustI(ZPRsI);
403 assert(isPartOfZPRCalleeSaves(*ZPRsI) && "Unexpected instruction");
404 while (ZPRsI != End && isPartOfZPRCalleeSaves(AdjustI(ZPRsI)))
405 IsEpilogue ? (--ZPRsI) : (++ZPRsI);
406 }
407 if (IsEpilogue)
408 return {{PPRsI, MBBI}, {ZPRsI, PPRsI}};
409 return {{MBBI, PPRsI}, {PPRsI, ZPRsI}};
410}
411
416 EmitCFI = AFI->needsDwarfUnwindInfo(MF);
417 EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
418 IsFunclet = MBB.isEHFuncletEntry();
419 HomPrologEpilog = AFL.homogeneousPrologEpilog(MF);
420
421#ifndef NDEBUG
422 collectBlockLiveins();
423#endif
424}
425
426#ifndef NDEBUG
427/// Collect live registers from the end of \p MI's parent up to (including) \p
428/// MI in \p LiveRegs.
431
432 MachineBasicBlock &MBB = *MI.getParent();
433 LiveRegs.addLiveOuts(MBB);
434 for (const MachineInstr &MI :
435 reverse(make_range(MI.getIterator(), MBB.instr_end())))
436 LiveRegs.stepBackward(MI);
437}
438
439void AArch64PrologueEmitter::collectBlockLiveins() {
 440 // Collect live registers from the end of MBB up to the start of the existing
441 // frame setup instructions.
442 PrologueEndI = MBB.begin();
443 while (PrologueEndI != MBB.end() &&
444 PrologueEndI->getFlag(MachineInstr::FrameSetup))
445 ++PrologueEndI;
446
447 if (PrologueEndI != MBB.end()) {
448 getLivePhysRegsUpTo(*PrologueEndI, RegInfo, LiveRegs);
449 // Ignore registers used for stack management for now.
450 LiveRegs.removeReg(AArch64::SP);
451 LiveRegs.removeReg(AArch64::X19);
452 LiveRegs.removeReg(AArch64::FP);
453 LiveRegs.removeReg(AArch64::LR);
454
455 // X0 will be clobbered by a call to __arm_get_current_vg in the prologue.
456 // This is necessary to spill VG if required where SVE is unavailable, but
457 // X0 is preserved around this call.
458 if (requiresGetVGCall())
459 LiveRegs.removeReg(AArch64::X0);
460 }
461}
462
463void AArch64PrologueEmitter::verifyPrologueClobbers() const {
464 if (PrologueEndI == MBB.end())
465 return;
 466 // Check if any of the newly inserted instructions clobber any of the live registers.
467 for (MachineInstr &MI :
468 make_range(MBB.instr_begin(), PrologueEndI->getIterator())) {
469 for (auto &Op : MI.operands())
470 if (Op.isReg() && Op.isDef())
471 assert(!LiveRegs.contains(Op.getReg()) &&
472 "live register clobbered by inserted prologue instructions");
473 }
474}
475#endif
476
477void AArch64PrologueEmitter::determineLocalsStackSize(
478 uint64_t StackSize, uint64_t PrologueSaveSize) {
479 AFI->setLocalStackSize(StackSize - PrologueSaveSize);
480 CombineSPBump = shouldCombineCSRLocalStackBump(StackSize);
481}
482
483// Return the maximum possible number of bytes for `Size` due to the
484// architectural limit on the size of an SVE register.
485static int64_t upperBound(StackOffset Size) {
486 static const int64_t MAX_BYTES_PER_SCALABLE_BYTE = 16;
487 return Size.getScalable() * MAX_BYTES_PER_SCALABLE_BYTE + Size.getFixed();
488}
489
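// Allocates AllocSize bytes of stack space (plus RealignmentPadding, if any),
// emitting stack probes where required, and realigns SP when
// RealignmentPadding is non-zero.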
490void AArch64PrologueEmitter::allocateStackSpace(
491 MachineBasicBlock::iterator MBBI, int64_t RealignmentPadding,
492 StackOffset AllocSize, bool EmitCFI, StackOffset InitialOffset,
493 bool FollowupAllocs) {
494
495 if (!AllocSize)
496 return;
497
498 DebugLoc DL;
499 const int64_t MaxAlign = MFI.getMaxAlign().value();
500 const uint64_t AndMask = ~(MaxAlign - 1);
501
503 Register TargetReg = RealignmentPadding
504 ? AFL.findScratchNonCalleeSaveRegister(&MBB)
505 : AArch64::SP;
506 // SUB Xd/SP, SP, AllocSize
507 emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, TII,
509 EmitCFI, InitialOffset);
510
511 if (RealignmentPadding) {
512 // AND SP, X9, 0b11111...0000
513 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
514 .addReg(TargetReg, RegState::Kill)
517 AFI->setStackRealigned(true);
518
519 // No need for SEH instructions here; if we're realigning the stack,
520 // we've set a frame pointer and already finished the SEH prologue.
522 }
523 return;
524 }
525
526 //
527 // Stack probing allocation.
528 //
529
530 // Fixed length allocation. If we don't need to re-align the stack and don't
531 // have SVE objects, we can use a more efficient sequence for stack probing.
532 if (AllocSize.getScalable() == 0 && RealignmentPadding == 0) {
533 Register ScratchReg = AFL.findScratchNonCalleeSaveRegister(&MBB);
534 assert(ScratchReg != AArch64::NoRegister);
535 BuildMI(MBB, MBBI, DL, TII->get(AArch64::PROBED_STACKALLOC))
536 .addDef(ScratchReg)
537 .addImm(AllocSize.getFixed())
538 .addImm(InitialOffset.getFixed())
539 .addImm(InitialOffset.getScalable());
540 // The fixed allocation may leave unprobed bytes at the top of the
541 // stack. If we have subsequent allocation (e.g. if we have variable-sized
542 // objects), we need to issue an extra probe, so these allocations start in
543 // a known state.
544 if (FollowupAllocs) {
545 // STR XZR, [SP]
546 BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
547 .addReg(AArch64::XZR)
548 .addReg(AArch64::SP)
549 .addImm(0)
551 }
552
553 return;
554 }
555
556 // Variable length allocation.
557
558 // If the (unknown) allocation size cannot exceed the probe size, decrement
559 // the stack pointer right away.
560 int64_t ProbeSize = AFI->getStackProbeSize();
561 if (upperBound(AllocSize) + RealignmentPadding <= ProbeSize) {
562 Register ScratchReg = RealignmentPadding
563 ? AFL.findScratchNonCalleeSaveRegister(&MBB)
564 : AArch64::SP;
565 assert(ScratchReg != AArch64::NoRegister);
566 // SUB Xd, SP, AllocSize
567 emitFrameOffset(MBB, MBBI, DL, ScratchReg, AArch64::SP, -AllocSize, TII,
569 EmitCFI, InitialOffset);
570 if (RealignmentPadding) {
571 // AND SP, Xn, 0b11111...0000
572 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
573 .addReg(ScratchReg, RegState::Kill)
576 AFI->setStackRealigned(true);
577 }
578 if (FollowupAllocs || upperBound(AllocSize) + RealignmentPadding >
580 // STR XZR, [SP]
581 BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
582 .addReg(AArch64::XZR)
583 .addReg(AArch64::SP)
584 .addImm(0)
586 }
587 return;
588 }
589
590 // Emit a variable-length allocation probing loop.
591 // TODO: As an optimisation, the loop can be "unrolled" into a few parts,
592 // each of them guaranteed to adjust the stack by less than the probe size.
593 Register TargetReg = AFL.findScratchNonCalleeSaveRegister(&MBB);
594 assert(TargetReg != AArch64::NoRegister);
595 // SUB Xd, SP, AllocSize
596 emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, TII,
598 EmitCFI, InitialOffset);
599 if (RealignmentPadding) {
600 // AND Xn, Xn, 0b11111...0000
601 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), TargetReg)
602 .addReg(TargetReg, RegState::Kill)
605 }
606
607 BuildMI(MBB, MBBI, DL, TII->get(AArch64::PROBED_STACKALLOC_VAR))
608 .addReg(TargetReg);
609 if (EmitCFI) {
610 // Set the CFA register back to SP.
611 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
612 .buildDefCFARegister(AArch64::SP);
613 }
614 if (RealignmentPadding)
615 AFI->setStackRealigned(true);
616}
617
618void AArch64PrologueEmitter::emitPrologue() {
 619 const MachineBasicBlock::iterator PrologueBeginI = MBB.begin();
620 const MachineBasicBlock::iterator EndI = MBB.end();
621
622 // At this point, we're going to decide whether or not the function uses a
623 // redzone. In most cases, the function doesn't have a redzone so let's
624 // assume that's false and set it to true in the case that there's a redzone.
625 AFI->setHasRedZone(false);
626
627 // Debug location must be unknown since the first debug location is used
628 // to determine the end of the prologue.
629 DebugLoc DL;
630
631 // In some cases, particularly with CallingConv::SwiftTail, it is possible to
632 // have a tail-call where the caller only needs to adjust the stack pointer in
633 // the epilogue. In this case, we still need to emit a SEH prologue sequence.
634 // See `seh-minimal-prologue-epilogue.ll` test cases.
635 if (AFI->getArgumentStackToRestore())
636 HasWinCFI = true;
637
638 if (AFI->shouldSignReturnAddress(MF)) {
639 // If pac-ret+leaf is in effect, PAUTH_PROLOGUE pseudo instructions
640 // are inserted by emitPacRetPlusLeafHardening().
641 if (!AFL.shouldSignReturnAddressEverywhere(MF)) {
642 BuildMI(MBB, PrologueBeginI, DL, TII->get(AArch64::PAUTH_PROLOGUE))
644 }
645 // AArch64PointerAuth pass will insert SEH_PACSignLR
647 }
648
649 if (AFI->needsShadowCallStackPrologueEpilogue(MF)) {
650 emitShadowCallStackPrologue(PrologueBeginI, DL);
652 }
653
654 if (EmitCFI && AFI->isMTETagged())
655 BuildMI(MBB, PrologueBeginI, DL, TII->get(AArch64::EMITMTETAGGED))
657
 658 // We signal the presence of a Swift extended frame to external tools by
 659 // storing FP with 0b0001 in bits 63:60. In normal userland operation a
 660 // simple ORR is sufficient; a Swift kernel is assumed to initialize the TBI
 661 // bits so that this still holds.
662 if (HasFP && AFI->hasSwiftAsyncContext())
663 emitSwiftAsyncContextFramePointer(PrologueBeginI, DL);
664
665 // All calls are tail calls in GHC calling conv, and functions have no
666 // prologue/epilogue.
667 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
668 return;
669
670 // Set tagged base pointer to the requested stack slot. Ideally it should
671 // match SP value after prologue.
672 if (std::optional<int> TBPI = AFI->getTaggedBasePointerIndex())
673 AFI->setTaggedBasePointerOffset(-MFI.getObjectOffset(*TBPI));
674 else
675 AFI->setTaggedBasePointerOffset(MFI.getStackSize());
676
677 // getStackSize() includes all the locals in its size calculation. We don't
678 // include these locals when computing the stack size of a funclet, as they
679 // are allocated in the parent's stack frame and accessed via the frame
680 // pointer from the funclet. We only save the callee saved registers in the
681 // funclet, which are really the callee saved registers of the parent
682 // function, including the funclet.
683 int64_t NumBytes =
684 IsFunclet ? AFL.getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
685 if (!AFI->hasStackFrame() && !AFL.windowsRequiresStackProbe(MF, NumBytes))
686 return emitEmptyStackFramePrologue(NumBytes, PrologueBeginI, DL);
687
688 bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
689 unsigned FixedObject = AFL.getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
690
691 auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
692 // All of the remaining stack allocations are for locals.
693 determineLocalsStackSize(NumBytes, PrologueSaveSize);
694
695 auto [PPR, ZPR] = getSVEStackFrameSizes();
696 SVEStackAllocations SVEAllocs = getSVEStackAllocations({PPR, ZPR});
697
698 MachineBasicBlock::iterator FirstGPRSaveI = PrologueBeginI;
 699 if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
 700 assert(!SVEAllocs.AfterPPRs &&
701 "unexpected SVE allocs after PPRs with CalleeSavesAboveFrameRecord");
702 // If we're doing SVE saves first, we need to immediately allocate space
703 // for fixed objects, then space for the SVE callee saves.
704 //
705 // Windows unwind requires that the scalable size is a multiple of 16;
706 // that's handled when the callee-saved size is computed.
707 auto SaveSize = SVEAllocs.BeforePPRs + StackOffset::getFixed(FixedObject);
708 allocateStackSpace(PrologueBeginI, 0, SaveSize, false, StackOffset{},
709 /*FollowupAllocs=*/true);
710 NumBytes -= FixedObject;
711
712 // Now allocate space for the GPR callee saves.
713 MachineBasicBlock::iterator MBBI = PrologueBeginI;
714 while (MBBI != EndI && isPartOfSVECalleeSaves(MBBI))
715 ++MBBI;
717 MBBI, DL, -AFI->getCalleeSavedStackSize(), EmitAsyncCFI);
718 NumBytes -= AFI->getCalleeSavedStackSize();
719 } else if (CombineSPBump) {
720 assert(!AFL.getSVEStackSize(MF) && "Cannot combine SP bump with SVE");
721 emitFrameOffset(MBB, PrologueBeginI, DL, AArch64::SP, AArch64::SP,
722 StackOffset::getFixed(-NumBytes), TII,
724 EmitAsyncCFI);
725 NumBytes = 0;
726 } else if (HomPrologEpilog) {
727 // Stack has been already adjusted.
728 NumBytes -= PrologueSaveSize;
729 } else if (PrologueSaveSize != 0) {
731 PrologueBeginI, DL, -PrologueSaveSize, EmitAsyncCFI);
732 NumBytes -= PrologueSaveSize;
733 }
734 assert(NumBytes >= 0 && "Negative stack allocation size!?");
735
736 // Move past the saves of the callee-saved registers, fixing up the offsets
737 // and pre-inc if we decided to combine the callee-save and local stack
738 // pointer bump above.
739 auto &TLI = *Subtarget.getTargetLowering();
740
741 MachineBasicBlock::iterator AfterGPRSavesI = FirstGPRSaveI;
742 while (AfterGPRSavesI != EndI &&
743 AfterGPRSavesI->getFlag(MachineInstr::FrameSetup) &&
744 !isPartOfSVECalleeSaves(AfterGPRSavesI)) {
745 if (CombineSPBump &&
746 // Only fix-up frame-setup load/store instructions.
747 (!AFL.requiresSaveVG(MF) || !isVGInstruction(AfterGPRSavesI, TLI)))
748 fixupCalleeSaveRestoreStackOffset(*AfterGPRSavesI,
749 AFI->getLocalStackSize());
750 ++AfterGPRSavesI;
751 }
752
753 // For funclets the FP belongs to the containing function. Only set up FP if
754 // we actually need to.
755 if (!IsFunclet && HasFP)
756 emitFramePointerSetup(AfterGPRSavesI, DL, FixedObject);
757
 758 // Now emit the moves for whatever callee-saved regs we have (including FP,
 759 // LR if those are saved). Frame instructions for SVE registers are emitted
 760 // later, after the instructions which actually save the SVE regs.
761 if (EmitAsyncCFI)
762 emitCalleeSavedGPRLocations(AfterGPRSavesI);
763
764 // Alignment is required for the parent frame, not the funclet
765 const bool NeedsRealignment =
766 NumBytes && !IsFunclet && RegInfo.hasStackRealignment(MF);
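  // SP is already 16-byte aligned, so at most MaxAlign - 16 bytes of extra
  // padding are needed to be able to realign it to MaxAlign.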
767 const int64_t RealignmentPadding =
768 (NeedsRealignment && MFI.getMaxAlign() > Align(16))
769 ? MFI.getMaxAlign().value() - 16
770 : 0;
771
772 if (AFL.windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding))
773 emitWindowsStackProbe(AfterGPRSavesI, DL, NumBytes, RealignmentPadding);
774
775 StackOffset NonSVELocalsSize = StackOffset::getFixed(NumBytes);
776 SVEAllocs.AfterZPRs += NonSVELocalsSize;
777
778 StackOffset CFAOffset =
779 StackOffset::getFixed(MFI.getStackSize()) - NonSVELocalsSize;
780 MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;
781 // Allocate space for the callee saves and PPR locals (if any).
 782 if (SVELayout != SVEStackLayout::CalleeSavesAboveFrameRecord) {
 783 auto [PPRRange, ZPRRange] =
784 partitionSVECS(MBB, AfterGPRSavesI, PPR.CalleeSavesSize,
785 ZPR.CalleeSavesSize, /*IsEpilogue=*/false);
786 AfterSVESavesI = ZPRRange.End;
787 if (EmitAsyncCFI)
788 emitCalleeSavedSVELocations(AfterSVESavesI);
789
790 allocateStackSpace(PPRRange.Begin, 0, SVEAllocs.BeforePPRs,
791 EmitAsyncCFI && !HasFP, CFAOffset,
792 MFI.hasVarSizedObjects() || SVEAllocs.AfterPPRs ||
793 SVEAllocs.AfterZPRs);
794 CFAOffset += SVEAllocs.BeforePPRs;
795 assert(PPRRange.End == ZPRRange.Begin &&
796 "Expected ZPR callee saves after PPR locals");
797 allocateStackSpace(PPRRange.End, 0, SVEAllocs.AfterPPRs,
798 EmitAsyncCFI && !HasFP, CFAOffset,
799 MFI.hasVarSizedObjects() || SVEAllocs.AfterZPRs);
800 CFAOffset += SVEAllocs.AfterPPRs;
801 } else {
803 // Note: With CalleeSavesAboveFrameRecord, the SVE CS (BeforePPRs) have
804 // already been allocated. PPR locals (included in AfterPPRs) are not
805 // supported (note: this is asserted above).
806 CFAOffset += SVEAllocs.BeforePPRs;
807 }
808
809 // Allocate space for the rest of the frame including ZPR locals. Align the
810 // stack as necessary.
811 assert(!(AFL.canUseRedZone(MF) && NeedsRealignment) &&
812 "Cannot use redzone with stack realignment");
813 if (!AFL.canUseRedZone(MF)) {
814 // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have the
815 // correct value here, as NumBytes also includes padding bytes, which
816 // shouldn't be counted here.
817 allocateStackSpace(AfterSVESavesI, RealignmentPadding, SVEAllocs.AfterZPRs,
818 EmitAsyncCFI && !HasFP, CFAOffset,
819 MFI.hasVarSizedObjects());
820 }
821
822 // If we need a base pointer, set it up here. It's whatever the value of the
823 // stack pointer is at this point. Any variable size objects will be
824 // allocated after this, so we can still use the base pointer to reference
825 // locals.
826 //
827 // FIXME: Clarify FrameSetup flags here.
828 // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
829 // needed.
830 // For funclets the BP belongs to the containing function.
831 if (!IsFunclet && RegInfo.hasBasePointer(MF)) {
832 TII->copyPhysReg(MBB, AfterSVESavesI, DL, RegInfo.getBaseRegister(),
833 AArch64::SP, false);
834 if (NeedsWinCFI) {
835 HasWinCFI = true;
836 BuildMI(MBB, AfterSVESavesI, DL, TII->get(AArch64::SEH_Nop))
838 }
839 }
840
841 // The very last FrameSetup instruction indicates the end of prologue. Emit a
842 // SEH opcode indicating the prologue end.
843 if (NeedsWinCFI && HasWinCFI) {
844 BuildMI(MBB, AfterSVESavesI, DL, TII->get(AArch64::SEH_PrologEnd))
846 }
847
848 // SEH funclets are passed the frame pointer in X1. If the parent
849 // function uses the base register, then the base register is used
850 // directly, and is not retrieved from X1.
851 if (IsFunclet && F.hasPersonalityFn()) {
852 EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
854 BuildMI(MBB, AfterSVESavesI, DL, TII->get(TargetOpcode::COPY),
855 AArch64::FP)
856 .addReg(AArch64::X1)
858 MBB.addLiveIn(AArch64::X1);
859 }
860 }
861
862 if (EmitCFI && !EmitAsyncCFI) {
863 if (HasFP) {
864 emitDefineCFAWithFP(AfterSVESavesI, FixedObject);
865 } else {
866 StackOffset TotalSize =
867 AFL.getSVEStackSize(MF) +
868 StackOffset::getFixed((int64_t)MFI.getStackSize());
869 CFIInstBuilder CFIBuilder(MBB, AfterSVESavesI, MachineInstr::FrameSetup);
870 CFIBuilder.insertCFIInst(
871 createDefCFA(RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP,
872 TotalSize, /*LastAdjustmentWasScalable=*/false));
873 }
874 emitCalleeSavedGPRLocations(AfterSVESavesI);
875 emitCalleeSavedSVELocations(AfterSVESavesI);
876 }
877}
878
879void AArch64PrologueEmitter::emitShadowCallStackPrologue(
881 // Shadow call stack prolog: str x30, [x18], #8
882 BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXpost))
883 .addReg(AArch64::X18, RegState::Define)
884 .addReg(AArch64::LR)
885 .addReg(AArch64::X18)
886 .addImm(8)
888
889 // This instruction also makes x18 live-in to the entry block.
890 MBB.addLiveIn(AArch64::X18);
891
892 if (NeedsWinCFI)
893 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
895
896 if (EmitCFI) {
897 // Emit a CFI instruction that causes 8 to be subtracted from the value of
898 // x18 when unwinding past this frame.
899 static const char CFIInst[] = {
900 dwarf::DW_CFA_val_expression,
901 18, // register
902 2, // length
903 static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
904 static_cast<char>(-8) & 0x7f, // addend (sleb128)
905 };
906 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
907 .buildEscape(StringRef(CFIInst, sizeof(CFIInst)));
908 }
909}
910
911void AArch64PrologueEmitter::emitSwiftAsyncContextFramePointer(
913 switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
915 if (Subtarget.swiftAsyncContextIsDynamicallySet()) {
916 // The special symbol below is absolute and has a *value* that can be
917 // combined with the frame pointer to signal an extended frame.
918 BuildMI(MBB, MBBI, DL, TII->get(AArch64::LOADgot), AArch64::X16)
919 .addExternalSymbol("swift_async_extendedFramePointerFlags",
921 if (NeedsWinCFI) {
922 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
924 HasWinCFI = true;
925 }
926 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::FP)
927 .addUse(AArch64::FP)
928 .addUse(AArch64::X16)
929 .addImm(Subtarget.isTargetILP32() ? 32 : 0);
930 if (NeedsWinCFI) {
931 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
933 HasWinCFI = true;
934 }
935 break;
936 }
937 [[fallthrough]];
938
940 // ORR x29, x29, #0x1000_0000_0000_0000
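      // (0x1100 is the 64-bit logical-immediate encoding of
      //  0x1000_0000_0000_0000, i.e. bit 60.)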
941 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXri), AArch64::FP)
942 .addUse(AArch64::FP)
943 .addImm(0x1100)
945 if (NeedsWinCFI) {
946 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
948 HasWinCFI = true;
949 }
950 break;
951
953 break;
954 }
955}
956
957void AArch64PrologueEmitter::emitEmptyStackFramePrologue(
958 int64_t NumBytes, MachineBasicBlock::iterator MBBI,
959 const DebugLoc &DL) const {
960 assert(!HasFP && "unexpected function without stack frame but with FP");
961 assert(!AFL.getSVEStackSize(MF) &&
962 "unexpected function without stack frame but with SVE objects");
963 // All of the stack allocation is for locals.
964 AFI->setLocalStackSize(NumBytes);
965 if (!NumBytes) {
966 if (NeedsWinCFI && HasWinCFI) {
967 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
969 }
970 return;
971 }
972 // REDZONE: If the stack size is less than 128 bytes, we don't need
973 // to actually allocate.
974 if (AFL.canUseRedZone(MF)) {
975 AFI->setHasRedZone(true);
976 ++NumRedZoneFunctions;
977 } else {
978 emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
979 StackOffset::getFixed(-NumBytes), TII,
981 if (EmitCFI) {
982 // Label used to tie together the PROLOG_LABEL and the MachineMoves.
983 MCSymbol *FrameLabel = MF.getContext().createTempSymbol();
984 // Encode the stack size of the leaf function.
985 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
986 .buildDefCFAOffset(NumBytes, FrameLabel);
987 }
988 }
989
990 if (NeedsWinCFI) {
991 HasWinCFI = true;
992 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
994 }
995}
996
997void AArch64PrologueEmitter::emitFramePointerSetup(
999 unsigned FixedObject) {
1000 int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset();
1001 if (CombineSPBump)
1002 FPOffset += AFI->getLocalStackSize();
1003
1004 if (AFI->hasSwiftAsyncContext()) {
1005 // Before we update the live FP we have to ensure there's a valid (or
1006 // null) asynchronous context in its slot just before FP in the frame
1007 // record, so store it now.
1008 const auto &Attrs = MF.getFunction().getAttributes();
1009 bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
1010 if (HaveInitialContext)
1011 MBB.addLiveIn(AArch64::X22);
1012 Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;
1013 BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
1014 .addUse(Reg)
1015 .addUse(AArch64::SP)
1016 .addImm(FPOffset - 8)
1018 if (NeedsWinCFI) {
1019 // WinCFI and arm64e, where StoreSwiftAsyncContext is expanded
1020 // to multiple instructions, should be mutually-exclusive.
1021 assert(Subtarget.getTargetTriple().getArchName() != "arm64e");
1022 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1024 HasWinCFI = true;
1025 }
1026 }
1027
1028 if (HomPrologEpilog) {
1029 auto Prolog = MBBI;
1030 --Prolog;
1031 assert(Prolog->getOpcode() == AArch64::HOM_Prolog);
1032 Prolog->addOperand(MachineOperand::CreateImm(FPOffset));
1033 } else {
1034 // Issue sub fp, sp, FPOffset or
1035 // mov fp,sp when FPOffset is zero.
1036 // Note: All stores of callee-saved registers are marked as "FrameSetup".
1037 // This code marks the instruction(s) that set the FP also.
1038 emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
1039 StackOffset::getFixed(FPOffset), TII,
1041 if (NeedsWinCFI && HasWinCFI) {
1042 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
1044 // After setting up the FP, the rest of the prolog doesn't need to be
1045 // included in the SEH unwind info.
1046 NeedsWinCFI = false;
1047 }
1048 }
1049 if (EmitAsyncCFI)
1050 emitDefineCFAWithFP(MBBI, FixedObject);
1051}
1052
1053// Define the current CFA rule to use the provided FP.
1054void AArch64PrologueEmitter::emitDefineCFAWithFP(
1055 MachineBasicBlock::iterator MBBI, unsigned FixedObject) const {
1056 const int OffsetToFirstCalleeSaveFromFP =
1057 AFI->getCalleeSaveBaseToFrameRecordOffset() -
1058 AFI->getCalleeSavedStackSize();
1059 Register FramePtr = RegInfo.getFrameRegister(MF);
1060 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
1061 .buildDefCFA(FramePtr, FixedObject - OffsetToFirstCalleeSaveFromFP);
1062}
1063
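// Emits the Windows stack-probe sequence: the allocation size (in 16-byte
// units) is materialized in X15, __chkstk is called, and the probed amount is
// then subtracted from SP. NumBytes is consumed and reset to zero here.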
1064void AArch64PrologueEmitter::emitWindowsStackProbe(
1065 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int64_t &NumBytes,
1066 int64_t RealignmentPadding) const {
1067 if (AFI->getSVECalleeSavedStackSize())
1068 report_fatal_error("SVE callee saves not yet supported with stack probing");
1069
 1070 // Find an available register to spill the value of X15 to, if X15 is
 1071 // already being used for the 'nest' parameter.
1072 unsigned X15Scratch = AArch64::NoRegister;
1073 if (llvm::any_of(MBB.liveins(),
1074 [this](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
1075 return RegInfo.isSuperOrSubRegisterEq(AArch64::X15,
1076 LiveIn.PhysReg);
1077 })) {
1078 X15Scratch = AFL.findScratchNonCalleeSaveRegister(&MBB, /*HasCall=*/true);
1079 assert(X15Scratch != AArch64::NoRegister &&
1080 (X15Scratch < AArch64::X15 || X15Scratch > AArch64::X17));
1081#ifndef NDEBUG
1082 LiveRegs.removeReg(AArch64::X15); // ignore X15 since we restore it
1083#endif
1084 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), X15Scratch)
1085 .addReg(AArch64::XZR)
1086 .addReg(AArch64::X15, RegState::Undef)
1087 .addReg(AArch64::X15, RegState::Implicit)
1089 }
1090
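  // __chkstk expects the allocation size in X15, expressed in 16-byte units.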
1091 uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4;
1092 if (NeedsWinCFI) {
1093 HasWinCFI = true;
1094 // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
1095 // exceed this amount. We need to move at most 2^24 - 1 into x15.
1096 // This is at most two instructions, MOVZ followed by MOVK.
1097 // TODO: Fix to use multiple stack alloc unwind codes for stacks
1098 // exceeding 256MB in size.
1099 if (NumBytes >= (1 << 28))
1100 report_fatal_error("Stack size cannot exceed 256MB for stack "
1101 "unwinding purposes");
1102
1103 uint32_t LowNumWords = NumWords & 0xFFFF;
1104 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15)
1105 .addImm(LowNumWords)
1108 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1110 if ((NumWords & 0xFFFF0000) != 0) {
1111 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
1112 .addReg(AArch64::X15)
1113 .addImm((NumWords & 0xFFFF0000) >> 16) // High half
1116 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1118 }
1119 } else {
1120 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
1121 .addImm(NumWords)
1123 }
1124
1125 const AArch64TargetLowering *TLI = Subtarget.getTargetLowering();
1126 RTLIB::LibcallImpl ChkStkLibcall = TLI->getLibcallImpl(RTLIB::STACK_PROBE);
1127 if (ChkStkLibcall == RTLIB::Unsupported)
1128 reportFatalUsageError("no available implementation of __chkstk");
1129
1130 const char *ChkStk = TLI->getLibcallImplName(ChkStkLibcall).data();
1131 switch (MF.getTarget().getCodeModel()) {
1132 case CodeModel::Tiny:
1133 case CodeModel::Small:
1134 case CodeModel::Medium:
1135 case CodeModel::Kernel:
1136 BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
1137 .addExternalSymbol(ChkStk)
1138 .addReg(AArch64::X15, RegState::Implicit)
1139 .addReg(AArch64::X16,
1141 .addReg(AArch64::X17,
1143 .addReg(AArch64::NZCV,
1146 if (NeedsWinCFI) {
1147 HasWinCFI = true;
1148 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1150 }
1151 break;
1152 case CodeModel::Large:
1153 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
1154 .addReg(AArch64::X16, RegState::Define)
1155 .addExternalSymbol(ChkStk)
1156 .addExternalSymbol(ChkStk)
1158 if (NeedsWinCFI) {
1159 HasWinCFI = true;
1160 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1162 }
1163
1165 .addReg(AArch64::X16, RegState::Kill)
1167 .addReg(AArch64::X16,
1169 .addReg(AArch64::X17,
1171 .addReg(AArch64::NZCV,
1174 if (NeedsWinCFI) {
1175 HasWinCFI = true;
1176 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1178 }
1179 break;
1180 }
1181
1182 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
1183 .addReg(AArch64::SP, RegState::Kill)
1184 .addReg(AArch64::X15, RegState::Kill)
1187 if (NeedsWinCFI) {
1188 HasWinCFI = true;
1189 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
1190 .addImm(NumBytes)
1192 }
1193 NumBytes = 0;
1194
1195 if (RealignmentPadding > 0) {
1196 if (RealignmentPadding >= 4096) {
1197 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm))
1198 .addReg(AArch64::X16, RegState::Define)
1199 .addImm(RealignmentPadding)
1201 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXrx64), AArch64::X15)
1202 .addReg(AArch64::SP)
1203 .addReg(AArch64::X16, RegState::Kill)
1206 } else {
1207 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15)
1208 .addReg(AArch64::SP)
1209 .addImm(RealignmentPadding)
1210 .addImm(0)
1212 }
1213
1214 uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
1215 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
1216 .addReg(AArch64::X15, RegState::Kill)
1218 AFI->setStackRealigned(true);
1219
1220 // No need for SEH instructions here; if we're realigning the stack,
1221 // we've set a frame pointer and already finished the SEH prologue.
1223 }
1224 if (X15Scratch != AArch64::NoRegister) {
1225 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), AArch64::X15)
1226 .addReg(AArch64::XZR)
1227 .addReg(X15Scratch, RegState::Undef)
1228 .addReg(X15Scratch, RegState::Implicit)
1230 }
1231}
1232
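// Emits .cfi_offset directives for the callee saves that live in non-scalable
// stack slots (the GPR/FPR saves).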
1233void AArch64PrologueEmitter::emitCalleeSavedGPRLocations(
1235 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1236 if (CSI.empty())
1237 return;
1238
1239 CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
1240 for (const auto &Info : CSI) {
1241 unsigned FrameIdx = Info.getFrameIdx();
1242 if (MFI.hasScalableStackID(FrameIdx))
1243 continue;
1244
1245 assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
1246 int64_t Offset = MFI.getObjectOffset(FrameIdx) - AFL.getOffsetOfLocalArea();
1247 CFIBuilder.buildOffset(Info.getReg(), Offset);
1248 }
1249}
1250
1251void AArch64PrologueEmitter::emitCalleeSavedSVELocations(
1253 // Add callee saved registers to move list.
1254 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1255 if (CSI.empty())
1256 return;
1257
1258 CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
1259
1260 std::optional<int64_t> IncomingVGOffsetFromDefCFA;
1261 if (AFL.requiresSaveVG(MF)) {
1262 auto IncomingVG = *find_if(
1263 reverse(CSI), [](auto &Info) { return Info.getReg() == AArch64::VG; });
1264 IncomingVGOffsetFromDefCFA = MFI.getObjectOffset(IncomingVG.getFrameIdx()) -
1265 AFL.getOffsetOfLocalArea();
1266 }
1267
1268 StackOffset PPRStackSize = AFL.getPPRStackSize(MF);
1269 for (const auto &Info : CSI) {
1270 int FI = Info.getFrameIdx();
1271 if (!MFI.hasScalableStackID(FI))
1272 continue;
1273
1274 // Not all unwinders may know about SVE registers, so assume the lowest
1275 // common denominator.
1276 assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
1277 MCRegister Reg = Info.getReg();
1278 if (!RegInfo.regNeedsCFI(Reg, Reg))
1279 continue;
1280
1281 StackOffset Offset =
1282 StackOffset::getScalable(MFI.getObjectOffset(FI)) -
1283 StackOffset::getFixed(AFI->getCalleeSavedStackSize(MFI));
1284
 1285 // With split SVE objects, the scalable vectors are below (lower address)
 1286 // the scalable predicates, so we must subtract the size of the predicates.
 1287 if (SVELayout == SVEStackLayout::Split &&
 1288 MFI.getStackID(FI) == TargetStackID::ScalableVector)
1289 Offset -= PPRStackSize;
1290
1291 CFIBuilder.insertCFIInst(
1292 createCFAOffset(RegInfo, Reg, Offset, IncomingVGOffsetFromDefCFA));
1293 }
1294}
1295
1296static bool isFuncletReturnInstr(const MachineInstr &MI) {
 1297 switch (MI.getOpcode()) {
1298 default:
1299 return false;
1300 case AArch64::CATCHRET:
1301 case AArch64::CLEANUPRET:
1302 return true;
1303 }
1304}
1305
1310 EmitCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
1311 HomPrologEpilog = AFL.homogeneousPrologEpilog(MF, &MBB);
1312 SEHEpilogueStartI = MBB.end();
1313}
1314
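// Moves SP to an address below FP given by Offset (a negative, mostly
// scalable offset), taking care not to briefly deallocate part of the frame.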
1315void AArch64EpilogueEmitter::moveSPBelowFP(MachineBasicBlock::iterator MBBI,
 1316 StackOffset Offset) {
 1317 // Other combinations could be supported, but are not currently needed.
1318 assert(Offset.getScalable() < 0 && Offset.getFixed() <= 0 &&
1319 "expected negative offset (with optional fixed portion)");
1320 Register Base = AArch64::FP;
1321 if (int64_t FixedOffset = Offset.getFixed()) {
 1322 // If we have a negative fixed offset, we need to first subtract it in a
 1323 // temporary register (to avoid briefly deallocating the scalable portion
 1324 // of the offset).
1325 Base = MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
1326 emitFrameOffset(MBB, MBBI, DL, Base, AArch64::FP,
1327 StackOffset::getFixed(FixedOffset), TII,
1329 }
1330 emitFrameOffset(MBB, MBBI, DL, AArch64::SP, Base,
1331 StackOffset::getScalable(Offset.getScalable()), TII,
1333}
1334
1335void AArch64EpilogueEmitter::emitEpilogue() {
 1336 MachineBasicBlock::iterator EpilogueEndI = MBB.getLastNonDebugInstr();
1337 if (MBB.end() != EpilogueEndI) {
1338 DL = EpilogueEndI->getDebugLoc();
1339 IsFunclet = isFuncletReturnInstr(*EpilogueEndI);
1340 }
1341
1342 int64_t NumBytes =
1343 IsFunclet ? AFL.getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
1344
1345 // All calls are tail calls in GHC calling conv, and functions have no
1346 // prologue/epilogue.
1347 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
1348 return;
1349
1350 // How much of the stack used by incoming arguments this function is expected
1351 // to restore in this particular epilogue.
1352 int64_t ArgumentStackToRestore = AFL.getArgumentStackToRestore(MF, MBB);
1353 bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv(),
1354 MF.getFunction().isVarArg());
1355 unsigned FixedObject = AFL.getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
1356
1357 int64_t AfterCSRPopSize = ArgumentStackToRestore;
1358 auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
1359 // We cannot rely on the local stack size set in emitPrologue if the function
1360 // has funclets, as funclets have different local stack size requirements, and
1361 // the current value set in emitPrologue may be that of the containing
1362 // function.
1363 if (MF.hasEHFunclets())
1364 AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
1365
1366 if (HomPrologEpilog) {
1368 auto FirstHomogenousEpilogI = MBB.getFirstTerminator();
1369 if (FirstHomogenousEpilogI != MBB.begin()) {
1370 auto HomogeneousEpilog = std::prev(FirstHomogenousEpilogI);
1371 if (HomogeneousEpilog->getOpcode() == AArch64::HOM_Epilog)
1372 FirstHomogenousEpilogI = HomogeneousEpilog;
1373 }
1374
1375 // Adjust local stack
1376 emitFrameOffset(MBB, FirstHomogenousEpilogI, DL, AArch64::SP, AArch64::SP,
1377 StackOffset::getFixed(AFI->getLocalStackSize()), TII,
1379
 1380 // SP has already been adjusted while restoring callee-save regs.
 1381 // We've already bailed out of the case that adjusts SP for arguments.
1382 assert(AfterCSRPopSize == 0);
1383 return;
1384 }
1385
1386 bool CombineSPBump = shouldCombineCSRLocalStackBump(NumBytes);
1387
1388 unsigned ProloguePopSize = PrologueSaveSize;
 1389 if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
 1390 // With CalleeSavesAboveFrameRecord ProloguePopSize is the amount of stack
1391 // that needs to be popped until we reach the start of the SVE save area.
1392 // The "FixedObject" stack occurs after the SVE area and must be popped
1393 // later.
1394 ProloguePopSize -= FixedObject;
1395 AfterCSRPopSize += FixedObject;
1396 }
1397
1398 // Assume we can't combine the last pop with the sp restore.
1399 if (!CombineSPBump && ProloguePopSize != 0) {
1400 MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
1401 while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
1402 AArch64InstrInfo::isSEHInstruction(*Pop) ||
1405 Pop = std::prev(Pop);
1406 // Converting the last ldp to a post-index ldp is valid only if the last
1407 // ldp's offset is 0.
1408 const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
1409 // If the offset is 0 and the AfterCSR pop is not actually trying to
1410 // allocate more stack for arguments (in space that an untimely interrupt
1411 // may clobber), convert it to a post-index ldp.
1412 if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
1414 Pop, DL, ProloguePopSize, EmitCFI, MachineInstr::FrameDestroy,
1415 ProloguePopSize);
1417 MachineBasicBlock::iterator AfterLastPop = std::next(Pop);
1418 if (AArch64InstrInfo::isSEHInstruction(*AfterLastPop))
1419 ++AfterLastPop;
1420 // If not, and CalleeSavesAboveFrameRecord is enabled, deallocate
1421 // callee-save non-SVE registers to move the stack pointer to the start of
1422 // the SVE area.
1423 emitFrameOffset(MBB, AfterLastPop, DL, AArch64::SP, AArch64::SP,
1424 StackOffset::getFixed(ProloguePopSize), TII,
1426 &HasWinCFI);
1427 } else {
1428 // Otherwise, make sure to emit an add after the last ldp.
1429 // We're doing this by transferring the size to be restored from the
1430 // adjustment *before* the CSR pops to the adjustment *after* the CSR
1431 // pops.
1432 AfterCSRPopSize += ProloguePopSize;
1433 }
1434 }
1435
1436 // Move past the restores of the callee-saved registers.
1437 // If we plan on combining the sp bump of the local stack size and the callee
1438 // save stack size, we might need to adjust the CSR save and restore offsets.
1439 MachineBasicBlock::iterator FirstGPRRestoreI = MBB.getFirstTerminator();
1440 MachineBasicBlock::iterator Begin = MBB.begin();
1441 while (FirstGPRRestoreI != Begin) {
1442 --FirstGPRRestoreI;
1443 if (!FirstGPRRestoreI->getFlag(MachineInstr::FrameDestroy) ||
1445 isPartOfSVECalleeSaves(FirstGPRRestoreI))) {
1446 ++FirstGPRRestoreI;
1447 break;
1448 } else if (CombineSPBump)
1449 fixupCalleeSaveRestoreStackOffset(*FirstGPRRestoreI,
1450 AFI->getLocalStackSize());
1451 }
1452
1453 if (NeedsWinCFI) {
1454 // Note that there are cases where we insert SEH opcodes in the
1455 // epilogue when we had no SEH opcodes in the prologue. For
1456 // example, when there is no stack frame but there are stack
 1457 // arguments. Insert the SEH_EpilogStart and remove it later if
 1458 // we didn't emit any SEH opcodes, to avoid generating WinCFI for
1459 // functions that don't need it.
1460 BuildMI(MBB, FirstGPRRestoreI, DL, TII->get(AArch64::SEH_EpilogStart))
1462 SEHEpilogueStartI = FirstGPRRestoreI;
1463 --SEHEpilogueStartI;
1464 }
1465
1466 // Determine the ranges of SVE callee-saves. This is done before emitting any
1467 // code at the end of the epilogue (for Swift async), which can get in the way
1468 // of finding SVE callee-saves with CalleeSavesAboveFrameRecord.
1469 auto [PPR, ZPR] = getSVEStackFrameSizes();
1470 auto [PPRRange, ZPRRange] = partitionSVECS(
1471 MBB,
 1472 SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord
 1473 ? MBB.getFirstTerminator()
1474 : FirstGPRRestoreI,
1475 PPR.CalleeSavesSize, ZPR.CalleeSavesSize, /*IsEpilogue=*/true);
1476
1477 if (HasFP && AFI->hasSwiftAsyncContext())
1478 emitSwiftAsyncContextFramePointer(EpilogueEndI, DL);
1479
1480 // If there is a single SP update, insert it before the ret and we're done.
1481 if (CombineSPBump) {
1482 assert(!AFI->hasSVEStackSize() && "Cannot combine SP bump with SVE");
1483
1484 // When we are about to restore the CSRs, the CFA register is SP again.
1485 if (EmitCFI && HasFP)
1487 .buildDefCFA(AArch64::SP, NumBytes);
1488
1489 emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
1490 StackOffset::getFixed(NumBytes + AfterCSRPopSize), TII,
1492 EmitCFI, StackOffset::getFixed(NumBytes));
1493 return;
1494 }
1495
1496 NumBytes -= PrologueSaveSize;
1497 assert(NumBytes >= 0 && "Negative stack allocation size!?");
1498
1499 StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize;
1500 SVEStackAllocations SVEAllocs = getSVEStackAllocations({PPR, ZPR});
1501
1502 // Deallocate the SVE area.
 1503 if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
 1504 assert(!SVEAllocs.AfterPPRs &&
1505 "unexpected SVE allocs after PPRs with CalleeSavesAboveFrameRecord");
1506 // If the callee-save area is before FP, restoring the FP implicitly
1507 // deallocates non-callee-save SVE allocations. Otherwise, deallocate them
1508 // explicitly.
1509 if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) {
1510 emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1512 false, NeedsWinCFI, &HasWinCFI);
1513 }
1514
1515 // Deallocate callee-save SVE registers.
1516 emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP,
1518 false, NeedsWinCFI, &HasWinCFI);
1519 } else if (AFI->hasSVEStackSize()) {
1520 // If we have stack realignment or variable-sized objects we must use the FP
1521 // to restore SVE callee saves (as there is an unknown amount of
1522 // data/padding between the SP and SVE CS area).
1523 Register BaseForSVEDealloc =
1524 (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP
1525 : AArch64::SP;
1526 if (SVECalleeSavesSize && BaseForSVEDealloc == AArch64::FP) {
1527 if (ZPR.CalleeSavesSize || SVELayout != SVEStackLayout::Split) {
1528 // The offset from the frame-pointer to the start of the ZPR saves.
1529 StackOffset FPOffsetZPR =
1530 -SVECalleeSavesSize - PPR.LocalsSize -
1531 StackOffset::getFixed(AFI->getCalleeSaveBaseToFrameRecordOffset());
 1532 // Deallocate the stack space by moving the SP to the start of the
1533 // ZPR/PPR callee-save area.
1534 moveSPBelowFP(ZPRRange.Begin, FPOffsetZPR);
1535 }
1536 // With split SVE, the predicates are stored in a separate area above the
1537 // ZPR saves, so we must adjust the stack to the start of the PPRs.
1538 if (PPR.CalleeSavesSize && SVELayout == SVEStackLayout::Split) {
1539 // The offset from the frame-pointer to the start of the PPR saves.
1540 StackOffset FPOffsetPPR = -PPR.CalleeSavesSize;
1541 // Move to the start of the PPR area.
1542 assert(!FPOffsetPPR.getFixed() && "expected only scalable offset");
1543 emitFrameOffset(MBB, ZPRRange.End, DL, AArch64::SP, AArch64::FP,
1544 FPOffsetPPR, TII, MachineInstr::FrameDestroy);
1545 }
1546 } else if (BaseForSVEDealloc == AArch64::SP) {
1547 auto NonSVELocals = StackOffset::getFixed(NumBytes);
1548 auto CFAOffset = NonSVELocals + StackOffset::getFixed(PrologueSaveSize) +
1549 SVEAllocs.totalSize();
1550
1551 if (SVECalleeSavesSize || SVELayout == SVEStackLayout::Split) {
1552 // Deallocate non-SVE locals now. This is needed to reach the SVE callee
1553 // saves, but may also allow combining stack hazard bumps for split SVE.
1554 SVEAllocs.AfterZPRs += NonSVELocals;
1555 NumBytes -= NonSVELocals.getFixed();
1556 }
 1557 // To deallocate the SVE stack, adjust by the allocations in reverse order.
1558 emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP,
1560 false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1561 CFAOffset);
1562 CFAOffset -= SVEAllocs.AfterZPRs;
1563 assert(PPRRange.Begin == ZPRRange.End &&
1564 "Expected PPR restores after ZPR");
1565 emitFrameOffset(MBB, PPRRange.Begin, DL, AArch64::SP, AArch64::SP,
1567 false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1568 CFAOffset);
1569 CFAOffset -= SVEAllocs.AfterPPRs;
1570 emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP,
1572 false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1573 CFAOffset);
1574 }
1575
1576 if (EmitCFI)
1577 emitCalleeSavedSVERestores(
1578 SVELayout == SVEStackLayout::Split ? ZPRRange.End : PPRRange.End);
1579 }
1580
1581 if (!HasFP) {
1582 bool RedZone = AFL.canUseRedZone(MF);
1583 // If this was a redzone leaf function, we don't need to restore the
1584 // stack pointer (but we may need to pop stack args for fastcc).
1585 if (RedZone && AfterCSRPopSize == 0)
1586 return;
1587
1588 // Pop the local variables off the stack. If there are no callee-saved
1589 // registers, it means we are actually positioned at the terminator and can
1590 // combine stack increment for the locals and the stack increment for
1591 // callee-popped arguments into (possibly) a single instruction and be done.
1592 bool NoCalleeSaveRestore = PrologueSaveSize == 0;
1593 int64_t StackRestoreBytes = RedZone ? 0 : NumBytes;
1594 if (NoCalleeSaveRestore)
1595 StackRestoreBytes += AfterCSRPopSize;
1596
1598 MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1599 StackOffset::getFixed(StackRestoreBytes), TII,
1601 StackOffset::getFixed((RedZone ? 0 : NumBytes) + PrologueSaveSize));
1602
1603 // If we were able to combine the local stack pop with the argument pop,
1604 // then we're done.
1605 if (NoCalleeSaveRestore || AfterCSRPopSize == 0)
1606 return;
1607
1608 NumBytes = 0;
1609 }
1610
1611 // Restore the original stack pointer.
1612 // FIXME: Rather than doing the math here, we should instead just use
1613 // non-post-indexed loads for the restores if we aren't actually going to
1614 // be able to save any instructions.
1615 if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
1617 MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::FP,
1618 StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()),
1620 } else if (NumBytes)
1621 emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1622 StackOffset::getFixed(NumBytes), TII,
1624
1625 // When we are about to restore the CSRs, the CFA register is SP again.
1626 if (EmitCFI && HasFP)
1628 .buildDefCFA(AArch64::SP, PrologueSaveSize);
1629
1630 // This must be placed after the callee-save restore code because that code
1631 // assumes the SP is at the same location as it was after the callee-save save
1632 // code in the prologue.
1633 if (AfterCSRPopSize) {
1634 assert(AfterCSRPopSize > 0 && "attempting to reallocate arg stack that an "
1635 "interrupt may have clobbered");
1636
1637 emitFrameOffset(
1638 MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
1639 StackOffset::getFixed(AfterCSRPopSize), TII, MachineInstr::FrameDestroy,
1640 false, NeedsWinCFI, &HasWinCFI, EmitCFI,
1641 StackOffset::getFixed(AfterCSRPopSize - ArgumentStackToRestore));
1642 }
1643}
1644
1645bool AArch64EpilogueEmitter::shouldCombineCSRLocalStackBump(
1646 uint64_t StackBumpBytes) const {
1647 if (!AArch64PrologueEpilogueCommon::shouldCombineCSRLocalStackBump(
1648 StackBumpBytes))
1649 return false;
1650 if (MBB.empty())
1651 return true;
1652
1653 // Disable combined SP bump if the last instruction is an MTE tag store. It
1654 // is almost always better to merge SP adjustment into those instructions.
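// (The tag-store instructions checked below have post-indexed forms, so a
// later fold can usually absorb the SP increment into the final STG/ST2G
// instead of a separate add.)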
1655 MachineBasicBlock::iterator LastI = MBB.getFirstTerminator();
1656 MachineBasicBlock::iterator Begin = MBB.begin();
1657 while (LastI != Begin) {
1658 --LastI;
1659 if (LastI->isTransient())
1660 continue;
1661 if (!LastI->getFlag(MachineInstr::FrameDestroy))
1662 break;
1663 }
1664 switch (LastI->getOpcode()) {
1665 case AArch64::STGloop:
1666 case AArch64::STZGloop:
1667 case AArch64::STGi:
1668 case AArch64::STZGi:
1669 case AArch64::ST2Gi:
1670 case AArch64::STZ2Gi:
1671 return false;
1672 default:
1673 return true;
1674 }
1675 llvm_unreachable("unreachable");
1676}
1677
1678void AArch64EpilogueEmitter::emitSwiftAsyncContextFramePointer(
1679 MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
1680 switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
1681 case SwiftAsyncFramePointerMode::DeploymentBased:
1682 // Avoid the reload as it is GOT relative, and instead fall back to the
1683 // hardcoded value below. This allows a mismatch between the OS and
1684 // application without immediately terminating on the difference.
1685 [[fallthrough]];
1686 case SwiftAsyncFramePointerMode::Always:
1687 // We need to reset FP to its untagged state on return. Bit 60 is
1688 // currently used to show the presence of an extended frame.
1689
1690 // BIC x29, x29, #0x1000_0000_0000_0000
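// 0x10fe is the N:immr:imms logical-immediate encoding of
// 0xEFFFFFFFFFFFFFFF (all bits set except bit 60), so the ANDXri below
// clears only the extended-frame marker bit.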
1691 BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::ANDXri),
1692 AArch64::FP)
1693 .addUse(AArch64::FP)
1694 .addImm(0x10fe)
1695 .setMIFlag(MachineInstr::FrameDestroy);
1696 if (NeedsWinCFI) {
1697 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1698 .setMIFlag(MachineInstr::FrameDestroy);
1699 HasWinCFI = true;
1700 }
1701 break;
1702
1703 case SwiftAsyncFramePointerMode::Never:
1704 break;
1705 }
1706}
1707
1708void AArch64EpilogueEmitter::emitShadowCallStackEpilogue(
1709 MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
1710 // Shadow call stack epilog: ldr x30, [x18, #-8]!
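// The pre-indexed load pops the return address back into LR and moves the
// shadow stack pointer in x18 down by 8, undoing the prologue's matching
// post-indexed store (str x30, [x18], #8).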
1711 BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXpre))
1712 .addReg(AArch64::X18, RegState::Define)
1713 .addReg(AArch64::LR, RegState::Define)
1714 .addReg(AArch64::X18)
1715 .addImm(-8)
1716 .setMIFlag(MachineInstr::FrameDestroy);
1717
1718 if (NeedsWinCFI)
1719 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1720 .setMIFlag(MachineInstr::FrameDestroy);
1721
1722 if (AFI->needsAsyncDwarfUnwindInfo(MF))
1723 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameDestroy)
1724 .buildRestore(AArch64::X18);
1725}
1726
1727void AArch64EpilogueEmitter::emitCalleeSavedRestores(
1728 MachineBasicBlock::iterator MBBI, bool SVE) const {
1729 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1730 if (CSI.empty())
1731 return;
1732
1733 CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameDestroy);
1734
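// Emit a .cfi_restore for each callee save in the requested partition: SVE
// selects the saves spilled to scalable stack slots, the non-SVE path the
// fixed-offset slots.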
1735 for (const auto &Info : CSI) {
1736 if (SVE != MFI.hasScalableStackID(Info.getFrameIdx()))
1737 continue;
1738
1739 MCRegister Reg = Info.getReg();
1740 if (SVE && !RegInfo.regNeedsCFI(Reg, Reg))
1741 continue;
1742
1743 CFIBuilder.buildRestore(Info.getReg());
1744 }
1745}
1746
1747void AArch64EpilogueEmitter::finalizeEpilogue() const {
1748 if (AFI->needsShadowCallStackPrologueEpilogue(MF)) {
1749 emitShadowCallStackEpilogue(MBB.getFirstTerminator(), DL);
1750 HasWinCFI |= NeedsWinCFI;
1751 }
1752 if (EmitCFI)
1753 emitCalleeSavedGPRRestores(MBB.getFirstTerminator());
1754 if (AFI->shouldSignReturnAddress(MF)) {
1755 // If pac-ret+leaf is in effect, PAUTH_EPILOGUE pseudo instructions
1756 // are inserted by emitPacRetPlusLeafHardening().
1757 if (!AFL.shouldSignReturnAddressEverywhere(MF)) {
1758 BuildMI(MBB, MBB.getFirstTerminator(), DL,
1759 TII->get(AArch64::PAUTH_EPILOGUE))
1760 .setMIFlag(MachineInstr::FrameDestroy);
1761 }
1762 // AArch64PointerAuth pass will insert SEH_PACSignLR
1763 HasWinCFI |= NeedsWinCFI;
1764 }
1765 if (HasWinCFI) {
1766 BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd))
1767 .setMIFlag(MachineInstr::FrameDestroy);
1768 if (!MF.hasWinCFI())
1769 MF.setHasWinCFI(true);
1770 }
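// If WinCFI was requested but no SEH opcodes were emitted, drop the
// SEH_EpilogStart placeholder below so an empty epilogue scope is not left
// behind.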
1771 if (NeedsWinCFI) {
1772 assert(SEHEpilogueStartI != MBB.end());
1773 if (!HasWinCFI)
1774 MBB.erase(SEHEpilogueStartI);
1775 }
1776}
1777
1778} // namespace llvm