AArch64PrologueEpilogue.cpp
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
12#include "AArch64Subtarget.h"
14#include "llvm/ADT/Statistic.h"
17#include "llvm/MC/MCContext.h"
18
19#define DEBUG_TYPE "frame-info"
20
21STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
22
23namespace llvm {
24
25static bool matchLibcall(const TargetLowering &TLI, const MachineOperand &MO,
26 RTLIB::Libcall LC) {
27 return MO.isSymbol() &&
28 StringRef(TLI.getLibcallName(LC)) == MO.getSymbolName();
29}
30
32 return AFI->hasStreamingModeChanges() &&
33 !MF.getSubtarget<AArch64Subtarget>().hasSVE();
34}
35
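// Note: in functions with streaming-mode changes but no SVE, the current value
// of VG is obtained either directly via CNTD or through a call to the SME ABI
// routine __arm_get_current_vg followed by a COPY of its result. This helper
// matches any instruction in that sequence so the prologue emitter can step
// over it before adjusting SP.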
38 unsigned Opc = MBBI->getOpcode();
39 if (Opc == AArch64::CNTD_XPiI)
40 return true;
41
42 if (!requiresGetVGCall())
43 return false;
44
45 if (Opc == AArch64::BL)
46 return matchLibcall(TLI, MBBI->getOperand(0), RTLIB::SMEABI_GET_CURRENT_VG);
47
48 return Opc == TargetOpcode::COPY;
49}
50
51// Convenience function to determine whether I is part of the ZPR callee saves.
53 switch (I->getOpcode()) {
54 default:
55 return false;
56 case AArch64::LD1B_2Z_IMM:
57 case AArch64::ST1B_2Z_IMM:
58 case AArch64::STR_ZXI:
59 case AArch64::LDR_ZXI:
60 case AArch64::PTRUE_C_B:
61 return I->getFlag(MachineInstr::FrameSetup) ||
62 I->getFlag(MachineInstr::FrameDestroy);
63 case AArch64::SEH_SaveZReg:
64 return true;
65 }
66}
67
68// Convenience function to determine whether I is part of the PPR callee saves.
70 switch (I->getOpcode()) {
71 default:
72 return false;
73 case AArch64::STR_PXI:
74 case AArch64::LDR_PXI:
75 return I->getFlag(MachineInstr::FrameSetup) ||
76 I->getFlag(MachineInstr::FrameDestroy);
77 case AArch64::SEH_SavePReg:
78 return true;
79 }
80}
81
82// Convenience function to determine whether I is part of the SVE callee saves.
83static bool isPartOfSVECalleeSaves(MachineBasicBlock::iterator I) {
84 return isPartOfZPRCalleeSaves(I) || isPartOfPPRCalleeSaves(I);
85}
86
90 : MF(MF), MBB(MBB), MFI(MF.getFrameInfo()),
91 Subtarget(MF.getSubtarget<AArch64Subtarget>()), AFL(AFL),
92 RegInfo(*Subtarget.getRegisterInfo()) {
93 TII = Subtarget.getInstrInfo();
94 AFI = MF.getInfo<AArch64FunctionInfo>();
95
96 HasFP = AFL.hasFP(MF);
97 NeedsWinCFI = AFL.needsWinCFI(MF);
98
99 if (AFL.hasSVECalleeSavesAboveFrameRecord(MF)) {
100 if (AFI->hasStackHazardSlotIndex())
101 reportFatalUsageError("SME hazard padding is not supported on Windows");
102 SVELayout = SVEStackLayout::CalleeSavesAboveFrameRecord;
103 } else if (AFI->hasSplitSVEObjects()) {
104 SVELayout = SVEStackLayout::Split;
105 }
106}
107
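// Convert the first (or last) callee-save spill or restore at MBBI into its
// pre/post-indexed form so the SP adjustment of CSStackSizeInc is folded into
// the memory access itself, e.g. (roughly):
//   stp x29, x30, [sp]  ->  stp x29, x30, [sp, #-16]!
// If folding is not possible (non-zero offset, out-of-range increment, or a
// WinCFI pairing restriction), a separate SP arithmetic instruction is emitted
// instead.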
110 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int CSStackSizeInc,
111 bool EmitCFI, MachineInstr::MIFlag FrameFlag, int CFAOffset) const {
112 unsigned NewOpc;
113
114 // If the function contains streaming mode changes, we expect instructions
115 // to calculate the value of VG before spilling. Move past these instructions
116 // if necessary.
117 if (AFL.requiresSaveVG(MF)) {
118 auto &TLI = *Subtarget.getTargetLowering();
119 while (isVGInstruction(MBBI, TLI))
120 ++MBBI;
121 }
122
123 switch (MBBI->getOpcode()) {
124 default:
125 llvm_unreachable("Unexpected callee-save save/restore opcode!");
126 case AArch64::STPXi:
127 NewOpc = AArch64::STPXpre;
128 break;
129 case AArch64::STPDi:
130 NewOpc = AArch64::STPDpre;
131 break;
132 case AArch64::STPQi:
133 NewOpc = AArch64::STPQpre;
134 break;
135 case AArch64::STRXui:
136 NewOpc = AArch64::STRXpre;
137 break;
138 case AArch64::STRDui:
139 NewOpc = AArch64::STRDpre;
140 break;
141 case AArch64::STRQui:
142 NewOpc = AArch64::STRQpre;
143 break;
144 case AArch64::LDPXi:
145 NewOpc = AArch64::LDPXpost;
146 break;
147 case AArch64::LDPDi:
148 NewOpc = AArch64::LDPDpost;
149 break;
150 case AArch64::LDPQi:
151 NewOpc = AArch64::LDPQpost;
152 break;
153 case AArch64::LDRXui:
154 NewOpc = AArch64::LDRXpost;
155 break;
156 case AArch64::LDRDui:
157 NewOpc = AArch64::LDRDpost;
158 break;
159 case AArch64::LDRQui:
160 NewOpc = AArch64::LDRQpost;
161 break;
162 }
163 TypeSize Scale = TypeSize::getFixed(1), Width = TypeSize::getFixed(0);
164 int64_t MinOffset, MaxOffset;
165 bool Success = TII->getMemOpInfo(NewOpc, Scale, Width, MinOffset, MaxOffset);
166 (void)Success;
167 assert(Success && "unknown load/store opcode");
168
169 // If the first store isn't right where we want SP, then we can't fold the
170 // update in, so create a normal arithmetic instruction instead.
171 //
172 // On Windows, some register pairs involving LR can't be folded because
173 // there isn't a corresponding unwind opcode.
174 if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
175 CSStackSizeInc < MinOffset * (int64_t)Scale.getFixedValue() ||
176 CSStackSizeInc > MaxOffset * (int64_t)Scale.getFixedValue() ||
177 (NeedsWinCFI &&
178 (NewOpc == AArch64::LDPXpost || NewOpc == AArch64::STPXpre) &&
179 RegInfo.getEncodingValue(MBBI->getOperand(0).getReg()) + 1 !=
180 RegInfo.getEncodingValue(MBBI->getOperand(1).getReg()))) {
181 // If we are destroying the frame, make sure we add the increment after the
182 // last frame operation.
183 if (FrameFlag == MachineInstr::FrameDestroy) {
184 ++MBBI;
185 // Also skip the SEH instruction, if needed
186 if (NeedsWinCFI && AArch64InstrInfo::isSEHInstruction(*MBBI))
187 ++MBBI;
188 }
189 emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
190 StackOffset::getFixed(CSStackSizeInc), TII, FrameFlag,
191 false, NeedsWinCFI, &HasWinCFI, EmitCFI,
192 StackOffset::getFixed(CFAOffset));
193
194 return std::prev(MBBI);
195 }
196
197 // Get rid of the SEH code associated with the old instruction.
198 if (NeedsWinCFI) {
199 auto SEH = std::next(MBBI);
200 if (AArch64InstrInfo::isSEHInstruction(*SEH))
201 SEH->eraseFromParent();
202 }
203
204 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
205 MIB.addReg(AArch64::SP, RegState::Define);
206
207 // Copy all operands other than the immediate offset.
208 unsigned OpndIdx = 0;
209 for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
210 ++OpndIdx)
211 MIB.add(MBBI->getOperand(OpndIdx));
212
213 assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
214 "Unexpected immediate offset in first/last callee-save save/restore "
215 "instruction!");
216 assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
217 "Unexpected base register in callee-save save/restore instruction!");
218 assert(CSStackSizeInc % Scale == 0);
219 MIB.addImm(CSStackSizeInc / (int)Scale);
220
221 MIB.setMIFlags(MBBI->getFlags());
222 MIB.setMemRefs(MBBI->memoperands());
223
224 // Generate a new SEH code that corresponds to the new instruction.
225 if (NeedsWinCFI) {
226 HasWinCFI = true;
227 AFL.insertSEH(*MIB, *TII, FrameFlag);
228 }
229
230 if (EmitCFI)
231 CFIInstBuilder(MBB, MBBI, FrameFlag)
232 .buildDefCFAOffset(CFAOffset - CSStackSizeInc);
233
234 return std::prev(MBB.erase(MBBI));
235}
236
237// Fix up the SEH opcode associated with the save/restore instruction.
239 unsigned LocalStackSize) {
240 MachineOperand *ImmOpnd = nullptr;
241 unsigned ImmIdx = MBBI->getNumOperands() - 1;
242 switch (MBBI->getOpcode()) {
243 default:
244 llvm_unreachable("Fix the offset in the SEH instruction");
245 case AArch64::SEH_SaveFPLR:
246 case AArch64::SEH_SaveRegP:
247 case AArch64::SEH_SaveReg:
248 case AArch64::SEH_SaveFRegP:
249 case AArch64::SEH_SaveFReg:
250 case AArch64::SEH_SaveAnyRegI:
251 case AArch64::SEH_SaveAnyRegIP:
252 case AArch64::SEH_SaveAnyRegQP:
253 case AArch64::SEH_SaveAnyRegQPX:
254 ImmOpnd = &MBBI->getOperand(ImmIdx);
255 break;
256 }
257 if (ImmOpnd)
258 ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
259}
260
262 MachineInstr &MI, uint64_t LocalStackSize) const {
263 if (AArch64InstrInfo::isSEHInstruction(MI))
264 return;
265
266 unsigned Opc = MI.getOpcode();
267 unsigned Scale;
268 switch (Opc) {
269 case AArch64::STPXi:
270 case AArch64::STRXui:
271 case AArch64::STPDi:
272 case AArch64::STRDui:
273 case AArch64::LDPXi:
274 case AArch64::LDRXui:
275 case AArch64::LDPDi:
276 case AArch64::LDRDui:
277 Scale = 8;
278 break;
279 case AArch64::STPQi:
280 case AArch64::STRQui:
281 case AArch64::LDPQi:
282 case AArch64::LDRQui:
283 Scale = 16;
284 break;
285 default:
286 llvm_unreachable("Unexpected callee-save save/restore opcode!");
287 }
288
289 unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
290 assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
291 "Unexpected base register in callee-save save/restore instruction!");
292 // Last operand is immediate offset that needs fixing.
293 MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
294 // All generated opcodes have scaled offsets.
295 assert(LocalStackSize % Scale == 0);
296 OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);
297
298 if (NeedsWinCFI) {
299 HasWinCFI = true;
300 auto MBBI = std::next(MachineBasicBlock::iterator(MI));
301 assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
302 assert(AArch64InstrInfo::isSEHInstruction(*MBBI) &&
303 "Expecting a SEH instruction");
304 fixupSEHOpcode(MBBI, LocalStackSize);
305 }
306}
307
309 uint64_t StackBumpBytes) const {
310 if (AFL.homogeneousPrologEpilog(MF))
311 return false;
312
313 if (AFI->getLocalStackSize() == 0)
314 return false;
315
316 // For WinCFI, if optimizing for size, prefer to not combine the stack bump
317 // (to force a stp with predecrement) to match the packed unwind format,
318 // provided that there actually are any callee saved registers to merge the
319 // decrement with.
320 //
321 // Note that for certain paired saves, like "x19, lr", we can't actually
322 // emit a predecrement stp, but packed unwind still expects a separate stack
323 // adjustment.
324 //
325 // This is potentially marginally slower, but allows using the packed
326 // unwind format for functions that both have a local area and callee saved
327 // registers. Using the packed unwind format notably reduces the size of
328 // the unwind info.
329 if (AFL.needsWinCFI(MF) && AFI->getCalleeSavedStackSize() > 0 &&
330 MF.getFunction().hasOptSize())
331 return false;
332
333 // 512 is the maximum immediate for stp/ldp that will be used for
334 // callee-save save/restores
335 if (StackBumpBytes >= 512 ||
336 AFL.windowsRequiresStackProbe(MF, StackBumpBytes))
337 return false;
338
339 if (MFI.hasVarSizedObjects())
340 return false;
341
342 if (RegInfo.hasStackRealignment(MF))
343 return false;
344
345 // This isn't strictly necessary, but it simplifies things a bit since the
346 // current RedZone handling code assumes the SP is adjusted by the
347 // callee-save save/restore code.
348 if (AFL.canUseRedZone(MF))
349 return false;
350
351 // When there is an SVE area on the stack, always allocate the
352 // callee-saves and spills/locals separately.
353 if (AFI->hasSVEStackSize())
354 return false;
355
356 return true;
357}
358
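// Split the total SVE stack size into its callee-save and locals portions for
// the predicate (PPR) and vector (ZPR) areas. When split SVE is disabled, all
// SVE locals are attributed to the ZPR area (see the comment below).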
360 StackOffset PPRCalleeSavesSize =
361 StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize());
362 StackOffset ZPRCalleeSavesSize =
363 StackOffset::getScalable(AFI->getZPRCalleeSavedStackSize());
364 StackOffset PPRLocalsSize = AFL.getPPRStackSize(MF) - PPRCalleeSavesSize;
365 StackOffset ZPRLocalsSize = AFL.getZPRStackSize(MF) - ZPRCalleeSavesSize;
366 if (SVELayout == SVEStackLayout::Split)
367 return {{PPRCalleeSavesSize, PPRLocalsSize},
368 {ZPRCalleeSavesSize, ZPRLocalsSize}};
369 // For simplicity, attribute all locals to ZPRs when split SVE is disabled.
370 return {{PPRCalleeSavesSize, StackOffset{}},
371 {ZPRCalleeSavesSize, PPRLocalsSize + ZPRLocalsSize}};
372}
373
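// Compute the SVE allocation chunks performed by the prologue: BeforePPRs
// (allocated before the PPR callee-saves), AfterPPRs (a separate bump between
// the PPR and ZPR areas), and AfterZPRs (SVE locals, which are later combined
// with the allocation of the non-SVE locals).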
375 SVEFrameSizes const &SVE) {
376 StackOffset AfterZPRs = SVE.ZPR.LocalsSize;
377 StackOffset BeforePPRs = SVE.ZPR.CalleeSavesSize + SVE.PPR.CalleeSavesSize;
378 StackOffset AfterPPRs = {};
379 if (SVELayout == SVEStackLayout::Split) {
380 BeforePPRs = SVE.PPR.CalleeSavesSize;
381 // If there are no ZPR CSRs, place all local allocations after the ZPRs.
382 if (SVE.ZPR.CalleeSavesSize)
383 AfterPPRs += SVE.PPR.LocalsSize + SVE.ZPR.CalleeSavesSize;
384 else
385 AfterZPRs += SVE.PPR.LocalsSize; // Group allocation of locals.
386 }
387 return {BeforePPRs, AfterPPRs, AfterZPRs};
388}
389
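// Identify the instruction ranges making up the PPR and ZPR callee-save
// spill/restore sequences, walking forwards from MBBI in a prologue and
// backwards from MBBI in an epilogue, and return the {PPR, ZPR} ranges.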
395
398 StackOffset PPRCalleeSavesSize,
399 StackOffset ZPRCalleeSavesSize,
400 bool IsEpilogue) {
403 IsEpilogue ? MBB.begin() : MBB.getFirstTerminator();
404 auto AdjustI = [&](auto MBBI) { return IsEpilogue ? std::prev(MBBI) : MBBI; };
405 // Process the SVE CS to find the starts/ends of the ZPR and PPR areas.
406 if (PPRCalleeSavesSize) {
407 PPRsI = AdjustI(PPRsI);
408 assert(isPartOfPPRCalleeSaves(*PPRsI) && "Unexpected instruction");
409 while (PPRsI != End && isPartOfPPRCalleeSaves(AdjustI(PPRsI)))
410 IsEpilogue ? (--PPRsI) : (++PPRsI);
411 }
412 MachineBasicBlock::iterator ZPRsI = PPRsI;
413 if (ZPRCalleeSavesSize) {
414 ZPRsI = AdjustI(ZPRsI);
415 assert(isPartOfZPRCalleeSaves(*ZPRsI) && "Unexpected instruction");
416 while (ZPRsI != End && isPartOfZPRCalleeSaves(AdjustI(ZPRsI)))
417 IsEpilogue ? (--ZPRsI) : (++ZPRsI);
418 }
419 if (IsEpilogue)
420 return {{PPRsI, MBBI}, {ZPRsI, PPRsI}};
421 return {{MBBI, PPRsI}, {PPRsI, ZPRsI}};
422}
423
428 EmitCFI = AFI->needsDwarfUnwindInfo(MF);
429 EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
430 IsFunclet = MBB.isEHFuncletEntry();
431 HomPrologEpilog = AFL.homogeneousPrologEpilog(MF);
432
433#ifndef NDEBUG
434 collectBlockLiveins();
435#endif
436}
437
438#ifndef NDEBUG
439/// Collect live registers from the end of \p MI's parent up to (including) \p
440/// MI in \p LiveRegs.
443
444 MachineBasicBlock &MBB = *MI.getParent();
445 LiveRegs.addLiveOuts(MBB);
446 for (const MachineInstr &MI :
447 reverse(make_range(MI.getIterator(), MBB.instr_end())))
448 LiveRegs.stepBackward(MI);
449}
450
451void AArch64PrologueEmitter::collectBlockLiveins() {
452 // Collect live registers from the end of MBB up to the start of the existing
453 // frame setup instructions.
454 PrologueEndI = MBB.begin();
455 while (PrologueEndI != MBB.end() &&
456 PrologueEndI->getFlag(MachineInstr::FrameSetup))
457 ++PrologueEndI;
458
459 if (PrologueEndI != MBB.end()) {
460 getLivePhysRegsUpTo(*PrologueEndI, RegInfo, LiveRegs);
461 // Ignore registers used for stack management for now.
462 LiveRegs.removeReg(AArch64::SP);
463 LiveRegs.removeReg(AArch64::X19);
464 LiveRegs.removeReg(AArch64::FP);
465 LiveRegs.removeReg(AArch64::LR);
466
467 // X0 will be clobbered by a call to __arm_get_current_vg in the prologue.
468 // This is necessary to spill VG if required where SVE is unavailable, but
469 // X0 is preserved around this call.
470 if (requiresGetVGCall())
471 LiveRegs.removeReg(AArch64::X0);
472 }
473}
474
475void AArch64PrologueEmitter::verifyPrologueClobbers() const {
476 if (PrologueEndI == MBB.end())
477 return;
478 // Check if any of the newly inserted instructions clobber any of the live registers.
479 for (MachineInstr &MI :
480 make_range(MBB.instr_begin(), PrologueEndI->getIterator())) {
481 for (auto &Op : MI.operands())
482 if (Op.isReg() && Op.isDef())
483 assert(!LiveRegs.contains(Op.getReg()) &&
484 "live register clobbered by inserted prologue instructions");
485 }
486}
487#endif
488
489void AArch64PrologueEmitter::determineLocalsStackSize(
490 uint64_t StackSize, uint64_t PrologueSaveSize) {
491 AFI->setLocalStackSize(StackSize - PrologueSaveSize);
492 CombineSPBump = shouldCombineCSRLocalStackBump(StackSize);
493}
494
495// Return the maximum possible number of bytes for `Size` due to the
496// architectural limit on the size of an SVE register.
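// (The architecture caps the SVE vector length at 2048 bits, i.e. 16x the
// 128-bit minimum, so each scalable byte can expand to at most 16 bytes.)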
497static int64_t upperBound(StackOffset Size) {
498 static const int64_t MAX_BYTES_PER_SCALABLE_BYTE = 16;
499 return Size.getScalable() * MAX_BYTES_PER_SCALABLE_BYTE + Size.getFixed();
500}
501
502void AArch64PrologueEmitter::allocateStackSpace(
503 MachineBasicBlock::iterator MBBI, int64_t RealignmentPadding,
504 StackOffset AllocSize, bool EmitCFI, StackOffset InitialOffset,
505 bool FollowupAllocs) {
506
507 if (!AllocSize)
508 return;
509
510 DebugLoc DL;
511 const int64_t MaxAlign = MFI.getMaxAlign().value();
512 const uint64_t AndMask = ~(MaxAlign - 1);
513
515 Register TargetReg = RealignmentPadding
516 ? AFL.findScratchNonCalleeSaveRegister(&MBB)
517 : AArch64::SP;
518 // SUB Xd/SP, SP, AllocSize
519 emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, TII,
521 EmitCFI, InitialOffset);
522
523 if (RealignmentPadding) {
524 // AND SP, X9, 0b11111...0000
525 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
526 .addReg(TargetReg, RegState::Kill)
529 AFI->setStackRealigned(true);
530
531 // No need for SEH instructions here; if we're realigning the stack,
532 // we've set a frame pointer and already finished the SEH prologue.
534 }
535 return;
536 }
537
538 //
539 // Stack probing allocation.
540 //
541
542 // Fixed length allocation. If we don't need to re-align the stack and don't
543 // have SVE objects, we can use a more efficient sequence for stack probing.
544 if (AllocSize.getScalable() == 0 && RealignmentPadding == 0) {
545 Register ScratchReg = AFL.findScratchNonCalleeSaveRegister(&MBB);
546 assert(ScratchReg != AArch64::NoRegister);
547 BuildMI(MBB, MBBI, DL, TII->get(AArch64::PROBED_STACKALLOC))
548 .addDef(ScratchReg)
549 .addImm(AllocSize.getFixed())
550 .addImm(InitialOffset.getFixed())
551 .addImm(InitialOffset.getScalable());
552 // The fixed allocation may leave unprobed bytes at the top of the
553 // stack. If we have subsequent allocation (e.g. if we have variable-sized
554 // objects), we need to issue an extra probe, so these allocations start in
555 // a known state.
556 if (FollowupAllocs) {
557 // LDR XZR, [SP]
558 BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
559 .addDef(AArch64::XZR)
560 .addReg(AArch64::SP)
561 .addImm(0)
562 .addMemOperand(MF.getMachineMemOperand(
565 Align(8)))
567 }
568
569 return;
570 }
571
572 // Variable length allocation.
573
574 // If the (unknown) allocation size cannot exceed the probe size, decrement
575 // the stack pointer right away.
576 int64_t ProbeSize = AFI->getStackProbeSize();
577 if (upperBound(AllocSize) + RealignmentPadding <= ProbeSize) {
578 Register ScratchReg = RealignmentPadding
579 ? AFL.findScratchNonCalleeSaveRegister(&MBB)
580 : AArch64::SP;
581 assert(ScratchReg != AArch64::NoRegister);
582 // SUB Xd, SP, AllocSize
583 emitFrameOffset(MBB, MBBI, DL, ScratchReg, AArch64::SP, -AllocSize, TII,
585 EmitCFI, InitialOffset);
586 if (RealignmentPadding) {
587 // AND SP, Xn, 0b11111...0000
588 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
589 .addReg(ScratchReg, RegState::Kill)
592 AFI->setStackRealigned(true);
593 }
594 if (FollowupAllocs || upperBound(AllocSize) + RealignmentPadding >
596 // LDR XZR, [SP]
597 BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
598 .addDef(AArch64::XZR)
599 .addReg(AArch64::SP)
600 .addImm(0)
601 .addMemOperand(MF.getMachineMemOperand(
604 Align(8)))
606 }
607 return;
608 }
609
610 // Emit a variable-length allocation probing loop.
611 // TODO: As an optimisation, the loop can be "unrolled" into a few parts,
612 // each of them guaranteed to adjust the stack by less than the probe size.
613 Register TargetReg = AFL.findScratchNonCalleeSaveRegister(&MBB);
614 assert(TargetReg != AArch64::NoRegister);
615 // SUB Xd, SP, AllocSize
616 emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, TII,
618 EmitCFI, InitialOffset);
619 if (RealignmentPadding) {
620 // AND Xn, Xn, 0b11111...0000
621 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), TargetReg)
622 .addReg(TargetReg, RegState::Kill)
625 }
626
627 BuildMI(MBB, MBBI, DL, TII->get(AArch64::PROBED_STACKALLOC_VAR))
628 .addReg(TargetReg);
629 if (EmitCFI) {
630 // Set the CFA register back to SP.
631 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
632 .buildDefCFARegister(AArch64::SP);
633 }
634 if (RealignmentPadding)
635 AFI->setStackRealigned(true);
636}
637
639 const MachineBasicBlock::iterator PrologueBeginI = MBB.begin();
640 const MachineBasicBlock::iterator EndI = MBB.end();
641
642 // At this point, we're going to decide whether or not the function uses a
643 // redzone. In most cases, the function doesn't have a redzone, so assume
644 // it doesn't and set the flag to true only when we find that it does.
645 AFI->setHasRedZone(false);
646
647 // Debug location must be unknown since the first debug location is used
648 // to determine the end of the prologue.
649 DebugLoc DL;
650
651 // In some cases, particularly with CallingConv::SwiftTail, it is possible to
652 // have a tail-call where the caller only needs to adjust the stack pointer in
653 // the epilogue. In this case, we still need to emit a SEH prologue sequence.
654 // See `seh-minimal-prologue-epilogue.ll` test cases.
655 if (AFI->getArgumentStackToRestore())
657
658 if (AFI->shouldSignReturnAddress(MF)) {
659 // If pac-ret+leaf is in effect, PAUTH_PROLOGUE pseudo instructions
660 // are inserted by emitPacRetPlusLeafHardening().
661 if (!AFL.shouldSignReturnAddressEverywhere(MF)) {
662 BuildMI(MBB, PrologueBeginI, DL, TII->get(AArch64::PAUTH_PROLOGUE))
664 }
665 // AArch64PointerAuth pass will insert SEH_PACSignLR
667 }
668
669 if (AFI->needsShadowCallStackPrologueEpilogue(MF)) {
670 emitShadowCallStackPrologue(PrologueBeginI, DL);
672 }
673
674 if (EmitCFI && AFI->isMTETagged())
675 BuildMI(MBB, PrologueBeginI, DL, TII->get(AArch64::EMITMTETAGGED))
677
678 // We signal the presence of a Swift extended frame to external tools by
679 // storing FP with 0b0001 in bits 63:60. In normal userland operation a simple
680 // ORR is sufficient; it is assumed a Swift kernel would initialize the TBI
681 // bits so that this still holds.
682 if (HasFP && AFI->hasSwiftAsyncContext())
683 emitSwiftAsyncContextFramePointer(PrologueBeginI, DL);
684
685 // All calls are tail calls in GHC calling conv, and functions have no
686 // prologue/epilogue.
687 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
688 return;
689
690 // Set tagged base pointer to the requested stack slot. Ideally it should
691 // match SP value after prologue.
692 if (std::optional<int> TBPI = AFI->getTaggedBasePointerIndex())
693 AFI->setTaggedBasePointerOffset(-MFI.getObjectOffset(*TBPI));
694 else
695 AFI->setTaggedBasePointerOffset(MFI.getStackSize());
696
697 // getStackSize() includes all the locals in its size calculation. We don't
698 // include these locals when computing the stack size of a funclet, as they
699 // are allocated in the parent's stack frame and accessed via the frame
700 // pointer from the funclet. We only save the callee saved registers in the
701 // funclet, which are really the callee saved registers of the parent
702 // function, including the funclet.
703 int64_t NumBytes =
704 IsFunclet ? AFL.getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
705 if (!AFI->hasStackFrame() && !AFL.windowsRequiresStackProbe(MF, NumBytes))
706 return emitEmptyStackFramePrologue(NumBytes, PrologueBeginI, DL);
707
708 bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
709 unsigned FixedObject = AFL.getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
710
711 auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
712 // All of the remaining stack allocations are for locals.
713 determineLocalsStackSize(NumBytes, PrologueSaveSize);
714
715 auto [PPR, ZPR] = getSVEStackFrameSizes();
716 SVEStackAllocations SVEAllocs = getSVEStackAllocations({PPR, ZPR});
717
718 MachineBasicBlock::iterator FirstGPRSaveI = PrologueBeginI;
720 assert(!SVEAllocs.AfterPPRs &&
721 "unexpected SVE allocs after PPRs with CalleeSavesAboveFrameRecord");
722 // If we're doing SVE saves first, we need to immediately allocate space
723 // for fixed objects, then space for the SVE callee saves.
724 //
725 // Windows unwind requires that the scalable size is a multiple of 16;
726 // that's handled when the callee-saved size is computed.
727 auto SaveSize = SVEAllocs.BeforePPRs + StackOffset::getFixed(FixedObject);
728 allocateStackSpace(PrologueBeginI, 0, SaveSize, false, StackOffset{},
729 /*FollowupAllocs=*/true);
730 NumBytes -= FixedObject;
731
732 // Now allocate space for the GPR callee saves.
733 MachineBasicBlock::iterator MBBI = PrologueBeginI;
734 while (MBBI != EndI && isPartOfSVECalleeSaves(MBBI))
735 ++MBBI;
737 MBBI, DL, -AFI->getCalleeSavedStackSize(), EmitAsyncCFI);
738 NumBytes -= AFI->getCalleeSavedStackSize();
739 } else if (CombineSPBump) {
740 assert(!AFL.getSVEStackSize(MF) && "Cannot combine SP bump with SVE");
741 emitFrameOffset(MBB, PrologueBeginI, DL, AArch64::SP, AArch64::SP,
742 StackOffset::getFixed(-NumBytes), TII,
744 EmitAsyncCFI);
745 NumBytes = 0;
746 } else if (HomPrologEpilog) {
747 // Stack has been already adjusted.
748 NumBytes -= PrologueSaveSize;
749 } else if (PrologueSaveSize != 0) {
751 PrologueBeginI, DL, -PrologueSaveSize, EmitAsyncCFI);
752 NumBytes -= PrologueSaveSize;
753 }
754 assert(NumBytes >= 0 && "Negative stack allocation size!?");
755
756 // Move past the saves of the callee-saved registers, fixing up the offsets
757 // and pre-inc if we decided to combine the callee-save and local stack
758 // pointer bump above.
759 auto &TLI = *Subtarget.getTargetLowering();
760
761 MachineBasicBlock::iterator AfterGPRSavesI = FirstGPRSaveI;
762 while (AfterGPRSavesI != EndI &&
763 AfterGPRSavesI->getFlag(MachineInstr::FrameSetup) &&
764 !isPartOfSVECalleeSaves(AfterGPRSavesI)) {
765 if (CombineSPBump &&
766 // Only fix-up frame-setup load/store instructions.
767 (!AFL.requiresSaveVG(MF) || !isVGInstruction(AfterGPRSavesI, TLI)))
768 fixupCalleeSaveRestoreStackOffset(*AfterGPRSavesI,
769 AFI->getLocalStackSize());
770 ++AfterGPRSavesI;
771 }
772
773 // For funclets the FP belongs to the containing function. Only set up FP if
774 // we actually need to.
775 if (!IsFunclet && HasFP)
776 emitFramePointerSetup(AfterGPRSavesI, DL, FixedObject);
777
778 // Now emit the moves for whatever callee saved regs we have (including FP,
779 // LR if those are saved). Frame instructions for SVE registers are emitted
780 // later, after the instructions which actually save the SVE regs.
781 if (EmitAsyncCFI)
782 emitCalleeSavedGPRLocations(AfterGPRSavesI);
783
784 // Alignment is required for the parent frame, not the funclet
785 const bool NeedsRealignment =
786 NumBytes && !IsFunclet && RegInfo.hasStackRealignment(MF);
787 const int64_t RealignmentPadding =
788 (NeedsRealignment && MFI.getMaxAlign() > Align(16))
789 ? MFI.getMaxAlign().value() - 16
790 : 0;
791
792 if (AFL.windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding))
793 emitWindowsStackProbe(AfterGPRSavesI, DL, NumBytes, RealignmentPadding);
794
795 StackOffset NonSVELocalsSize = StackOffset::getFixed(NumBytes);
796 SVEAllocs.AfterZPRs += NonSVELocalsSize;
797
798 StackOffset CFAOffset =
799 StackOffset::getFixed(MFI.getStackSize()) - NonSVELocalsSize;
800 MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;
801 // Allocate space for the callee saves and PPR locals (if any).
803 auto [PPRRange, ZPRRange] =
804 partitionSVECS(MBB, AfterGPRSavesI, PPR.CalleeSavesSize,
805 ZPR.CalleeSavesSize, /*IsEpilogue=*/false);
806 AfterSVESavesI = ZPRRange.End;
807 if (EmitAsyncCFI)
808 emitCalleeSavedSVELocations(AfterSVESavesI);
809
810 allocateStackSpace(PPRRange.Begin, 0, SVEAllocs.BeforePPRs,
811 EmitAsyncCFI && !HasFP, CFAOffset,
812 MFI.hasVarSizedObjects() || SVEAllocs.AfterPPRs ||
813 SVEAllocs.AfterZPRs);
814 CFAOffset += SVEAllocs.BeforePPRs;
815 assert(PPRRange.End == ZPRRange.Begin &&
816 "Expected ZPR callee saves after PPR locals");
817 allocateStackSpace(PPRRange.End, 0, SVEAllocs.AfterPPRs,
818 EmitAsyncCFI && !HasFP, CFAOffset,
819 MFI.hasVarSizedObjects() || SVEAllocs.AfterZPRs);
820 CFAOffset += SVEAllocs.AfterPPRs;
821 } else {
823 // Note: With CalleeSavesAboveFrameRecord, the SVE CS (BeforePPRs) have
824 // already been allocated. PPR locals (included in AfterPPRs) are not
825 // supported (note: this is asserted above).
826 CFAOffset += SVEAllocs.BeforePPRs;
827 }
828
829 // Allocate space for the rest of the frame including ZPR locals. Align the
830 // stack as necessary.
831 assert(!(AFL.canUseRedZone(MF) && NeedsRealignment) &&
832 "Cannot use redzone with stack realignment");
833 if (!AFL.canUseRedZone(MF)) {
834 // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have the
835 // correct value here, as NumBytes also includes padding bytes, which
836 // shouldn't be counted here.
837 allocateStackSpace(AfterSVESavesI, RealignmentPadding, SVEAllocs.AfterZPRs,
838 EmitAsyncCFI && !HasFP, CFAOffset,
839 MFI.hasVarSizedObjects());
840 }
841
842 // If we need a base pointer, set it up here. It's whatever the value of the
843 // stack pointer is at this point. Any variable size objects will be
844 // allocated after this, so we can still use the base pointer to reference
845 // locals.
846 //
847 // FIXME: Clarify FrameSetup flags here.
848 // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
849 // needed.
850 // For funclets the BP belongs to the containing function.
851 if (!IsFunclet && RegInfo.hasBasePointer(MF)) {
852 TII->copyPhysReg(MBB, AfterSVESavesI, DL, RegInfo.getBaseRegister(),
853 AArch64::SP, false);
854 if (NeedsWinCFI) {
855 HasWinCFI = true;
856 BuildMI(MBB, AfterSVESavesI, DL, TII->get(AArch64::SEH_Nop))
858 }
859 }
860
861 // The very last FrameSetup instruction indicates the end of prologue. Emit a
862 // SEH opcode indicating the prologue end.
863 if (NeedsWinCFI && HasWinCFI) {
864 BuildMI(MBB, AfterSVESavesI, DL, TII->get(AArch64::SEH_PrologEnd))
866 }
867
868 // SEH funclets are passed the frame pointer in X1. If the parent
869 // function uses the base register, then the base register is used
870 // directly, and is not retrieved from X1.
871 if (IsFunclet && F.hasPersonalityFn()) {
872 EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
874 BuildMI(MBB, AfterSVESavesI, DL, TII->get(TargetOpcode::COPY),
875 AArch64::FP)
876 .addReg(AArch64::X1)
878 MBB.addLiveIn(AArch64::X1);
879 }
880 }
881
882 if (EmitCFI && !EmitAsyncCFI) {
883 if (HasFP) {
884 emitDefineCFAWithFP(AfterSVESavesI, FixedObject);
885 } else {
886 StackOffset TotalSize =
887 AFL.getSVEStackSize(MF) +
888 StackOffset::getFixed((int64_t)MFI.getStackSize());
889 CFIInstBuilder CFIBuilder(MBB, AfterSVESavesI, MachineInstr::FrameSetup);
890 CFIBuilder.insertCFIInst(
891 createDefCFA(RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP,
892 TotalSize, /*LastAdjustmentWasScalable=*/false));
893 }
894 emitCalleeSavedGPRLocations(AfterSVESavesI);
895 emitCalleeSavedSVELocations(AfterSVESavesI);
896 }
897}
898
899void AArch64PrologueEmitter::emitShadowCallStackPrologue(
901 // Shadow call stack prolog: str x30, [x18], #8
902 BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXpost))
903 .addReg(AArch64::X18, RegState::Define)
904 .addReg(AArch64::LR)
905 .addReg(AArch64::X18)
906 .addImm(8)
908
909 // This instruction also makes x18 live-in to the entry block.
910 MBB.addLiveIn(AArch64::X18);
911
912 if (NeedsWinCFI)
913 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
915
916 if (EmitCFI) {
917 // Emit a CFI instruction that causes 8 to be subtracted from the value of
918 // x18 when unwinding past this frame.
919 static const char CFIInst[] = {
920 dwarf::DW_CFA_val_expression,
921 18, // register
922 2, // length
923 static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
924 static_cast<char>(-8) & 0x7f, // addend (sleb128)
925 };
926 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
927 .buildEscape(StringRef(CFIInst, sizeof(CFIInst)));
928 }
929}
930
931void AArch64PrologueEmitter::emitSwiftAsyncContextFramePointer(
933 switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
935 if (Subtarget.swiftAsyncContextIsDynamicallySet()) {
936 // The special symbol below is absolute and has a *value* that can be
937 // combined with the frame pointer to signal an extended frame.
938 BuildMI(MBB, MBBI, DL, TII->get(AArch64::LOADgot), AArch64::X16)
939 .addExternalSymbol("swift_async_extendedFramePointerFlags",
941 if (NeedsWinCFI) {
942 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
944 HasWinCFI = true;
945 }
946 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::FP)
947 .addUse(AArch64::FP)
948 .addUse(AArch64::X16)
949 .addImm(Subtarget.isTargetILP32() ? 32 : 0);
950 if (NeedsWinCFI) {
951 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
953 HasWinCFI = true;
954 }
955 break;
956 }
957 [[fallthrough]];
958
960 // ORR x29, x29, #0x1000_0000_0000_0000
961 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXri), AArch64::FP)
962 .addUse(AArch64::FP)
963 .addImm(0x1100)
965 if (NeedsWinCFI) {
966 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
968 HasWinCFI = true;
969 }
970 break;
971
973 break;
974 }
975}
976
977void AArch64PrologueEmitter::emitEmptyStackFramePrologue(
978 int64_t NumBytes, MachineBasicBlock::iterator MBBI,
979 const DebugLoc &DL) const {
980 assert(!HasFP && "unexpected function without stack frame but with FP");
981 assert(!AFL.getSVEStackSize(MF) &&
982 "unexpected function without stack frame but with SVE objects");
983 // All of the stack allocation is for locals.
984 AFI->setLocalStackSize(NumBytes);
985 if (!NumBytes) {
986 if (NeedsWinCFI && HasWinCFI) {
987 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
989 }
990 return;
991 }
992 // REDZONE: If the stack size is less than 128 bytes, we don't need
993 // to actually allocate.
994 if (AFL.canUseRedZone(MF)) {
995 AFI->setHasRedZone(true);
996 ++NumRedZoneFunctions;
997 } else {
998 emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
999 StackOffset::getFixed(-NumBytes), TII,
1001 if (EmitCFI) {
1002 // Label used to tie together the PROLOG_LABEL and the MachineMoves.
1003 MCSymbol *FrameLabel = MF.getContext().createTempSymbol();
1004 // Encode the stack size of the leaf function.
1005 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
1006 .buildDefCFAOffset(NumBytes, FrameLabel);
1007 }
1008 }
1009
1010 if (NeedsWinCFI) {
1011 HasWinCFI = true;
1012 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
1014 }
1015}
1016
1017void AArch64PrologueEmitter::emitFramePointerSetup(
1019 unsigned FixedObject) {
1020 int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset();
1021 if (CombineSPBump)
1022 FPOffset += AFI->getLocalStackSize();
1023
1024 if (AFI->hasSwiftAsyncContext()) {
1025 // Before we update the live FP we have to ensure there's a valid (or
1026 // null) asynchronous context in its slot just before FP in the frame
1027 // record, so store it now.
1028 const auto &Attrs = MF.getFunction().getAttributes();
1029 bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
1030 if (HaveInitialContext)
1031 MBB.addLiveIn(AArch64::X22);
1032 Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;
1033 BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
1034 .addUse(Reg)
1035 .addUse(AArch64::SP)
1036 .addImm(FPOffset - 8)
1038 if (NeedsWinCFI) {
1039 // WinCFI and arm64e, where StoreSwiftAsyncContext is expanded
1040 // to multiple instructions, should be mutually-exclusive.
1041 assert(Subtarget.getTargetTriple().getArchName() != "arm64e");
1042 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1044 HasWinCFI = true;
1045 }
1046 }
1047
1048 if (HomPrologEpilog) {
1049 auto Prolog = MBBI;
1050 --Prolog;
1051 assert(Prolog->getOpcode() == AArch64::HOM_Prolog);
1052 Prolog->addOperand(MachineOperand::CreateImm(FPOffset));
1053 } else {
1054 // Issue sub fp, sp, FPOffset or
1055 // mov fp,sp when FPOffset is zero.
1056 // Note: All stores of callee-saved registers are marked as "FrameSetup".
1057 // This code marks the instruction(s) that set the FP also.
1058 emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
1059 StackOffset::getFixed(FPOffset), TII,
1061 if (NeedsWinCFI && HasWinCFI) {
1062 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
1064 // After setting up the FP, the rest of the prolog doesn't need to be
1065 // included in the SEH unwind info.
1066 NeedsWinCFI = false;
1067 }
1068 }
1069 if (EmitAsyncCFI)
1070 emitDefineCFAWithFP(MBBI, FixedObject);
1071}
1072
1073// Define the current CFA rule to use the provided FP.
1074void AArch64PrologueEmitter::emitDefineCFAWithFP(
1075 MachineBasicBlock::iterator MBBI, unsigned FixedObject) const {
1076 const int OffsetToFirstCalleeSaveFromFP =
1077 AFI->getCalleeSaveBaseToFrameRecordOffset() -
1078 AFI->getCalleeSavedStackSize();
1079 Register FramePtr = RegInfo.getFrameRegister(MF);
1080 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
1081 .buildDefCFA(FramePtr, FixedObject - OffsetToFirstCalleeSaveFromFP);
1082}
1083
1084void AArch64PrologueEmitter::emitWindowsStackProbe(
1085 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int64_t &NumBytes,
1086 int64_t RealignmentPadding) const {
1087 if (AFI->getSVECalleeSavedStackSize())
1088 report_fatal_error("SVE callee saves not yet supported with stack probing");
1089
1090 // Find an available register to spill the value of X15 to, if X15 is being
1091 // used already for nest.
1092 unsigned X15Scratch = AArch64::NoRegister;
1093 if (llvm::any_of(MBB.liveins(),
1094 [this](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
1095 return RegInfo.isSuperOrSubRegisterEq(AArch64::X15,
1096 LiveIn.PhysReg);
1097 })) {
1098 X15Scratch = AFL.findScratchNonCalleeSaveRegister(&MBB, /*HasCall=*/true);
1099 assert(X15Scratch != AArch64::NoRegister &&
1100 (X15Scratch < AArch64::X15 || X15Scratch > AArch64::X17));
1101#ifndef NDEBUG
1102 LiveRegs.removeReg(AArch64::X15); // ignore X15 since we restore it
1103#endif
1104 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), X15Scratch)
1105 .addReg(AArch64::XZR)
1106 .addReg(AArch64::X15, RegState::Undef)
1107 .addReg(AArch64::X15, RegState::Implicit)
1109 }
1110
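// The Windows stack probe helper (__chkstk) expects the allocation size in
// X15, expressed in units of 16 bytes, hence the shift by 4 below.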
1111 uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4;
1112 if (NeedsWinCFI) {
1113 HasWinCFI = true;
1114 // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
1115 // exceed this amount. We need to move at most 2^24 - 1 into x15.
1116 // This is at most two instructions, MOVZ followed by MOVK.
1117 // TODO: Fix to use multiple stack alloc unwind codes for stacks
1118 // exceeding 256MB in size.
1119 if (NumBytes >= (1 << 28))
1120 report_fatal_error("Stack size cannot exceed 256MB for stack "
1121 "unwinding purposes");
1122
1123 uint32_t LowNumWords = NumWords & 0xFFFF;
1124 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15)
1125 .addImm(LowNumWords)
1128 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1130 if ((NumWords & 0xFFFF0000) != 0) {
1131 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
1132 .addReg(AArch64::X15)
1133 .addImm((NumWords & 0xFFFF0000) >> 16) // High half
1136 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1138 }
1139 } else {
1140 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
1141 .addImm(NumWords)
1143 }
1144
1145 const AArch64TargetLowering *TLI = Subtarget.getTargetLowering();
1146 RTLIB::LibcallImpl ChkStkLibcall = TLI->getLibcallImpl(RTLIB::STACK_PROBE);
1147 if (ChkStkLibcall == RTLIB::Unsupported)
1148 reportFatalUsageError("no available implementation of __chkstk");
1149
1150 const char *ChkStk = TLI->getLibcallImplName(ChkStkLibcall).data();
1151 switch (MF.getTarget().getCodeModel()) {
1152 case CodeModel::Tiny:
1153 case CodeModel::Small:
1154 case CodeModel::Medium:
1155 case CodeModel::Kernel:
1156 BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
1157 .addExternalSymbol(ChkStk)
1158 .addReg(AArch64::X15, RegState::Implicit)
1159 .addReg(AArch64::X16,
1161 .addReg(AArch64::X17,
1163 .addReg(AArch64::NZCV,
1166 if (NeedsWinCFI) {
1167 HasWinCFI = true;
1168 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1170 }
1171 break;
1172 case CodeModel::Large:
1173 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
1174 .addReg(AArch64::X16, RegState::Define)
1175 .addExternalSymbol(ChkStk)
1176 .addExternalSymbol(ChkStk)
1178 if (NeedsWinCFI) {
1179 HasWinCFI = true;
1180 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1182 }
1183
1185 .addReg(AArch64::X16, RegState::Kill)
1187 .addReg(AArch64::X16,
1189 .addReg(AArch64::X17,
1191 .addReg(AArch64::NZCV,
1194 if (NeedsWinCFI) {
1195 HasWinCFI = true;
1196 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1198 }
1199 break;
1200 }
1201
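// Now actually decrement SP by the probed allocation size: X15 holds the size
// in 16-byte units, so it is scaled back up by 16 in the subtraction.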
1202 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
1203 .addReg(AArch64::SP, RegState::Kill)
1204 .addReg(AArch64::X15, RegState::Kill)
1207 if (NeedsWinCFI) {
1208 HasWinCFI = true;
1209 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
1210 .addImm(NumBytes)
1212 }
1213 NumBytes = 0;
1214
1215 if (RealignmentPadding > 0) {
1216 if (RealignmentPadding >= 4096) {
1217 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm))
1218 .addReg(AArch64::X16, RegState::Define)
1219 .addImm(RealignmentPadding)
1221 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXrx64), AArch64::X15)
1222 .addReg(AArch64::SP)
1223 .addReg(AArch64::X16, RegState::Kill)
1226 } else {
1227 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15)
1228 .addReg(AArch64::SP)
1229 .addImm(RealignmentPadding)
1230 .addImm(0)
1232 }
1233
1234 uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
1235 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
1236 .addReg(AArch64::X15, RegState::Kill)
1238 AFI->setStackRealigned(true);
1239
1240 // No need for SEH instructions here; if we're realigning the stack,
1241 // we've set a frame pointer and already finished the SEH prologue.
1243 }
1244 if (X15Scratch != AArch64::NoRegister) {
1245 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), AArch64::X15)
1246 .addReg(AArch64::XZR)
1247 .addReg(X15Scratch, RegState::Undef)
1248 .addReg(X15Scratch, RegState::Implicit)
1250 }
1251}
1252
1253void AArch64PrologueEmitter::emitCalleeSavedGPRLocations(
1255 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1256 if (CSI.empty())
1257 return;
1258
1259 CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
1260 for (const auto &Info : CSI) {
1261 unsigned FrameIdx = Info.getFrameIdx();
1262 if (MFI.hasScalableStackID(FrameIdx))
1263 continue;
1264
1265 assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
1266 int64_t Offset = MFI.getObjectOffset(FrameIdx) - AFL.getOffsetOfLocalArea();
1267 CFIBuilder.buildOffset(Info.getReg(), Offset);
1268 }
1269}
1270
1271void AArch64PrologueEmitter::emitCalleeSavedSVELocations(
1273 // Add callee saved registers to move list.
1274 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1275 if (CSI.empty())
1276 return;
1277
1278 CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
1279
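// When VG is spilled (functions with streaming-mode changes), the CFA offsets
// for SVE saves are expressed in terms of the incoming VG, so look up the
// offset of its save slot from the CFA here for use by createCFAOffset.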
1280 std::optional<int64_t> IncomingVGOffsetFromDefCFA;
1281 if (AFL.requiresSaveVG(MF)) {
1282 auto IncomingVG = *find_if(
1283 reverse(CSI), [](auto &Info) { return Info.getReg() == AArch64::VG; });
1284 IncomingVGOffsetFromDefCFA = MFI.getObjectOffset(IncomingVG.getFrameIdx()) -
1285 AFL.getOffsetOfLocalArea();
1286 }
1287
1288 StackOffset PPRStackSize = AFL.getPPRStackSize(MF);
1289 for (const auto &Info : CSI) {
1290 int FI = Info.getFrameIdx();
1291 if (!MFI.hasScalableStackID(FI))
1292 continue;
1293
1294 // Not all unwinders may know about SVE registers, so assume the lowest
1295 // common denominator.
1296 assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
1297 MCRegister Reg = Info.getReg();
1298 if (!RegInfo.regNeedsCFI(Reg, Reg))
1299 continue;
1300
1301 StackOffset Offset =
1302 StackOffset::getScalable(MFI.getObjectOffset(FI)) -
1303 StackOffset::getFixed(AFI->getCalleeSavedStackSize(MFI));
1304
1305 // The scalable vectors are below (lower address) the scalable predicates
1306 // with split SVE objects, so we must subtract the size of the predicates.
1308 MFI.getStackID(FI) == TargetStackID::ScalableVector)
1309 Offset -= PPRStackSize;
1310
1311 CFIBuilder.insertCFIInst(
1312 createCFAOffset(RegInfo, Reg, Offset, IncomingVGOffsetFromDefCFA));
1313 }
1314}
1315
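// Returns true if MI is a return instruction that leaves an EH funclet
// (CATCHRET/CLEANUPRET) rather than a normal function return.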
1317 switch (MI.getOpcode()) {
1318 default:
1319 return false;
1320 case AArch64::CATCHRET:
1321 case AArch64::CLEANUPRET:
1322 return true;
1323 }
1324}
1325
1330 EmitCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
1331 HomPrologEpilog = AFL.homogeneousPrologEpilog(MF, &MBB);
1332 SEHEpilogueStartI = MBB.end();
1333}
1334
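// Move SP to an address below FP by the given offset (a negative scalable
// offset with an optional negative fixed part). This is used when
// deallocating SVE areas relative to the frame pointer.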
1335void AArch64EpilogueEmitter::moveSPBelowFP(MachineBasicBlock::iterator MBBI,
1337 // Other combinations could be supported, but are not currently needed.
1338 assert(Offset.getScalable() < 0 && Offset.getFixed() <= 0 &&
1339 "expected negative offset (with optional fixed portion)");
1340 Register Base = AArch64::FP;
1341 if (int64_t FixedOffset = Offset.getFixed()) {
1342 // If we have a negative fixed offset, we need to first subtract it in a
1343 // temporary register (to avoid briefly deallocating the scalable
1344 // portion of the offset).
1345 Base = MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
1346 emitFrameOffset(MBB, MBBI, DL, Base, AArch64::FP,
1347 StackOffset::getFixed(FixedOffset), TII,
1349 }
1350 emitFrameOffset(MBB, MBBI, DL, AArch64::SP, Base,
1351 StackOffset::getScalable(Offset.getScalable()), TII,
1353}
1354
1356 MachineBasicBlock::iterator EpilogueEndI = MBB.getLastNonDebugInstr();
1357 if (MBB.end() != EpilogueEndI) {
1358 DL = EpilogueEndI->getDebugLoc();
1359 IsFunclet = isFuncletReturnInstr(*EpilogueEndI);
1360 }
1361
1362 int64_t NumBytes =
1363 IsFunclet ? AFL.getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
1364
1365 // All calls are tail calls in GHC calling conv, and functions have no
1366 // prologue/epilogue.
1367 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
1368 return;
1369
1370 // How much of the stack used by incoming arguments this function is expected
1371 // to restore in this particular epilogue.
1372 int64_t ArgumentStackToRestore = AFL.getArgumentStackToRestore(MF, MBB);
1373 bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv(),
1374 MF.getFunction().isVarArg());
1375 unsigned FixedObject = AFL.getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
1376
1377 int64_t AfterCSRPopSize = ArgumentStackToRestore;
1378 auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
1379 // We cannot rely on the local stack size set in emitPrologue if the function
1380 // has funclets, as funclets have different local stack size requirements, and
1381 // the current value set in emitPrologue may be that of the containing
1382 // function.
1383 if (MF.hasEHFunclets())
1384 AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
1385
1386 if (HomPrologEpilog) {
1388 auto FirstHomogenousEpilogI = MBB.getFirstTerminator();
1389 if (FirstHomogenousEpilogI != MBB.begin()) {
1390 auto HomogeneousEpilog = std::prev(FirstHomogenousEpilogI);
1391 if (HomogeneousEpilog->getOpcode() == AArch64::HOM_Epilog)
1392 FirstHomogenousEpilogI = HomogeneousEpilog;
1393 }
1394
1395 // Adjust local stack
1396 emitFrameOffset(MBB, FirstHomogenousEpilogI, DL, AArch64::SP, AArch64::SP,
1397 StackOffset::getFixed(AFI->getLocalStackSize()), TII,
1399
1400 // SP has already been adjusted while restoring callee save regs.
1401 // We've already bailed out of the case that adjusts SP for arguments.
1402 assert(AfterCSRPopSize == 0);
1403 return;
1404 }
1405
1406 bool CombineSPBump = shouldCombineCSRLocalStackBump(NumBytes);
1407
1408 unsigned ProloguePopSize = PrologueSaveSize;
1410 // With CalleeSavesAboveFrameRecord ProloguePopSize is the amount of stack
1411 // that needs to be popped until we reach the start of the SVE save area.
1412 // The "FixedObject" stack occurs after the SVE area and must be popped
1413 // later.
1414 ProloguePopSize -= FixedObject;
1415 AfterCSRPopSize += FixedObject;
1416 }
1417
1418 // Assume we can't combine the last pop with the sp restore.
1419 if (!CombineSPBump && ProloguePopSize != 0) {
1420 MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
1421 while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
1422 AArch64InstrInfo::isSEHInstruction(*Pop) ||
1425 Pop = std::prev(Pop);
1426 // Converting the last ldp to a post-index ldp is valid only if the last
1427 // ldp's offset is 0.
1428 const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
1429 // If the offset is 0 and the AfterCSR pop is not actually trying to
1430 // allocate more stack for arguments (in space that an untimely interrupt
1431 // may clobber), convert it to a post-index ldp.
1432 if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
1434 Pop, DL, ProloguePopSize, EmitCFI, MachineInstr::FrameDestroy,
1435 ProloguePopSize);
1437 MachineBasicBlock::iterator AfterLastPop = std::next(Pop);
1438 if (AArch64InstrInfo::isSEHInstruction(*AfterLastPop))
1439 ++AfterLastPop;
1440 // If not, and CalleeSavesAboveFrameRecord is enabled, deallocate
1441 // callee-save non-SVE registers to move the stack pointer to the start of
1442 // the SVE area.
1443 emitFrameOffset(MBB, AfterLastPop, DL, AArch64::SP, AArch64::SP,
1444 StackOffset::getFixed(ProloguePopSize), TII,
1446 &HasWinCFI);
1447 } else {
1448 // Otherwise, make sure to emit an add after the last ldp.
1449 // We're doing this by transferring the size to be restored from the
1450 // adjustment *before* the CSR pops to the adjustment *after* the CSR
1451 // pops.
1452 AfterCSRPopSize += ProloguePopSize;
1453 }
1454 }
1455
1456 // Move past the restores of the callee-saved registers.
1457 // If we plan on combining the sp bump of the local stack size and the callee
1458 // save stack size, we might need to adjust the CSR save and restore offsets.
1459 MachineBasicBlock::iterator FirstGPRRestoreI = MBB.getFirstTerminator();
1460 MachineBasicBlock::iterator Begin = MBB.begin();
1461 while (FirstGPRRestoreI != Begin) {
1462 --FirstGPRRestoreI;
1463 if (!FirstGPRRestoreI->getFlag(MachineInstr::FrameDestroy) ||
1465 isPartOfSVECalleeSaves(FirstGPRRestoreI))) {
1466 ++FirstGPRRestoreI;
1467 break;
1468 } else if (CombineSPBump)
1469 fixupCalleeSaveRestoreStackOffset(*FirstGPRRestoreI,
1470 AFI->getLocalStackSize());
1471 }
1472
1473 if (NeedsWinCFI) {
1474 // Note that there are cases where we insert SEH opcodes in the
1475 // epilogue when we had no SEH opcodes in the prologue. For
1476 // example, when there is no stack frame but there are stack
1477 // arguments. Insert the SEH_EpilogStart and remove it later if
1478 // we didn't emit any SEH opcodes to avoid generating WinCFI for
1479 // functions that don't need it.
1480 BuildMI(MBB, FirstGPRRestoreI, DL, TII->get(AArch64::SEH_EpilogStart))
1482 SEHEpilogueStartI = FirstGPRRestoreI;
1483 --SEHEpilogueStartI;
1484 }
1485
1486 // Determine the ranges of SVE callee-saves. This is done before emitting any
1487 // code at the end of the epilogue (for Swift async), which can get in the way
1488 // of finding SVE callee-saves with CalleeSavesAboveFrameRecord.
1489 auto [PPR, ZPR] = getSVEStackFrameSizes();
1490 auto [PPRRange, ZPRRange] = partitionSVECS(
1491 MBB,
1493 ? MBB.getFirstTerminator()
1494 : FirstGPRRestoreI,
1495 PPR.CalleeSavesSize, ZPR.CalleeSavesSize, /*IsEpilogue=*/true);
1496
1497 if (HasFP && AFI->hasSwiftAsyncContext())
1498 emitSwiftAsyncContextFramePointer(EpilogueEndI, DL);
1499
1500 // If there is a single SP update, insert it before the ret and we're done.
1501 if (CombineSPBump) {
1502 assert(!AFI->hasSVEStackSize() && "Cannot combine SP bump with SVE");
1503
1504 // When we are about to restore the CSRs, the CFA register is SP again.
1505 if (EmitCFI && HasFP)
1507 .buildDefCFA(AArch64::SP, NumBytes);
1508
1509 emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
1510 StackOffset::getFixed(NumBytes + AfterCSRPopSize), TII,
1512 EmitCFI, StackOffset::getFixed(NumBytes));
1513 return;
1514 }
1515
1516 NumBytes -= PrologueSaveSize;
1517 assert(NumBytes >= 0 && "Negative stack allocation size!?");
1518
1519 StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize;
1520 SVEStackAllocations SVEAllocs = getSVEStackAllocations({PPR, ZPR});
1521
1522 // Deallocate the SVE area.
1524 assert(!SVEAllocs.AfterPPRs &&
1525 "unexpected SVE allocs after PPRs with CalleeSavesAboveFrameRecord");
1526 // If the callee-save area is before FP, restoring the FP implicitly
1527 // deallocates non-callee-save SVE allocations. Otherwise, deallocate them
1528 // explicitly.
1529 if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) {
1530 emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1532 false, NeedsWinCFI, &HasWinCFI);
1533 }
1534
1535 // Deallocate callee-save SVE registers.
1536 emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP,
1538 false, NeedsWinCFI, &HasWinCFI);
1539 } else if (AFI->hasSVEStackSize()) {
1540 // If we have stack realignment or variable-sized objects we must use the FP
1541 // to restore SVE callee saves (as there is an unknown amount of
1542 // data/padding between the SP and SVE CS area).
1543 Register BaseForSVEDealloc =
1544 (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP
1545 : AArch64::SP;
1546 if (SVECalleeSavesSize && BaseForSVEDealloc == AArch64::FP) {
1547 if (ZPR.CalleeSavesSize || SVELayout != SVEStackLayout::Split) {
1548 // The offset from the frame-pointer to the start of the ZPR saves.
1549 StackOffset FPOffsetZPR =
1550 -SVECalleeSavesSize - PPR.LocalsSize -
1551 StackOffset::getFixed(AFI->getCalleeSaveBaseToFrameRecordOffset());
1552 // Deallocate the stack space by moving the SP to the start of the
1553 // ZPR/PPR callee-save area.
1554 moveSPBelowFP(ZPRRange.Begin, FPOffsetZPR);
1555 }
1556 // With split SVE, the predicates are stored in a separate area above the
1557 // ZPR saves, so we must adjust the stack to the start of the PPRs.
1558 if (PPR.CalleeSavesSize && SVELayout == SVEStackLayout::Split) {
1559 // The offset from the frame-pointer to the start of the PPR saves.
1560 StackOffset FPOffsetPPR = -PPR.CalleeSavesSize;
1561 // Move to the start of the PPR area.
1562 assert(!FPOffsetPPR.getFixed() && "expected only scalable offset");
1563 emitFrameOffset(MBB, ZPRRange.End, DL, AArch64::SP, AArch64::FP,
1564 FPOffsetPPR, TII, MachineInstr::FrameDestroy);
1565 }
1566 } else if (BaseForSVEDealloc == AArch64::SP) {
1567 auto NonSVELocals = StackOffset::getFixed(NumBytes);
1568 auto CFAOffset = NonSVELocals + StackOffset::getFixed(PrologueSaveSize) +
1569 SVEAllocs.totalSize();
1570
1571 if (SVECalleeSavesSize || SVELayout == SVEStackLayout::Split) {
1572 // Deallocate non-SVE locals now. This is needed to reach the SVE callee
1573 // saves, but may also allow combining stack hazard bumps for split SVE.
1574 SVEAllocs.AfterZPRs += NonSVELocals;
1575 NumBytes -= NonSVELocals.getFixed();
1576 }
1577 // To deallocate the SVE stack, adjust by the allocations in reverse.
1578 emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP,
1580 false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1581 CFAOffset);
1582 CFAOffset -= SVEAllocs.AfterZPRs;
1583 assert(PPRRange.Begin == ZPRRange.End &&
1584 "Expected PPR restores after ZPR");
1585 emitFrameOffset(MBB, PPRRange.Begin, DL, AArch64::SP, AArch64::SP,
1587 false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1588 CFAOffset);
1589 CFAOffset -= SVEAllocs.AfterPPRs;
1590 emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP,
1592 false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1593 CFAOffset);
1594 }
1595
1596 if (EmitCFI)
1597 emitCalleeSavedSVERestores(
1598 SVELayout == SVEStackLayout::Split ? ZPRRange.End : PPRRange.End);
1599 }
1600
1601 if (!HasFP) {
1602 bool RedZone = AFL.canUseRedZone(MF);
1603 // If this was a redzone leaf function, we don't need to restore the
1604 // stack pointer (but we may need to pop stack args for fastcc).
1605 if (RedZone && AfterCSRPopSize == 0)
1606 return;
1607
1608 // Pop the local variables off the stack. If there are no callee-saved
1609 // registers, it means we are actually positioned at the terminator and can
1610 // combine stack increment for the locals and the stack increment for
1611 // callee-popped arguments into (possibly) a single instruction and be done.
1612 bool NoCalleeSaveRestore = PrologueSaveSize == 0;
1613 int64_t StackRestoreBytes = RedZone ? 0 : NumBytes;
1614 if (NoCalleeSaveRestore)
1615 StackRestoreBytes += AfterCSRPopSize;
1616
1618 MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1619 StackOffset::getFixed(StackRestoreBytes), TII,
1621 StackOffset::getFixed((RedZone ? 0 : NumBytes) + PrologueSaveSize));
1622
1623 // If we were able to combine the local stack pop with the argument pop,
1624 // then we're done.
1625 if (NoCalleeSaveRestore || AfterCSRPopSize == 0)
1626 return;
1627
1628 NumBytes = 0;
1629 }
1630
1631 // Restore the original stack pointer.
1632 // FIXME: Rather than doing the math here, we should instead just use
1633 // non-post-indexed loads for the restores if we aren't actually going to
1634 // be able to save any instructions.
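// (With variable-sized objects or stack realignment the current SP value is
// not a known distance from the frame base, so SP is instead recomputed from
// FP using the fixed frame-record-to-callee-save-base offset below.)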
1635 if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
1636 emitFrameOffset(
1637 MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::FP,
1638 StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()),
1639 TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1640 } else if (NumBytes)
1641 emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1642 StackOffset::getFixed(NumBytes), TII,
1643 MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1644
1645 // When we are about to restore the CSRs, the CFA register is SP again.
1646 if (EmitCFI && HasFP)
1647 CFIInstBuilder(MBB, FirstGPRRestoreI, MachineInstr::FrameDestroy)
1648 .buildDefCFA(AArch64::SP, PrologueSaveSize);
1649
1650 // This must be placed after the callee-save restore code because that code
1651 // assumes the SP is at the same location as it was after the callee-save save
1652 // code in the prologue.
1653 if (AfterCSRPopSize) {
1654 assert(AfterCSRPopSize > 0 && "attempting to reallocate arg stack that an "
1655 "interrupt may have clobbered");
1656
1657 emitFrameOffset(
1658 MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
1659 StackOffset::getFixed(AfterCSRPopSize), TII, MachineInstr::FrameDestroy,
1660 false, NeedsWinCFI, &HasWinCFI, EmitCFI,
1661 StackOffset::getFixed(AfterCSRPopSize - ArgumentStackToRestore));
1662 }
1663}
1664
1665bool AArch64EpilogueEmitter::shouldCombineCSRLocalStackBump(
1666 uint64_t StackBumpBytes) const {
1667 if (!AArch64PrologueEpilogueCommon::shouldCombineCSRLocalStackBump(
1668 StackBumpBytes))
1669 return false;
1670 if (MBB.empty())
1671 return true;
1672
1673 // Disable combined SP bump if the last instruction is an MTE tag store. It
1674 // is almost always better to merge SP adjustment into those instructions.
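// (The MTE tag stores have post-indexed forms, e.g. "st2g x9, [sp], #32", so
// the final SP increment can usually be folded into them rather than emitted
// as a separate add.)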
1675 MachineBasicBlock::iterator LastI = MBB.getFirstTerminator();
1676 MachineBasicBlock::iterator Begin = MBB.begin();
1677 while (LastI != Begin) {
1678 --LastI;
1679 if (LastI->isTransient())
1680 continue;
1681 if (!LastI->getFlag(MachineInstr::FrameDestroy))
1682 break;
1683 }
1684 switch (LastI->getOpcode()) {
1685 case AArch64::STGloop:
1686 case AArch64::STZGloop:
1687 case AArch64::STGi:
1688 case AArch64::STZGi:
1689 case AArch64::ST2Gi:
1690 case AArch64::STZ2Gi:
1691 return false;
1692 default:
1693 return true;
1694 }
1695 llvm_unreachable("unreachable");
1696}
1697
1698 void AArch64EpilogueEmitter::emitSwiftAsyncContextFramePointer(
1699 MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
1700 switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
1701 case SwiftAsyncFramePointerMode::DeploymentBased:
1702 // Avoid the reload as it is GOT relative, and instead fall back to the
1703 // hardcoded value below. This allows a mismatch between the OS and
1704 // application without immediately terminating on the difference.
1705 [[fallthrough]];
1706 case SwiftAsyncFramePointerMode::Always:
1707 // We need to reset FP to its untagged state on return. Bit 60 is
1708 // currently used to show the presence of an extended frame.
1709
1710 // BIC x29, x29, #0x1000_0000_0000_0000
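// (Sketch of the encoding: 0x10fe is the logical-immediate field {N=1,
// immr=3, imms=62}, i.e. 63 consecutive ones rotated right by 3, which
// materializes ~(1ULL << 60), so the AND below clears only bit 60.)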
1711 BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::ANDXri),
1712 AArch64::FP)
1713 .addUse(AArch64::FP)
1714 .addImm(0x10fe)
1715 .setMIFlag(MachineInstr::FrameDestroy);
1716 if (NeedsWinCFI) {
1717 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1718 .setMIFlag(MachineInstr::FrameDestroy);
1719 HasWinCFI = true;
1720 }
1721 break;
1722
1723 case SwiftAsyncFramePointerMode::Never:
1724 break;
1725 }
1726}
1727
1728 void AArch64EpilogueEmitter::emitShadowCallStackEpilogue(
1729 MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
1730 // Shadow call stack epilog: ldr x30, [x18, #-8]!
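// (This mirrors the prologue's "str x30, [x18], #8": the pre-indexed load
// restores LR and steps the shadow-stack pointer in x18 back down, which is
// why x18 is listed as a def as well as a use below.)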
1731 BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXpre))
1732 .addReg(AArch64::X18, RegState::Define)
1733 .addReg(AArch64::LR, RegState::Define)
1734 .addReg(AArch64::X18)
1735 .addImm(-8)
1736 .setMIFlag(MachineInstr::FrameDestroy);
1737
1738 if (NeedsWinCFI)
1739 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1740 .setMIFlag(MachineInstr::FrameDestroy);
1741
1742 if (AFI->needsAsyncDwarfUnwindInfo(MF))
1743 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameDestroy)
1744 .buildRestore(AArch64::X18);
1745}
1746
1747void AArch64EpilogueEmitter::emitCalleeSavedRestores(
1748 MachineBasicBlock::iterator MBBI, bool SVE) const {
1749 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1750 if (CSI.empty())
1751 return;
1752
1753 CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameDestroy);
1754
1755 for (const auto &Info : CSI) {
1756 if (SVE != MFI.hasScalableStackID(Info.getFrameIdx()))
1757 continue;
1758
1759 MCRegister Reg = Info.getReg();
1760 if (SVE && !RegInfo.regNeedsCFI(Reg, Reg))
1761 continue;
1762
1763 CFIBuilder.buildRestore(Info.getReg());
1764 }
1765}
1766
1767void AArch64EpilogueEmitter::finalizeEpilogue() const {
1768 if (AFI->needsShadowCallStackPrologueEpilogue(MF)) {
1769 emitShadowCallStackEpilogue(MBB.getFirstTerminator(), DL);
1771 }
1772 if (EmitCFI)
1773 emitCalleeSavedGPRRestores(MBB.getFirstTerminator());
1774 if (AFI->shouldSignReturnAddress(MF)) {
1775 // If pac-ret+leaf is in effect, PAUTH_EPILOGUE pseudo instructions
1776 // are inserted by emitPacRetPlusLeafHardening().
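// (The PAUTH_EPILOGUE pseudo is expanded later by the AArch64PointerAuth
// pass, typically into an AUTIASP/AUTIBSP that authenticates LR before the
// return.)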
1777 if (!AFL.shouldSignReturnAddressEverywhere(MF)) {
1778 BuildMI(MBB, MBB.getFirstTerminator(), DL,
1779 TII->get(AArch64::PAUTH_EPILOGUE))
1780 .setMIFlag(MachineInstr::FrameDestroy);
1781 }
1782 // AArch64PointerAuth pass will insert SEH_PACSignLR
1783 HasWinCFI &= !NeedsWinCFI;
1784 }
1785 if (HasWinCFI) {
1786 BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd))
1787 .setMIFlag(MachineInstr::FrameDestroy);
1788 if (!MF.hasWinCFI())
1789 MF.setHasWinCFI(true);
1790 }
1791 if (NeedsWinCFI) {
1792 assert(SEHEpilogueStartI != MBB.end());
1793 if (!HasWinCFI)
1794 MBB.erase(SEHEpilogueStartI);
1795 }
1796}
1797
1798} // namespace llvm