#define DEBUG_TYPE "frame-info"

static cl::opt<bool> EnableRedZone("aarch64-redzone",
                                   cl::desc("enable use of redzone on AArch64"),
                                   cl::init(false), cl::Hidden);

static cl::opt<bool> StackTaggingMergeSetTag(
    "stack-tagging-merge-settag",
    cl::desc("merge settag instruction in function epilog"), cl::init(true),
    cl::Hidden);

static cl::opt<bool> EnableHomogeneousPrologEpilog(
    "homogeneous-prolog-epilog", cl::Hidden,
    cl::desc("Emit homogeneous prologue and epilogue for the size "
             "optimization (default = off)"));

STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
  int64_t ArgumentPopSize = 0;
  if (IsTailCallReturn) {
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments.
    ArgumentPopSize = StackAdjust.getImm();
  }

  return ArgumentPopSize;
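
// Homogeneous prologue/epilogue: when enabled (-homogeneous-prolog-epilog),
// callee-save spills and restores are outlined into shared helper routines,
// trading a call/branch for smaller code. This requires the GPR callee saves
// to form even-sized pairs up to the FP/LR pair, which the check below
// enforces.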
bool AArch64FrameLowering::homogeneousPrologEpilog(
    MachineFunction &MF, MachineBasicBlock *Exit) const {

  if (AFI->hasSwiftAsyncContext() || AFI->hasStreamingModeChanges())
    return false;

  unsigned NumGPRs = 0;
  for (unsigned I = 0; CSRegs[I]; ++I) {
    Register Reg = CSRegs[I];
    if (Reg == AArch64::LR) {
      assert(CSRegs[I + 1] == AArch64::FP);
      if (NumGPRs % 2 != 0)
        return false;
      break;
    }
    if (AArch64::GPR64RegClass.contains(Reg))
      ++NumGPRs;
  }
bool AArch64FrameLowering::producePairRegisters(MachineFunction &MF) const {
  return produceCompactUnwindFrame(MF) || homogeneousPrologEpilog(MF);
}

  if (MI.isDebugInstr() || MI.isPseudo() ||
      MI.getOpcode() == AArch64::ADDXri ||
      MI.getOpcode() == AArch64::ADDSXri)
  if (!IsWin64 || IsFunclet) {
    return alignTo(AFI->getTailCallReservedStack(), 16);
  }

  if (!MF.getFunction().getAttributes().hasAttrSomewhere(
          Attribute::SwiftAsync))

  // To support EH funclets we allocate an UnwindHelp object.
  const unsigned UnwindHelpObject = (MF.hasEHFunclets() ? 8 : 0);
  return AFI->getTailCallReservedStack() +
         alignTo(VarArgsArea + UnwindHelpObject, 16);

  const unsigned RedZoneSize =
      Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction());

  bool LowerQRegCopyThroughMem = Subtarget.hasFPARMv8() &&
                                 !Subtarget.isNeonAvailable();

  return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize ||
           getSVEStackSize(MF) || LowerQRegCopyThroughMem);
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  int64_t Amount = I->getOperand(0).getImm();

  if (CalleePopAmount == 0) {
    assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");

           "non-reserved call frame without var sized objects?");

  } else if (CalleePopAmount != 0) {
    assert(CalleePopAmount < 0xffffff && "call frame too large");
void AArch64FrameLowering::emitCalleeSavedGPRLocations(

  bool LocallyStreaming =
      Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface();

  for (const auto &Info : CSI) {
    unsigned FrameIdx = Info.getFrameIdx();

    assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
    int64_t DwarfReg = TRI.getDwarfRegNum(Info.getReg(), true);

        (!LocallyStreaming &&
         DwarfReg == TRI.getDwarfRegNum(AArch64::VG, true)))
void AArch64FrameLowering::emitCalleeSavedSVELocations(

  for (const auto &Info : CSI) {

    assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");

  const MCInstrDesc &CFIDesc = TII.get(TargetOpcode::CFI_INSTRUCTION);

      nullptr, TRI.getDwarfRegNum(AArch64::SP, true), 0));

  if (MFI.shouldSignReturnAddress(MF)) {

  if (MFI.needsShadowCallStackPrologueEpilogue(MF))
        TRI.getDwarfRegNum(AArch64::X18, true));
  const std::vector<CalleeSavedInfo> &CSI =
      MF.getFrameInfo().getCalleeSavedInfo();
  for (const auto &Info : CSI) {
    unsigned Reg = Info.getReg();
    if (!TRI.regNeedsCFI(Reg, Reg))
        TRI.getDwarfRegNum(Reg, true));

  for (const auto &Info : CSI) {
    unsigned Reg = Info.getReg();

    if (!Info.isRestored())
        nullptr, TRI.getDwarfRegNum(Info.getReg(), true)));
void AArch64FrameLowering::emitCalleeSavedGPRRestores(

void AArch64FrameLowering::emitCalleeSavedSVERestores(

  static const int64_t MAX_BYTES_PER_SCALABLE_BYTE = 16;
  return Size.getScalable() * MAX_BYTES_PER_SCALABLE_BYTE + Size.getFixed();
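
// upperBound() above conservatively converts a mixed fixed+scalable
// StackOffset into a fixed byte bound by assuming the architectural maximum
// vector length (vscale <= 16, i.e. 16 bytes of real stack per scalable
// byte). For example, a StackOffset of 32 fixed + 16 scalable bytes is
// bounded by 32 + 16 * 16 = 288 bytes.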
void AArch64FrameLowering::allocateStackSpace(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    int64_t RealignmentPadding, StackOffset AllocSize, bool NeedsWinCFI,
    bool *HasWinCFI, bool EmitCFI, StackOffset InitialOffset,
    bool FollowupAllocs) const {

  const uint64_t AndMask = ~(MaxAlign - 1);

  Register TargetReg = RealignmentPadding
                     EmitCFI, InitialOffset);

  if (RealignmentPadding) {

  if (AllocSize.getScalable() == 0 && RealignmentPadding == 0) {
    assert(ScratchReg != AArch64::NoRegister);

    if (FollowupAllocs) {

  if (upperBound(AllocSize) + RealignmentPadding <= ProbeSize) {
    Register ScratchReg = RealignmentPadding
    assert(ScratchReg != AArch64::NoRegister);
                     EmitCFI, InitialOffset);
    if (RealignmentPadding) {

    if (FollowupAllocs || upperBound(AllocSize) + RealignmentPadding >

  assert(TargetReg != AArch64::NoRegister);
                   EmitCFI, InitialOffset);
  if (RealignmentPadding) {

  if (RealignmentPadding)
  case AArch64::W##n:                                                          \
  case AArch64::X##n:                                                          \

  case AArch64::B##n:                                                          \
  case AArch64::H##n:                                                          \
  case AArch64::S##n:                                                          \
  case AArch64::D##n:                                                          \
  case AArch64::Q##n:                                                          \
    return HasSVE ? AArch64::Z##n : AArch64::Q##n
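
// These macro-generated cases map any view of a register (W/X for GPRs;
// B/H/S/D/Q for FP/vector) to the widest register that must be cleared:
// the X-register for GPRs, and the Z-register when SVE is available (else
// the Q-register) for vectors, so zeroing covers the full architectural
// register.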
void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,

  bool HasSVE = STI.hasSVE();

    if (TRI.isGeneralPurposeRegister(MF, Reg)) {
      GPRsToZero.set(XReg);
      FPRsToZero.set(XReg);

      {AArch64::P0, AArch64::P1, AArch64::P2, AArch64::P3, AArch64::P4,
       AArch64::P5, AArch64::P6, AArch64::P7, AArch64::P8, AArch64::P9,
       AArch64::P10, AArch64::P11, AArch64::P12, AArch64::P13, AArch64::P14,
    if (RegsToZero[PReg])
  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveRegs.addReg(CSRegs[i]);

  for (unsigned Reg : AArch64::GPR64RegClass) {
  return AArch64::NoRegister;

         StackSizeInBytes >= uint64_t(MFI.getStackProbeSize());

         F.needsUnwindTableEntry();
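
// shouldCombineCSRLocalStackBump: decide whether the callee-save SP
// adjustment and the local-area allocation can be merged into a single SP
// update (e.g. one "stp x29, x30, [sp, #-48]!" instead of that store plus a
// separate "sub sp, sp, #32"). The combination is not used with homogeneous
// prologues, variable-sized objects, or stack realignment, as checked below.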
bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(

  if (homogeneousPrologEpilog(MF))

  if (MFI.hasVarSizedObjects())

  if (RegInfo->hasStackRealignment(MF))

bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue(

  if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes))

  while (LastI != Begin) {
    if (LastI->isTransient())

  switch (LastI->getOpcode()) {
  case AArch64::STGloop:
  case AArch64::STZGloop:
  case AArch64::STZGi:
  case AArch64::ST2Gi:
  case AArch64::STZ2Gi:
  unsigned Opc = MBBI->getOpcode();

  unsigned ImmIdx = MBBI->getNumOperands() - 1;
  int Imm = MBBI->getOperand(ImmIdx).getImm();

  case AArch64::LDPDpost:
  case AArch64::STPDpre: {
    unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP_X))

  case AArch64::LDPXpost:
  case AArch64::STPXpre: {
    if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR_X))
    else
      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP_X))

  case AArch64::LDRDpost:
  case AArch64::STRDpre: {
    unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg_X))

  case AArch64::LDRXpost:
  case AArch64::STRXpre: {
    unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());

  case AArch64::STPDi:
  case AArch64::LDPDi: {
    unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
    unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());

  case AArch64::STPXi:
  case AArch64::LDPXi: {
    if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)

  case AArch64::STRXui:
  case AArch64::LDRXui: {
    int Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());

  case AArch64::STRDui:
  case AArch64::LDRDui: {
    unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());

  case AArch64::STPQi:
  case AArch64::LDPQi: {
    unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
    unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveAnyRegQP))

  case AArch64::LDPQpost:
  case AArch64::STPQpre: {
    unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveAnyRegQPX))
                             unsigned LocalStackSize) {

  unsigned ImmIdx = MBBI->getNumOperands() - 1;
  switch (MBBI->getOpcode()) {

  case AArch64::SEH_SaveFPLR:
  case AArch64::SEH_SaveRegP:
  case AArch64::SEH_SaveReg:
  case AArch64::SEH_SaveFRegP:
  case AArch64::SEH_SaveFReg:
  case AArch64::SEH_SaveAnyRegQP:
  case AArch64::SEH_SaveAnyRegQPX:
    ImmOpnd = &MBBI->getOperand(ImmIdx);
  if (ST.isTargetDarwin())

  unsigned Opc = MBBI->getOpcode();
  if (Opc == AArch64::CNTD_XPiI || Opc == AArch64::RDSVLI_XI ||
      Opc == AArch64::UBFMXri)

  if (Opc == AArch64::ORRXrr)

  if (Opc == AArch64::BL) {
    auto Op1 = MBBI->getOperand(0);
    return Op1.isSymbol() &&
           (StringRef(Op1.getSymbolName()) == "__arm_get_current_vg");
    bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
    int CFAOffset = 0) {

  switch (MBBI->getOpcode()) {
  case AArch64::STPXi:
    NewOpc = AArch64::STPXpre;
    break;
  case AArch64::STPDi:
    NewOpc = AArch64::STPDpre;
    break;
  case AArch64::STPQi:
    NewOpc = AArch64::STPQpre;
    break;
  case AArch64::STRXui:
    NewOpc = AArch64::STRXpre;
    break;
  case AArch64::STRDui:
    NewOpc = AArch64::STRDpre;
    break;
  case AArch64::STRQui:
    NewOpc = AArch64::STRQpre;
    break;
  case AArch64::LDPXi:
    NewOpc = AArch64::LDPXpost;
    break;
  case AArch64::LDPDi:
    NewOpc = AArch64::LDPDpost;
    break;
  case AArch64::LDPQi:
    NewOpc = AArch64::LDPQpost;
    break;
  case AArch64::LDRXui:
    NewOpc = AArch64::LDRXpost;
    break;
  case AArch64::LDRDui:
    NewOpc = AArch64::LDRDpost;
    break;
  case AArch64::LDRQui:
    NewOpc = AArch64::LDRQpost;
    break;
  }

  auto SEH = std::next(MBBI);
    SEH->eraseFromParent();

  int64_t MinOffset, MaxOffset;
      NewOpc, Scale, Width, MinOffset, MaxOffset);

  if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
      CSStackSizeInc < MinOffset * (int64_t)Scale.getFixedValue() ||
      CSStackSizeInc > MaxOffset * (int64_t)Scale.getFixedValue()) {
        false, false, nullptr, EmitCFI,
    return std::prev(MBBI);
  }

  unsigned OpndIdx = 0;
  for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
    MIB.add(MBBI->getOperand(OpndIdx));

  assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
         "Unexpected immediate offset in first/last callee-save save/restore "
  assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  assert(CSStackSizeInc % Scale == 0);
  MIB.addImm(CSStackSizeInc / (int)Scale);
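
// convertCalleeSaveRestoreToSPPrePostIncDec (above) rewrites the first
// callee-save store (or last restore) so that it also performs the SP
// adjustment, e.g.
//   sub sp, sp, #16
//   stp x29, x30, [sp]      ; becomes:  stp x29, x30, [sp, #-16]!
// If the adjustment does not fit the pre/post-index immediate range computed
// from MinOffset/MaxOffset, the SP update is emitted separately instead.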
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case AArch64::STPXi:
  case AArch64::STRXui:
  case AArch64::STPDi:
  case AArch64::STRDui:
  case AArch64::LDPXi:
  case AArch64::LDRXui:
  case AArch64::LDPDi:
  case AArch64::LDRDui:

  case AArch64::STPQi:
  case AArch64::STRQui:
  case AArch64::LDPQi:
  case AArch64::LDRQui:

  unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
  assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");

  assert(LocalStackSize % Scale == 0);
  OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);

  assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
         "Expecting a SEH instruction");
  switch (I->getOpcode()) {
  case AArch64::PTRUE_C_B:
  case AArch64::LD1B_2Z_IMM:
  case AArch64::ST1B_2Z_IMM:
  case AArch64::STR_ZXI:
  case AArch64::STR_PXI:
  case AArch64::LDR_ZXI:
  case AArch64::LDR_PXI:
                                  bool NeedsUnwindInfo) {

  if (NeedsUnwindInfo) {
    // Emit a CFI instruction that causes 8 to be subtracted from the value of
    // x18 when unwinding past this frame.
    static const char CFIInst[] = {
        dwarf::DW_CFA_val_expression,
        18, // register
        2,  // length
        static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
        static_cast<char>(-8) & 0x7f, // addend (sleb128)
    };
        nullptr, StringRef(CFIInst, sizeof(CFIInst))));
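
  // The matching prologue instruction is a post-incrementing store of the
  // return address to the shadow call stack:
  //   str x30, [x18], #8
  // so the escape above lets the unwinder recover the caller's x18 by
  // subtracting 8 again.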
  const int OffsetToFirstCalleeSaveFromFP =

  unsigned Reg = TRI->getDwarfRegNum(FramePtr, true);
      nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP));
  bool HasFP = hasFP(MF);
  bool HasWinCFI = false;

  while (NonFrameStart != End &&

  if (NonFrameStart != MBB.end()) {

    if (NonFrameStart == MBB.end())

    for (auto &Op : MI.operands())
      if (Op.isReg() && Op.isDef())
        assert(!LiveRegs.contains(Op.getReg()) &&
               "live register clobbered by inserted prologue instructions");

  if (MFnI.needsShadowCallStackPrologueEpilogue(MF))
                               MFnI.needsDwarfUnwindInfo(MF));

  if (MFnI.shouldSignReturnAddress(MF)) {

  if (EmitCFI && MFnI.isMTETagged()) {

    assert(!HasFP && "unexpected function without stack frame but with FP");
           "unexpected function without stack frame but with SVE objects");
      ++NumRedZoneFunctions;

  bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
  bool HomPrologEpilog = homogeneousPrologEpilog(MF);
  if (CombineSPBump) {
    assert(!SVEStackSize && "Cannot combine SP bump with SVE");
  } else if (HomPrologEpilog) {
    NumBytes -= PrologueSaveSize;
  } else if (PrologueSaveSize != 0) {
        MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI,
    NumBytes -= PrologueSaveSize;
  }
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

                                        NeedsWinCFI, &HasWinCFI);
  if (!IsFunclet && HasFP) {

    bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
    if (HaveInitialContext)
    Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;

    if (HomPrologEpilog) {

  if (NeedsWinCFI && HasWinCFI) {
    NeedsWinCFI = false;

    emitCalleeSavedGPRLocations(MBB, MBBI);

  const bool NeedsRealignment =
      NumBytes && !IsFunclet && RegInfo->hasStackRealignment(MF);
  const int64_t RealignmentPadding =

    uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4;

    if (NumBytes >= (1 << 28))
          "unwinding purposes");

    uint32_t LowNumWords = NumWords & 0xFFFF;

    if ((NumWords & 0xFFFF0000) != 0) {
          .addImm((NumWords & 0xFFFF0000) >> 16)

    if (RealignmentPadding > 0) {
      if (RealignmentPadding >= 4096) {
            .addImm(RealignmentPadding)

            .addImm(RealignmentPadding)
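
    // Large Windows frames are allocated via the stack probe helper: the
    // size in 16-byte units is materialized into x15 (low half first, then a
    // MOVK for the high half when needed), __chkstk is called, and SP is
    // lowered by x15 * 16. Realignment padding is probed the same way once
    // it reaches a page (4096 bytes) or more.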
  StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;

    LLVM_DEBUG(dbgs() << "SVECalleeSavedStackSize = " << CalleeSavedSize

    CalleeSavesBegin = MBBI;
    CalleeSavesEnd = MBBI;

    SVELocalsSize = SVEStackSize - SVECalleeSavesSize;

    allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize, false,
                       nullptr, EmitAsyncCFI && !HasFP, CFAOffset,
    CFAOffset += SVECalleeSavesSize;

    emitCalleeSavedSVELocations(MBB, CalleeSavesEnd);

         "Cannot use redzone with stack realignment");
  allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding,
                     NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,

  if (!IsFunclet && RegInfo->hasBasePointer(MF)) {

  if (NeedsWinCFI && HasWinCFI) {

  if (IsFunclet && F.hasPersonalityFn()) {

  if (EmitCFI && !EmitAsyncCFI) {
        *RegInfo, AArch64::SP, AArch64::SP, TotalSize,

    emitCalleeSavedGPRLocations(MBB, MBBI);
    emitCalleeSavedSVELocations(MBB, MBBI);
  switch (MI.getOpcode()) {
  case AArch64::CATCHRET:
  case AArch64::CLEANUPRET:

  bool HasWinCFI = false;
  bool IsFunclet = false;

    DL = MBBI->getDebugLoc();

    BuildMI(MBB, MBB.getFirstTerminator(), DL,
            TII->get(AArch64::PAUTH_EPILOGUE))
        .setMIFlag(MachineInstr::FrameDestroy);

            TII->get(AArch64::SEH_EpilogEnd))

  int64_t AfterCSRPopSize = ArgumentStackToRestore;

  if (homogeneousPrologEpilog(MF, &MBB)) {

    auto HomogeneousEpilog = std::prev(LastPopI);
    if (HomogeneousEpilog->getOpcode() == AArch64::HOM_Epilog)
      LastPopI = HomogeneousEpilog;
    assert(AfterCSRPopSize == 0);

  bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);

  bool CombineAfterCSRBump = false;
  if (!CombineSPBump && PrologueSaveSize != 0) {
    while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
      Pop = std::prev(Pop);

    const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
    if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
          MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, EmitCFI,
      AfterCSRPopSize += PrologueSaveSize;
      CombineAfterCSRBump = true;

  while (LastPopI != Begin) {
  } else if (CombineSPBump)
                    NeedsWinCFI, &HasWinCFI);
    EpilogStartI = LastPopI;

  if (CombineSPBump) {
    assert(!SVEStackSize && "Cannot combine SP bump with SVE");

    if (EmitCFI && hasFP(MF)) {
      unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);

  NumBytes -= PrologueSaveSize;
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;

    RestoreBegin = std::prev(RestoreEnd);
    while (RestoreBegin != MBB.begin() &&

    DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
    DeallocateAfter = CalleeSavedSizeAsOffset;

        MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
        false, false, nullptr, EmitCFI && !hasFP(MF),
        false, nullptr, EmitCFI && !hasFP(MF),
        false, nullptr, EmitCFI && !hasFP(MF),

    emitCalleeSavedSVERestores(MBB, RestoreEnd);
    if (RedZone && AfterCSRPopSize == 0)

    bool NoCalleeSaveRestore = PrologueSaveSize == 0;
    int64_t StackRestoreBytes = RedZone ? 0 : NumBytes;
    if (NoCalleeSaveRestore)
      StackRestoreBytes += AfterCSRPopSize;

        MBB, LastPopI, DL, AArch64::SP, AArch64::SP,

    if (NoCalleeSaveRestore || AfterCSRPopSize == 0) {

        MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
  } else if (NumBytes)

  if (EmitCFI && hasFP(MF)) {
    unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);

  if (AfterCSRPopSize) {
    assert(AfterCSRPopSize > 0 && "attempting to reallocate arg stack that an "
                                  "interrupt may have clobbered");
                    false, NeedsWinCFI, &HasWinCFI, EmitCFI,
  if (MFI.isVariableSizedObjectIndex(FI)) {

  bool IsFixed = MFI.isFixedObjectIndex(FI);

  if (!IsFixed && !IsCSR)
    ScalableOffset = -SVEStackSize;

                                 int64_t ObjectOffset) {
  bool IsWin64 =
      Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
  unsigned FixedObject =

                                 int64_t ObjectOffset) {

  return RegInfo->getLocalAddressRegister(MF) == AArch64::FP

                                                 bool ForSimm) const {

  bool isFixed = MFI.isFixedObjectIndex(FI);
    const MachineFunction &MF, int64_t ObjectOffset, bool isFixed, bool isSVE,
    Register &FrameReg, bool PreferFP, bool ForSimm) const {

  PreferFP &= !SVEStackSize;

  } else if (isCSR && RegInfo->hasStackRealignment(MF)) {
    assert(hasFP(MF) && "Re-aligned stack must have frame pointer");
  } else if (hasFP(MF) && !RegInfo->hasStackRealignment(MF)) {
    bool FPOffsetFits = !ForSimm || FPOffset >= -256;
    PreferFP |= Offset > -FPOffset && !SVEStackSize;

    if (MFI.hasVarSizedObjects()) {
      bool CanUseBP = RegInfo->hasBasePointer(MF);
      if (FPOffsetFits && CanUseBP)
    } else if (FPOffset >= 0) {
    } else if (MF.hasEHFunclets() && !RegInfo->hasBasePointer(MF)) {
             "Funclets should only be present on Win64");
      if (FPOffsetFits && PreferFP)

  assert(((isFixed || isCSR) || !RegInfo->hasStackRealignment(MF) || !UseFP) &&
         "In the presence of dynamic stack pointer realignment, "
         "non-argument/CSR objects cannot be accessed through the frame pointer");

      RegInfo->hasStackRealignment(MF))) {
    FrameReg = RegInfo->getFrameRegister(MF);
    FrameReg = RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister()

    if (UseFP && !(isFixed || isCSR))
      ScalableOffset = -SVEStackSize;
    if (!UseFP && (isFixed || isCSR))
      ScalableOffset = SVEStackSize;

    FrameReg = RegInfo->getFrameRegister(MF);

  if (RegInfo->hasBasePointer(MF))
    FrameReg = RegInfo->getBaseRegister();
    assert(!MFI.hasVarSizedObjects() &&
           "Can't use SP when we have var sized objects.");
    FrameReg = AArch64::SP;
           Attrs.hasAttrSomewhere(Attribute::SwiftError)) &&

                                             bool NeedsWinCFI, bool IsFirst,

  if (Reg2 == AArch64::FP)
    return true;

  if (TRI->getEncodingValue(Reg2) == TRI->getEncodingValue(Reg1) + 1)

  if (Reg1 >= AArch64::X19 && Reg1 <= AArch64::X27 &&
      (Reg1 - AArch64::X19) % 2 == 0 && Reg2 == AArch64::LR && !IsFirst)

                                      bool UsesWinAAPCS, bool NeedsWinCFI,
                                      bool NeedsFrameRecord, bool IsFirst,

  if (NeedsFrameRecord)
    return Reg2 == AArch64::LR;
  unsigned Reg1 = AArch64::NoRegister;
  unsigned Reg2 = AArch64::NoRegister;

  enum RegType { GPR, FPR64, FPR128, PPR, ZPR, VG } Type;

  RegPairInfo() = default;

  bool isPaired() const { return Reg2 != AArch64::NoRegister; }

  unsigned getScale() const {

  bool isScalable() const { return Type == PPR || Type == ZPR; }

  for (unsigned PReg = AArch64::P8; PReg <= AArch64::P15; ++PReg) {
    if (SavedRegs.test(PReg)) {
      unsigned PNReg = PReg - AArch64::P0 + AArch64::PN0;

  return AArch64::NoRegister;
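
// Callee saves are formed into register pairs wherever possible so a single
// ldp/stp (or ld1b/st1b for paired SVE Z-registers) moves two registers per
// instruction, e.g. "stp x20, x19, [sp, #16]". Each RegPairInfo records the
// pair, its type, and its scaled frame offset.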
                                      bool NeedsFrameRecord) {

  unsigned Count = CSI.size();
         "Odd number of callee-saved regs to spill!");

  int StackFillDir = -1;
  unsigned FirstReg = 0;
    FirstReg = Count - 1;

  for (unsigned i = FirstReg; i < Count; i += RegInc) {
    RPI.Reg1 = CSI[i].getReg();

    if (AArch64::GPR64RegClass.contains(RPI.Reg1))
      RPI.Type = RegPairInfo::GPR;
    else if (AArch64::FPR64RegClass.contains(RPI.Reg1))
      RPI.Type = RegPairInfo::FPR64;
    else if (AArch64::FPR128RegClass.contains(RPI.Reg1))
      RPI.Type = RegPairInfo::FPR128;
    else if (AArch64::ZPRRegClass.contains(RPI.Reg1))
      RPI.Type = RegPairInfo::ZPR;
    else if (AArch64::PPRRegClass.contains(RPI.Reg1))
      RPI.Type = RegPairInfo::PPR;
    else if (RPI.Reg1 == AArch64::VG)
      RPI.Type = RegPairInfo::VG;
      Register NextReg = CSI[i + RegInc].getReg();
      bool IsFirst = i == FirstReg;
      switch (RPI.Type) {
      case RegPairInfo::GPR:
        if (AArch64::GPR64RegClass.contains(NextReg) &&
                                       NeedsWinCFI, NeedsFrameRecord, IsFirst,
      case RegPairInfo::FPR64:
        if (AArch64::FPR64RegClass.contains(NextReg) &&
      case RegPairInfo::FPR128:
        if (AArch64::FPR128RegClass.contains(NextReg))
      case RegPairInfo::PPR:
      case RegPairInfo::ZPR:
        if (((RPI.Reg1 - AArch64::Z0) & 1) == 0 && (NextReg == RPI.Reg1 + 1))
      case RegPairInfo::VG:
      }

    assert((!RPI.isPaired() ||
            (CSI[i].getFrameIdx() + RegInc == CSI[i + RegInc].getFrameIdx())) &&
           "Out of order callee saved regs!");

    assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg2 != AArch64::FP ||
            RPI.Reg1 == AArch64::LR) &&
           "FrameRecord must be allocated together with LR");

    assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg1 != AArch64::FP ||
            RPI.Reg2 == AArch64::LR) &&
           "FrameRecord must be allocated together with LR");

            ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
             RPI.Reg1 + 1 == RPI.Reg2))) &&
           "Callee-save registers not saved as adjacent register pair!");
    RPI.FrameIdx = CSI[i].getFrameIdx();
      RPI.FrameIdx = CSI[i + RegInc].getFrameIdx();
    int Scale = RPI.getScale();

    int OffsetPre = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
    assert(OffsetPre % Scale == 0);

    if (RPI.isScalable())
      ScalableByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale);
      ByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale);

        ((!IsWindows && RPI.Reg2 == AArch64::FP) ||
         (IsWindows && RPI.Reg2 == AArch64::LR)))
      ByteOffset += StackFillDir * 8;

    if (NeedGapToAlignStack && !NeedsWinCFI && !RPI.isScalable() &&
        RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired() &&
        ByteOffset % 16 != 0) {
      ByteOffset += 8 * StackFillDir;
      assert(MFI.getObjectAlign(RPI.FrameIdx) <= Align(16));
      MFI.setObjectAlignment(RPI.FrameIdx, Align(16));
      NeedGapToAlignStack = false;
    }

    int OffsetPost = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
    assert(OffsetPost % Scale == 0);
    int Offset = NeedsWinCFI ? OffsetPre : OffsetPost;

        ((!IsWindows && RPI.Reg2 == AArch64::FP) ||
         (IsWindows && RPI.Reg2 == AArch64::LR)))
    RPI.Offset = Offset / Scale;

    assert((!RPI.isPaired() ||
            (!RPI.isScalable() && RPI.Offset >= -64 && RPI.Offset <= 63) ||
            (RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) &&
           "Offset out of bounds for LDP/STP immediate");

    if (NeedsFrameRecord &&
        ((!IsWindows && RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
         (IsWindows && RPI.Reg1 == AArch64::FP && RPI.Reg2 == AArch64::LR)))

    MFI.setObjectAlignment(CSI[0].getFrameIdx(), Align(16));

  std::reverse(RegPairs.begin(), RegPairs.end());
  MRI.freezeReservedRegs();

  if (homogeneousPrologEpilog(MF)) {

    for (auto &RPI : RegPairs) {
      if (!MRI.isReserved(RPI.Reg1))
      if (RPI.isPaired() && !MRI.isReserved(RPI.Reg2))

  bool PTrueCreated = false;
    unsigned Reg1 = RPI.Reg1;
    unsigned Reg2 = RPI.Reg2;

    switch (RPI.Type) {
    case RegPairInfo::GPR:
      StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
      Size = 8;
      Alignment = Align(8);
      break;
    case RegPairInfo::FPR64:
      StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
      Size = 8;
      Alignment = Align(8);
      break;
    case RegPairInfo::FPR128:
      StrOpc = RPI.isPaired() ? AArch64::STPQi : AArch64::STRQui;
      Size = 16;
      Alignment = Align(16);
      break;
    case RegPairInfo::ZPR:
      StrOpc = RPI.isPaired() ? AArch64::ST1B_2Z_IMM : AArch64::STR_ZXI;
      Size = 16;
      Alignment = Align(16);
      break;
    case RegPairInfo::PPR:
      StrOpc = AArch64::STR_PXI;
      Size = 2;
      Alignment = Align(2);
      break;
    case RegPairInfo::VG:
      StrOpc = AArch64::STRXui;
      Size = 8;
      Alignment = Align(8);
      break;
    }
    unsigned X0Scratch = AArch64::NoRegister;
    if (Reg1 == AArch64::VG) {
      assert(Reg1 != AArch64::NoRegister);

      if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface() &&

            return STI.getRegisterInfo()->isSuperOrSubRegisterEq(
                AArch64::X0, LiveIn.PhysReg);

      if (X0Scratch != AArch64::NoRegister)

      const uint32_t *RegMask = TRI->getCallPreservedMask(

               dbgs() << ") -> fi#(" << RPI.FrameIdx;
               if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;

    assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
           "Windows unwinding requires a consecutive (FP,LR) pair");
    unsigned FrameIdxReg1 = RPI.FrameIdx;
    unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
    if (NeedsWinCFI && RPI.isPaired()) {

    if (RPI.isPaired() && RPI.isScalable()) {
      assert(((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) && PnReg != 0) &&
             "Expects SVE2.1 or SME2 target and a predicate register");
#ifdef EXPENSIVE_CHECKS
      auto IsPPR = [](const RegPairInfo &c) {
        return c.Type == RegPairInfo::PPR;
      };
      auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR);
      auto IsZPR = [](const RegPairInfo &c) {
        return c.Type == RegPairInfo::ZPR;
      };
      auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR);
      assert(!(PPRBegin < ZPRBegin) &&
             "Expected callee save predicate to be handled first");
#endif
      if (!PTrueCreated) {
        PTrueCreated = true;
      if (!MRI.isReserved(Reg1))
      if (!MRI.isReserved(Reg2))
      MIB.addReg(AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0));

      if (!MRI.isReserved(Reg1))
      if (RPI.isPaired()) {
        if (!MRI.isReserved(Reg2))
    if (RPI.Type == RegPairInfo::ZPR || RPI.Type == RegPairInfo::PPR) {

    if (X0Scratch != AArch64::NoRegister)

    DL = MBBI->getDebugLoc();

  if (homogeneousPrologEpilog(MF, &MBB)) {

    for (auto &RPI : RegPairs) {

  auto IsPPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::PPR; };
  auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR);
  auto PPREnd = std::find_if_not(PPRBegin, RegPairs.end(), IsPPR);
  std::reverse(PPRBegin, PPREnd);
  auto IsZPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::ZPR; };
  auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR);
  auto ZPREnd = std::find_if_not(ZPRBegin, RegPairs.end(), IsZPR);
  std::reverse(ZPRBegin, ZPREnd);

  bool PTrueCreated = false;
  for (const RegPairInfo &RPI : RegPairs) {
    unsigned Reg1 = RPI.Reg1;
    unsigned Reg2 = RPI.Reg2;
    switch (RPI.Type) {
    case RegPairInfo::GPR:
      LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
      Size = 8;
      Alignment = Align(8);
      break;
    case RegPairInfo::FPR64:
      LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
      Size = 8;
      Alignment = Align(8);
      break;
    case RegPairInfo::FPR128:
      LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui;
      Size = 16;
      Alignment = Align(16);
      break;
    case RegPairInfo::ZPR:
      LdrOpc = RPI.isPaired() ? AArch64::LD1B_2Z_IMM : AArch64::LDR_ZXI;
      Size = 16;
      Alignment = Align(16);
      break;
    case RegPairInfo::PPR:
      LdrOpc = AArch64::LDR_PXI;
      Size = 2;
      Alignment = Align(2);
      break;
    case RegPairInfo::VG:
      break;
    }

               dbgs() << ") -> fi#(" << RPI.FrameIdx;
               if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
    unsigned FrameIdxReg1 = RPI.FrameIdx;
    unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
    if (NeedsWinCFI && RPI.isPaired()) {

    if (RPI.isPaired() && RPI.isScalable()) {
      assert(((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) && PnReg != 0) &&
             "Expects SVE2.1 or SME2 target and a predicate register");
#ifdef EXPENSIVE_CHECKS
      assert(!(PPRBegin < ZPRBegin) &&
             "Expected callee save predicate to be handled first");
#endif
      if (!PTrueCreated) {
        PTrueCreated = true;
      MIB.addReg(AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0),

    if (RPI.isPaired()) {

      dyn_cast_or_null<FixedStackPseudoSourceValue>(MMO->getPseudoValue());
    return std::optional<int>(PSV->getFrameIndex());
  return std::nullopt;

  if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
    return std::nullopt;
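
// Stack hazard slot: on some SME targets, a GPR access adjacent to an
// FPR/SVE access of the same stack region incurs a streaming-memory hazard.
// When FPRs are saved or FPR/SVE objects live on the stack, a padding slot
// is reserved to separate the GPR and FPR areas. The scan below classifies
// each frame object by access kind (bit 2 marks FPR/SVE accesses, bit 1 GPR
// accesses); an object counts as an FPR object only if it is accessed purely
// by FPR/SVE operations, i.e. (B & 3) == 2.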
void AArch64FrameLowering::determineStackHazardSlot(

  bool HasFPRCSRs = any_of(SavedRegs.set_bits(), [](unsigned Reg) {
    return AArch64::FPR64RegClass.contains(Reg) ||
           AArch64::FPR128RegClass.contains(Reg) ||
           AArch64::ZPRRegClass.contains(Reg) ||
           AArch64::PPRRegClass.contains(Reg);
  });

  bool HasFPRStackObjects = false;

    for (auto &MBB : MF) {
      for (auto &MI : MBB) {
        if (FI && *FI >= 0 && *FI < (int)FrameObjects.size()) {
            FrameObjects[*FI] |= 2;
            FrameObjects[*FI] |= 1;

    HasFPRStackObjects =
        any_of(FrameObjects, [](unsigned B) { return (B & 3) == 2; });

  if (HasFPRCSRs || HasFPRStackObjects) {
  unsigned UnspilledCSGPR = AArch64::NoRegister;
  unsigned UnspilledCSGPRPaired = AArch64::NoRegister;

  unsigned ExtraCSSpill = 0;
  bool HasUnpairedGPR64 = false;
  bool HasPairZReg = false;

  for (unsigned i = 0; CSRegs[i]; ++i) {
    const unsigned Reg = CSRegs[i];

    if (Reg == BasePointerReg)

    bool RegUsed = SavedRegs.test(Reg);
    unsigned PairedReg = AArch64::NoRegister;
    const bool RegIsGPR64 = AArch64::GPR64RegClass.contains(Reg);
    if (RegIsGPR64 || AArch64::FPR64RegClass.contains(Reg) ||
        AArch64::FPR128RegClass.contains(Reg)) {
      if (HasUnpairedGPR64)
        PairedReg = CSRegs[i % 2 == 0 ? i - 1 : i + 1];
      else
        PairedReg = CSRegs[i ^ 1];

    if (RegIsGPR64 && !AArch64::GPR64RegClass.contains(PairedReg)) {
      PairedReg = AArch64::NoRegister;
      HasUnpairedGPR64 = true;
    }
    assert(PairedReg == AArch64::NoRegister ||
           AArch64::GPR64RegClass.contains(Reg, PairedReg) ||
           AArch64::FPR64RegClass.contains(Reg, PairedReg) ||
           AArch64::FPR128RegClass.contains(Reg, PairedReg));

    if (AArch64::GPR64RegClass.contains(Reg) &&
      UnspilledCSGPR = Reg;
      UnspilledCSGPRPaired = PairedReg;

    if (producePairRegisters(MF) && PairedReg != AArch64::NoRegister &&
        !SavedRegs.test(PairedReg)) {
      SavedRegs.set(PairedReg);
      if (AArch64::GPR64RegClass.contains(PairedReg) &&
        ExtraCSSpill = PairedReg;
    }

    HasPairZReg |= (AArch64::ZPRRegClass.contains(Reg, CSRegs[i ^ 1]) &&
                    SavedRegs.test(CSRegs[i ^ 1]));
  }

  if (HasPairZReg && (Subtarget.hasSVE2p1() || Subtarget.hasSME2())) {
    if (PnReg != AArch64::NoRegister)

      SavedRegs.set(AArch64::P8);
           "Predicate cannot be a reserved register");

    SavedRegs.set(AArch64::X18);
  unsigned CSStackSize = 0;
  unsigned SVECSStackSize = 0;

  for (unsigned Reg : SavedRegs.set_bits()) {
    if (AArch64::PPRRegClass.contains(Reg) ||
        AArch64::ZPRRegClass.contains(Reg))

  if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface())

  determineStackHazardSlot(MF, SavedRegs);
  if (AFI->hasStackHazardSlotIndex())

  unsigned NumSavedRegs = SavedRegs.count();

    SavedRegs.set(AArch64::FP);
    SavedRegs.set(AArch64::LR);

  LLVM_DEBUG(dbgs() << "*** determineCalleeSaves\nSaved CSRs:";
             for (unsigned Reg : SavedRegs.set_bits())

  int64_t SVEStackSize =
      alignTo(SVECSStackSize + estimateSVEStackObjectOffsets(MFI), 16);
  bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize;

  int64_t CalleeStackUsed = 0;
    if (FixedOff > CalleeStackUsed)
      CalleeStackUsed = FixedOff;

  bool BigStack = SVEStackSize || (EstimatedStackSize + CSStackSize +
                                   CalleeStackUsed) > EstimatedStackSizeLimit;
    AFI->setHasStackFrame(true);

    if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {
                        << " to get a scratch register.\n");
      SavedRegs.set(UnspilledCSGPR);
      ExtraCSSpill = UnspilledCSGPR;

      if (producePairRegisters(MF)) {
        if (UnspilledCSGPRPaired == AArch64::NoRegister) {
          SavedRegs.reset(UnspilledCSGPR);
          ExtraCSSpill = AArch64::NoRegister;
        } else {
          SavedRegs.set(UnspilledCSGPRPaired);
        }
      }

      unsigned Size = TRI->getSpillSize(RC);
      Align Alignment = TRI->getSpillAlign(RC);

      LLVM_DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
                        << " as the emergency spill slot.\n");

  CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs);

  if (hasFP(MF) && AFI->hasSwiftAsyncContext())

                    << EstimatedStackSize + AlignedCSStackSize << " bytes.\n");

          AFI->getCalleeSavedStackSize() == AlignedCSStackSize) &&
         "Should not invalidate callee saved info");

  AFI->setCalleeSavedStackSize(AlignedCSStackSize);
  AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
  AFI->setSVECalleeSavedStackSize(alignTo(SVECSStackSize, 16));
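
  // At this point the callee-save area is fully sized: AlignedCSStackSize
  // rounds the GPR/FPR save area to 16 bytes (CalleeSaveStackHasFreeSpace
  // records whether that rounding introduced padding), and the SVE
  // callee-save size is kept separately since it scales with the vector
  // length.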
    std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex,
    unsigned &MaxCSFrameIndex) const {

    std::reverse(CSI.begin(), CSI.end());

    if ((unsigned)FrameIdx < MinCSFrameIndex)
      MinCSFrameIndex = FrameIdx;
    if ((unsigned)FrameIdx > MaxCSFrameIndex)
      MaxCSFrameIndex = FrameIdx;

    std::vector<CalleeSavedInfo> VGSaves;

    VGInfo.setRestored(false);
    VGSaves.push_back(VGInfo);

    if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface())
      VGSaves.push_back(VGInfo);

    bool InsertBeforeLR = false;

    for (unsigned I = 0; I < CSI.size(); I++)
      if (CSI[I].getReg() == AArch64::LR) {
        InsertBeforeLR = true;
        CSI.insert(CSI.begin() + I, VGSaves.begin(), VGSaves.end());

    if (!InsertBeforeLR)
      CSI.insert(CSI.end(), VGSaves.begin(), VGSaves.end());
  int HazardSlotIndex = std::numeric_limits<int>::max();
  for (auto &CS : CSI) {

      assert(HazardSlotIndex == std::numeric_limits<int>::max() &&
             "Unexpected register order for hazard slot");
      LLVM_DEBUG(dbgs() << "Created CSR Hazard at slot " << HazardSlotIndex
      if ((unsigned)HazardSlotIndex < MinCSFrameIndex)
        MinCSFrameIndex = HazardSlotIndex;
      if ((unsigned)HazardSlotIndex > MaxCSFrameIndex)
        MaxCSFrameIndex = HazardSlotIndex;

    CS.setFrameIdx(FrameIdx);

    if ((unsigned)FrameIdx < MinCSFrameIndex)
      MinCSFrameIndex = FrameIdx;
    if ((unsigned)FrameIdx > MaxCSFrameIndex)
      MaxCSFrameIndex = FrameIdx;

        Reg == AArch64::FP) {
      if ((unsigned)FrameIdx < MinCSFrameIndex)
        MinCSFrameIndex = FrameIdx;
      if ((unsigned)FrameIdx > MaxCSFrameIndex)
        MaxCSFrameIndex = FrameIdx;

      HazardSlotIndex == std::numeric_limits<int>::max()) {
    LLVM_DEBUG(dbgs() << "Created CSR Hazard at slot " << HazardSlotIndex
    if ((unsigned)HazardSlotIndex < MinCSFrameIndex)
      MinCSFrameIndex = HazardSlotIndex;
    if ((unsigned)HazardSlotIndex > MaxCSFrameIndex)
      MaxCSFrameIndex = HazardSlotIndex;
                                 int &Min, int &Max) {
  Min = std::numeric_limits<int>::max();
  Max = std::numeric_limits<int>::min();

  for (auto &CS : CSI) {
    if (AArch64::ZPRRegClass.contains(CS.getReg()) ||
        AArch64::PPRRegClass.contains(CS.getReg())) {
      assert((Max == std::numeric_limits<int>::min() ||
              Max + 1 == CS.getFrameIdx()) &&
             "SVE CalleeSaves are not consecutive");
      Min = std::min(Min, CS.getFrameIdx());
      Max = std::max(Max, CS.getFrameIdx());

  return Min != std::numeric_limits<int>::max();
                                int &MinCSFrameIndex, int &MaxCSFrameIndex,
                                bool AssignOffsets) {

         "SVE vectors should never be passed on the stack by value, only by "

  auto Assign = [&MFI](int FI, int64_t Offset) {

  for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) {

  int StackProtectorFI = -1;
      ObjectsToAllocate.push_back(StackProtectorFI);

    if (I == StackProtectorFI)
    if (MaxCSFrameIndex >= I && I >= MinCSFrameIndex)

  for (unsigned FI : ObjectsToAllocate) {
    if (Alignment > Align(16))
          "Alignment of scalable vectors > 16 bytes is not yet supported");

int64_t AArch64FrameLowering::estimateSVEStackObjectOffsets(
  int MinCSFrameIndex, MaxCSFrameIndex;

int64_t AArch64FrameLowering::assignSVEStackObjectOffsets(

         "Upwards growing stack unsupported");

  int MinCSFrameIndex, MaxCSFrameIndex;
  int64_t SVEStackSize =
      assignSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex);
  int64_t FixedObject =

  assert(DstReg && "There must be a free register after frame setup");

struct TagStoreInstr {

  std::optional<int64_t> FrameRegUpdate;
  unsigned FrameRegUpdateFlags;

      : MBB(MBB), ZeroData(ZeroData) {

  void addInstruction(TagStoreInstr I) {
            TagStores.back().Offset + TagStores.back().Size == I.Offset) &&
           "Non-adjacent tag store instructions.");
  const int64_t kMinOffset = -256 * 16;
  const int64_t kMaxOffset = 255 * 16;

  int64_t BaseRegOffsetBytes = FrameRegOffset.getFixed();
  if (BaseRegOffsetBytes < kMinOffset ||
      BaseRegOffsetBytes + (Size - Size % 32) > kMaxOffset ||
      BaseRegOffsetBytes % 16 != 0) {
    Register ScratchReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
    BaseReg = ScratchReg;
    BaseRegOffsetBytes = 0;
  }

    int64_t InstrSize = (Size > 16) ? 32 : 16;
        ? (ZeroData ? AArch64::STZGi : AArch64::STGi)
        : (ZeroData ? AArch64::STZ2Gi : AArch64::ST2Gi);
    assert(BaseRegOffsetBytes % 16 == 0);
        .addImm(BaseRegOffsetBytes / 16)
    if (BaseRegOffsetBytes == 0)
    BaseRegOffsetBytes += InstrSize;

      : MRI->createVirtualRegister(&AArch64::GPR64RegClass);
  Register SizeReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);

  int64_t LoopSize = Size;

  if (FrameRegUpdate && *FrameRegUpdate)
    LoopSize -= LoopSize % 32;
                     TII->get(ZeroData ? AArch64::STZGloop_wback
                                       : AArch64::STGloop_wback))
  LoopI->setFlags(FrameRegUpdateFlags);

  int64_t ExtraBaseRegUpdate =
      FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getFixed() - Size) : 0;
  if (LoopSize < Size) {
        TII->get(ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex))
        .addImm(1 + ExtraBaseRegUpdate / 16)
  } else if (ExtraBaseRegUpdate) {
        TII->get(ExtraBaseRegUpdate > 0 ? AArch64::ADDXri : AArch64::SUBXri))
        .addImm(std::abs(ExtraBaseRegUpdate))
                               int64_t Size, int64_t *TotalOffset) {
  if ((MI.getOpcode() == AArch64::ADDXri ||
       MI.getOpcode() == AArch64::SUBXri) &&
      MI.getOperand(0).getReg() == Reg && MI.getOperand(1).getReg() == Reg) {
    int64_t Offset = MI.getOperand(2).getImm() << Shift;
    if (MI.getOpcode() == AArch64::SUBXri)
    int64_t AbsPostOffset = std::abs(Offset - Size);
    const int64_t kMaxOffset =
    if (AbsPostOffset <= kMaxOffset && AbsPostOffset % 16 == 0) {

  for (auto &TS : TSE) {
    if (MI->memoperands_empty()) {
    MemRefs.append(MI->memoperands_begin(), MI->memoperands_end());
                           bool TryMergeSPUpdate) {
  if (TagStores.empty())

  TagStoreInstr &FirstTagStore = TagStores[0];
  TagStoreInstr &LastTagStore = TagStores[TagStores.size() - 1];
  Size = LastTagStore.Offset - FirstTagStore.Offset + LastTagStore.Size;
  DL = TagStores[0].MI->getDebugLoc();

      *MF, FirstTagStore.Offset, false /*isFixed*/, false /*isSVE*/, Reg,
  FrameRegUpdate = std::nullopt;

  mergeMemRefs(TagStores, CombinedMemRefs);

  LLVM_DEBUG({
    dbgs() << "Replacing adjacent STG instructions:\n";
    for (const auto &Instr : TagStores) {

  if (TagStores.size() < 2)
    emitUnrolled(InsertI);

  int64_t TotalOffset = 0;
  if (TryMergeSPUpdate) {
    if (InsertI != MBB->end() &&
        canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getFixed() + Size,
      UpdateInstr = &*InsertI++;

  if (!UpdateInstr && TagStores.size() < 2)

    FrameRegUpdate = TotalOffset;
    FrameRegUpdateFlags = UpdateInstr->getFlags();

  for (auto &TS : TagStores)
    TS.MI->eraseFromParent();
                                                  int64_t &Size,
                                                  bool &ZeroData) {

  unsigned Opcode = MI.getOpcode();
  ZeroData = (Opcode == AArch64::STZGloop || Opcode == AArch64::STZGi ||
              Opcode == AArch64::STZ2Gi);

  if (Opcode == AArch64::STGloop || Opcode == AArch64::STZGloop) {
    if (!MI.getOperand(0).isDead() || !MI.getOperand(1).isDead())
    if (!MI.getOperand(2).isImm() || !MI.getOperand(3).isFI())
    Size = MI.getOperand(2).getImm();

  if (Opcode == AArch64::STGi || Opcode == AArch64::STZGi)
  else if (Opcode == AArch64::ST2Gi || Opcode == AArch64::STZ2Gi)

  if (MI.getOperand(0).getReg() != AArch64::SP || !MI.getOperand(1).isFI())

           16 * MI.getOperand(2).getImm();

  if (!isMergeableStackTaggingInstruction(MI, Offset, Size, FirstZeroData))
  constexpr int kScanLimit = 10;
       NextI != E && Count < kScanLimit; ++NextI) {

    if (isMergeableStackTaggingInstruction(MI, Offset, Size, ZeroData)) {
      if (ZeroData != FirstZeroData)

    if (!MI.isTransient())

    if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects())

  LiveRegs.addLiveOuts(*MBB);
    LiveRegs.stepBackward(*I);
  if (LiveRegs.contains(AArch64::NZCV))

      [](const TagStoreInstr &Left, const TagStoreInstr &Right) {

  int64_t CurOffset = Instrs[0].Offset;
  for (auto &Instr : Instrs) {
    if (CurOffset > Instr.Offset)

  TagStoreEdit TSE(MBB, FirstZeroData);
  std::optional<int64_t> EndOffset;
  for (auto &Instr : Instrs) {
    if (EndOffset && *EndOffset != Instr.Offset) {
      TSE.emitCode(InsertI, TFI, /*TryMergeSPUpdate=*/false);