267#define DEBUG_TYPE "frame-info"
270 cl::desc(
"enable use of redzone on AArch64"),
274 "stack-tagging-merge-settag",
284 cl::desc(
"Split allocation of ZPR & PPR objects"),
289 cl::desc(
"Emit homogeneous prologue and epilogue for the size "
290 "optimization (default = off)"));
302 "aarch64-disable-multivector-spill-fill",
311 bool IsTailCallReturn = (
MBB.end() !=
MBBI)
315 int64_t ArgumentPopSize = 0;
316 if (IsTailCallReturn) {
322 ArgumentPopSize = StackAdjust.
getImm();
331 return ArgumentPopSize;
389bool AArch64FrameLowering::homogeneousPrologEpilog(
411 if (Exit && getArgumentStackToRestore(MF, *Exit))
414 auto *AFI = MF.
getInfo<AArch64FunctionInfo>();
422 unsigned NumGPRs = 0;
423 for (
unsigned I = 0; CSRegs[
I]; ++
I) {
425 if (
Reg == AArch64::LR) {
426 assert(CSRegs[
I + 1] == AArch64::FP);
427 if (NumGPRs % 2 != 0)
439bool AArch64FrameLowering::producePairRegisters(
MachineFunction &MF)
const {
458 if (
MI.isDebugInstr() ||
MI.isPseudo() ||
459 MI.getOpcode() == AArch64::ADDXri ||
460 MI.getOpcode() == AArch64::ADDSXri)
485 bool IsWin64,
bool IsFunclet)
const {
487 "Tail call reserved stack must be aligned to 16 bytes");
488 if (!IsWin64 || IsFunclet) {
493 Attribute::SwiftAsync))
507 int FrameIndex =
H.CatchObj.FrameIndex;
508 if ((FrameIndex != INT_MAX) &&
509 CatchObjFrameIndices.
insert(FrameIndex)) {
510 FixedObjectSize =
alignTo(FixedObjectSize,
517 FixedObjectSize += 8;
519 return alignTo(FixedObjectSize, 16);
530 const unsigned RedZoneSize =
543 bool LowerQRegCopyThroughMem = Subtarget.hasFPARMv8() &&
547 return !(MFI.
hasCalls() ||
hasFP(MF) || NumBytes > RedZoneSize ||
568 RegInfo->hasStackRealignment(MF))
615 if (TT.isOSDarwin() || TT.isOSWindows())
653 unsigned Opc =
I->getOpcode();
654 bool IsDestroy =
Opc ==
TII->getCallFrameDestroyOpcode();
655 uint64_t CalleePopAmount = IsDestroy ?
I->getOperand(1).getImm() : 0;
658 int64_t Amount =
I->getOperand(0).getImm();
666 if (CalleePopAmount == 0) {
677 assert(Amount > -0xffffff && Amount < 0xffffff &&
"call frame too large");
688 "non-reserved call frame without var sized objects?");
697 }
else if (CalleePopAmount != 0) {
700 assert(CalleePopAmount < 0xffffff &&
"call frame too large");
712 const auto &
TRI = *Subtarget.getRegisterInfo();
718 CFIBuilder.buildDefCFA(AArch64::SP, 0);
721 if (MFI.shouldSignReturnAddress(MF))
722 MFI.branchProtectionPAuthLR() ? CFIBuilder.buildNegateRAStateWithPC()
723 : CFIBuilder.buildNegateRAState();
726 if (MFI.needsShadowCallStackPrologueEpilogue(MF))
727 CFIBuilder.buildSameValue(AArch64::X18);
730 const std::vector<CalleeSavedInfo> &CSI =
732 for (
const auto &Info : CSI) {
734 if (!
TRI.regNeedsCFI(Reg, Reg))
736 CFIBuilder.buildSameValue(Reg);
749 case AArch64::W##n: \
750 case AArch64::X##n: \
775 case AArch64::B##n: \
776 case AArch64::H##n: \
777 case AArch64::S##n: \
778 case AArch64::D##n: \
779 case AArch64::Q##n: \
780 return HasSVE ? AArch64::Z##n : AArch64::Q##n
817void AArch64FrameLowering::emitZeroCallUsedRegs(
BitVector RegsToZero,
828 const AArch64Subtarget &STI = MF.
getSubtarget<AArch64Subtarget>();
831 BitVector GPRsToZero(
TRI.getNumRegs());
832 BitVector FPRsToZero(
TRI.getNumRegs());
835 if (
TRI.isGeneralPurposeRegister(MF,
Reg)) {
838 GPRsToZero.set(XReg);
842 FPRsToZero.set(XReg);
849 for (MCRegister
Reg : GPRsToZero.set_bits())
853 for (MCRegister
Reg : FPRsToZero.set_bits())
857 for (MCRegister PReg :
858 {AArch64::P0, AArch64::P1, AArch64::P2, AArch64::P3, AArch64::P4,
859 AArch64::P5, AArch64::P6, AArch64::P7, AArch64::P8, AArch64::P9,
860 AArch64::P10, AArch64::P11, AArch64::P12, AArch64::P13, AArch64::P14,
862 if (RegsToZero[PReg])
868bool AArch64FrameLowering::windowsRequiresStackProbe(
870 const AArch64Subtarget &Subtarget = MF.
getSubtarget<AArch64Subtarget>();
871 const AArch64FunctionInfo &MFI = *MF.
getInfo<AArch64FunctionInfo>();
875 StackSizeInBytes >= uint64_t(MFI.getStackProbeSize());
884 for (
unsigned i = 0; CSRegs[i]; ++i)
890 bool HasCall)
const {
900 const AArch64Subtarget &Subtarget = MF->
getSubtarget<AArch64Subtarget>();
902 LivePhysRegs LiveRegs(
TRI);
905 LiveRegs.addReg(AArch64::X16);
906 LiveRegs.addReg(AArch64::X17);
907 LiveRegs.addReg(AArch64::X18);
912 if (LiveRegs.available(
MRI, AArch64::X9))
915 for (
unsigned Reg : AArch64::GPR64RegClass) {
916 if (LiveRegs.available(
MRI,
Reg))
919 return AArch64::NoRegister;
946 MBB.isLiveIn(AArch64::NZCV))
950 if (findScratchNonCalleeSaveRegister(TmpMBB) == AArch64::NoRegister)
956 windowsRequiresStackProbe(*MF, std::numeric_limits<uint64_t>::max()))
957 if (findScratchNonCalleeSaveRegister(TmpMBB,
true) == AArch64::NoRegister)
966 F.needsUnwindTableEntry();
969bool AArch64FrameLowering::shouldSignReturnAddressEverywhere(
977 return SignReturnAddressAll;
986 unsigned Opc =
MBBI->getOpcode();
990 unsigned ImmIdx =
MBBI->getNumOperands() - 1;
991 int Imm =
MBBI->getOperand(ImmIdx).getImm();
999 case AArch64::STR_ZXI:
1000 case AArch64::LDR_ZXI: {
1001 unsigned Reg0 =
RegInfo->getSEHRegNum(
MBBI->getOperand(0).getReg());
1008 case AArch64::STR_PXI:
1009 case AArch64::LDR_PXI: {
1010 unsigned Reg0 = RegInfo->getSEHRegNum(
MBBI->getOperand(0).getReg());
1017 case AArch64::LDPDpost:
1020 case AArch64::STPDpre: {
1021 unsigned Reg0 = RegInfo->getSEHRegNum(
MBBI->getOperand(1).getReg());
1022 unsigned Reg1 = RegInfo->getSEHRegNum(
MBBI->getOperand(2).getReg());
1023 MIB =
BuildMI(MF,
DL,
TII.get(AArch64::SEH_SaveFRegP_X))
1030 case AArch64::LDPXpost:
1033 case AArch64::STPXpre: {
1036 if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
1037 MIB =
BuildMI(MF,
DL,
TII.get(AArch64::SEH_SaveFPLR_X))
1041 MIB =
BuildMI(MF,
DL,
TII.get(AArch64::SEH_SaveRegP_X))
1042 .
addImm(RegInfo->getSEHRegNum(Reg0))
1043 .
addImm(RegInfo->getSEHRegNum(Reg1))
1048 case AArch64::LDRDpost:
1051 case AArch64::STRDpre: {
1052 unsigned Reg = RegInfo->getSEHRegNum(
MBBI->getOperand(1).getReg());
1053 MIB =
BuildMI(MF,
DL,
TII.get(AArch64::SEH_SaveFReg_X))
1059 case AArch64::LDRXpost:
1062 case AArch64::STRXpre: {
1063 unsigned Reg = RegInfo->getSEHRegNum(
MBBI->getOperand(1).getReg());
1070 case AArch64::STPDi:
1071 case AArch64::LDPDi: {
1072 unsigned Reg0 = RegInfo->getSEHRegNum(
MBBI->getOperand(0).getReg());
1073 unsigned Reg1 = RegInfo->getSEHRegNum(
MBBI->getOperand(1).getReg());
1081 case AArch64::STPXi:
1082 case AArch64::LDPXi: {
1086 int SEHReg0 = RegInfo->getSEHRegNum(Reg0);
1087 int SEHReg1 = RegInfo->getSEHRegNum(Reg1);
1089 if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
1093 else if (SEHReg0 >= 19 && SEHReg1 >= 19)
1100 MIB =
BuildMI(MF,
DL,
TII.get(AArch64::SEH_SaveAnyRegIP))
1107 case AArch64::STRXui:
1108 case AArch64::LDRXui: {
1109 int Reg = RegInfo->getSEHRegNum(
MBBI->getOperand(0).getReg());
1116 MIB =
BuildMI(MF,
DL,
TII.get(AArch64::SEH_SaveAnyRegI))
1122 case AArch64::STRDui:
1123 case AArch64::LDRDui: {
1124 unsigned Reg = RegInfo->getSEHRegNum(
MBBI->getOperand(0).getReg());
1131 case AArch64::STPQi:
1132 case AArch64::LDPQi: {
1133 unsigned Reg0 = RegInfo->getSEHRegNum(
MBBI->getOperand(0).getReg());
1134 unsigned Reg1 = RegInfo->getSEHRegNum(
MBBI->getOperand(1).getReg());
1135 MIB =
BuildMI(MF,
DL,
TII.get(AArch64::SEH_SaveAnyRegQP))
1142 case AArch64::LDPQpost:
1145 case AArch64::STPQpre: {
1146 unsigned Reg0 = RegInfo->getSEHRegNum(
MBBI->getOperand(1).getReg());
1147 unsigned Reg1 = RegInfo->getSEHRegNum(
MBBI->getOperand(2).getReg());
1148 MIB =
BuildMI(MF,
DL,
TII.get(AArch64::SEH_SaveAnyRegQPX))
1167 if (ST.isTargetDarwin())
1193 DL =
MBBI->getDebugLoc();
1200 EmitSignRA(MF.
front());
1202 if (
MBB.isEHFuncletEntry())
1204 if (
MBB.isReturnBlock())
1260 StackOffset SVEStackSize = ZPRStackSize + PPRStackSize;
1265 if (MFI.isVariableSizedObjectIndex(FI)) {
1274 bool FPAfterSVECalleeSaves =
1276 if (MFI.hasScalableStackID(FI)) {
1277 if (FPAfterSVECalleeSaves &&
1280 "split-sve-objects not supported with FPAfterSVECalleeSaves");
1288 AccessOffset = -PPRStackSize;
1289 return AccessOffset +
1294 bool IsFixed = MFI.isFixedObjectIndex(FI);
1299 if (!IsFixed && !IsCSR) {
1300 ScalableOffset = -SVEStackSize;
1301 }
else if (FPAfterSVECalleeSaves && IsCSR) {
1316 int64_t ObjectOffset)
const {
1320 bool IsWin64 = Subtarget.isCallingConvWin64(
F.getCallingConv(),
F.isVarArg());
1321 unsigned FixedObject =
1322 getFixedObjectSize(MF, AFI, IsWin64,
false);
1330 int64_t ObjectOffset)
const {
1341 return RegInfo->getLocalAddressRegister(MF) == AArch64::FP
1342 ? getFPOffset(MF, ObjectOffset).getFixed()
1343 : getStackOffset(MF, ObjectOffset).getFixed();
1348 bool ForSimm)
const {
1350 int64_t ObjectOffset = MFI.getObjectOffset(FI);
1351 bool isFixed = MFI.isFixedObjectIndex(FI);
1354 FrameReg, PreferFP, ForSimm);
1360 bool ForSimm)
const {
1366 int64_t FPOffset = getFPOffset(MF, ObjectOffset).getFixed();
1367 int64_t
Offset = getStackOffset(MF, ObjectOffset).getFixed();
1370 bool isSVE = MFI.isScalableStackID(StackID);
1374 StackOffset SVEStackSize = ZPRStackSize + PPRStackSize;
1385 PreferFP &= !SVEStackSize;
1393 }
else if (isCSR && RegInfo->hasStackRealignment(MF)) {
1397 assert(
hasFP(MF) &&
"Re-aligned stack must have frame pointer");
1399 }
else if (
hasFP(MF) && !RegInfo->hasStackRealignment(MF)) {
1404 bool FPOffsetFits = !ForSimm || FPOffset >= -256;
1405 PreferFP |=
Offset > -FPOffset && !SVEStackSize;
1407 if (FPOffset >= 0) {
1411 }
else if (MFI.hasVarSizedObjects()) {
1415 bool CanUseBP = RegInfo->hasBasePointer(MF);
1416 if (FPOffsetFits && CanUseBP)
1423 }
else if (MF.
hasEHFunclets() && !RegInfo->hasBasePointer(MF)) {
1430 "Funclets should only be present on Win64");
1434 if (FPOffsetFits && PreferFP)
1441 ((isFixed || isCSR) || !RegInfo->hasStackRealignment(MF) || !UseFP) &&
1442 "In the presence of dynamic stack pointer realignment, "
1443 "non-argument/CSR objects cannot be accessed through the frame pointer");
1445 bool FPAfterSVECalleeSaves =
1461 FPOffset -= PPRStackSize;
1463 SPOffset -= PPRStackSize;
1468 if (FPAfterSVECalleeSaves) {
1479 RegInfo->hasStackRealignment(MF))) {
1480 FrameReg = RegInfo->getFrameRegister(MF);
1483 FrameReg = RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister()
1490 if (FPAfterSVECalleeSaves) {
1497 SVEAreaOffset = SVECalleeSavedStack;
1499 SVEAreaOffset = SVECalleeSavedStack - SVEStackSize;
1502 SVEAreaOffset = SVEStackSize;
1504 SVEAreaOffset = SVEStackSize - SVECalleeSavedStack;
1507 if (UseFP && !(isFixed || isCSR))
1508 SVEAreaOffset = -SVEStackSize;
1509 if (!UseFP && (isFixed || isCSR))
1510 SVEAreaOffset = SVEStackSize;
1514 FrameReg = RegInfo->getFrameRegister(MF);
1519 if (RegInfo->hasBasePointer(MF))
1520 FrameReg = RegInfo->getBaseRegister();
1522 assert(!MFI.hasVarSizedObjects() &&
1523 "Can't use SP when we have var sized objects.");
1524 FrameReg = AArch64::SP;
1552 Attrs.hasAttrSomewhere(Attribute::SwiftError)) &&
1558 unsigned SpillCount,
unsigned Reg1,
1559 unsigned Reg2,
bool NeedsWinCFI,
1569 if (Reg2 == AArch64::FP)
1579 if (
TRI->getEncodingValue(Reg2) ==
TRI->getEncodingValue(Reg1) + 1)
1580 return SpillExtendedVolatile
1581 ? !((Reg1 == AArch64::FP && Reg2 == AArch64::LR) ||
1582 (SpillCount % 2) == 0)
1590 if (Reg1 >= AArch64::X19 && Reg1 <= AArch64::X27 &&
1591 (Reg1 - AArch64::X19) % 2 == 0 && Reg2 == AArch64::LR && !IsFirst)
1601 unsigned SpillCount,
unsigned Reg1,
1602 unsigned Reg2,
bool UsesWinAAPCS,
1603 bool NeedsWinCFI,
bool NeedsFrameRecord,
1608 Reg1, Reg2, NeedsWinCFI, IsFirst,
1613 if (NeedsFrameRecord)
1614 return Reg2 == AArch64::LR;
1626 enum RegType { GPR, FPR64, FPR128, PPR, ZPR, VG }
Type;
1627 const TargetRegisterClass *RC;
1629 RegPairInfo() =
default;
1631 bool isPaired()
const {
return Reg2.
isValid(); }
1633 bool isScalable()
const {
return Type == PPR ||
Type == ZPR; }
1639 for (
unsigned PReg = AArch64::P8; PReg <= AArch64::P15; ++PReg) {
1640 if (SavedRegs.
test(PReg)) {
1641 unsigned PNReg = PReg - AArch64::P0 + AArch64::PN0;
1655 bool IsLocallyStreaming =
1661 return Subtarget.hasSVE2p1() ||
1662 (Subtarget.hasSME2() &&
1663 (!IsLocallyStreaming && Subtarget.
isStreaming()));
1671 bool NeedsFrameRecord) {
1689 (
Count & 1) == 0) &&
1690 "Odd number of callee-saved regs to spill!");
1692 int StackFillDir = -1;
1694 unsigned FirstReg = 0;
1702 FirstReg =
Count - 1;
1714 bool SpillExtendedVolatile =
1716 const auto &
Reg = CSI.getReg();
1717 return Reg >= AArch64::X0 &&
Reg <= AArch64::X18;
1720 int ZPRByteOffset = 0;
1721 int PPRByteOffset = 0;
1726 }
else if (!FPAfterSVECalleeSaves) {
1738 for (
unsigned i = FirstReg; i <
Count; i += RegInc) {
1740 RPI.Reg1 = CSI[i].getReg();
1742 if (AArch64::GPR64RegClass.
contains(RPI.Reg1)) {
1743 RPI.Type = RegPairInfo::GPR;
1744 RPI.RC = &AArch64::GPR64RegClass;
1745 }
else if (AArch64::FPR64RegClass.
contains(RPI.Reg1)) {
1746 RPI.Type = RegPairInfo::FPR64;
1747 RPI.RC = &AArch64::FPR64RegClass;
1748 }
else if (AArch64::FPR128RegClass.
contains(RPI.Reg1)) {
1749 RPI.Type = RegPairInfo::FPR128;
1750 RPI.RC = &AArch64::FPR128RegClass;
1751 }
else if (AArch64::ZPRRegClass.
contains(RPI.Reg1)) {
1752 RPI.Type = RegPairInfo::ZPR;
1753 RPI.RC = &AArch64::ZPRRegClass;
1754 }
else if (AArch64::PPRRegClass.
contains(RPI.Reg1)) {
1755 RPI.Type = RegPairInfo::PPR;
1756 RPI.RC = &AArch64::PPRRegClass;
1757 }
else if (RPI.Reg1 == AArch64::VG) {
1758 RPI.Type = RegPairInfo::VG;
1759 RPI.RC = &AArch64::FIXED_REGSRegClass;
1764 int &ScalableByteOffset = RPI.Type == RegPairInfo::PPR && SplitPPRs
1769 if (HasCSHazardPadding &&
1772 ByteOffset += StackFillDir * StackHazardSize;
1775 int Scale =
TRI->getSpillSize(*RPI.RC);
1777 if (
unsigned(i + RegInc) <
Count && !HasCSHazardPadding) {
1778 MCRegister NextReg = CSI[i + RegInc].getReg();
1779 bool IsFirst = i == FirstReg;
1780 unsigned SpillCount = NeedsWinCFI ? FirstReg - i : i;
1782 case RegPairInfo::GPR:
1783 if (AArch64::GPR64RegClass.
contains(NextReg) &&
1785 SpillExtendedVolatile, SpillCount, RPI.Reg1, NextReg, IsWindows,
1786 NeedsWinCFI, NeedsFrameRecord, IsFirst,
TRI))
1789 case RegPairInfo::FPR64:
1790 if (AArch64::FPR64RegClass.
contains(NextReg) &&
1792 RPI.Reg1, NextReg, NeedsWinCFI,
1796 case RegPairInfo::FPR128:
1797 if (AArch64::FPR128RegClass.
contains(NextReg))
1800 case RegPairInfo::PPR:
1802 case RegPairInfo::ZPR:
1804 ((RPI.Reg1 - AArch64::Z0) & 1) == 0 && (NextReg == RPI.Reg1 + 1)) {
1807 int Offset = (ScalableByteOffset + StackFillDir * 2 * Scale) / Scale;
1812 case RegPairInfo::VG:
1823 assert((!RPI.isPaired() ||
1824 (CSI[i].getFrameIdx() + RegInc == CSI[i + RegInc].getFrameIdx())) &&
1825 "Out of order callee saved regs!");
1827 assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg2 != AArch64::FP ||
1828 RPI.Reg1 == AArch64::LR) &&
1829 "FrameRecord must be allocated together with LR");
1832 assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg1 != AArch64::FP ||
1833 RPI.Reg2 == AArch64::LR) &&
1834 "FrameRecord must be allocated together with LR");
1842 ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
1843 RPI.Reg1 + 1 == RPI.Reg2))) &&
1844 "Callee-save registers not saved as adjacent register pair!");
1846 RPI.FrameIdx = CSI[i].getFrameIdx();
1849 RPI.FrameIdx = CSI[i + RegInc].getFrameIdx();
1853 if (RPI.isScalable() && ScalableByteOffset % Scale != 0) {
1854 ScalableByteOffset =
alignTo(ScalableByteOffset, Scale);
1857 int OffsetPre = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
1858 assert(OffsetPre % Scale == 0);
1860 if (RPI.isScalable())
1861 ScalableByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale);
1863 ByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale);
1868 ((!IsWindows && RPI.Reg2 == AArch64::FP) ||
1869 (IsWindows && RPI.Reg2 == AArch64::LR)))
1870 ByteOffset += StackFillDir * 8;
1874 if (NeedGapToAlignStack && !NeedsWinCFI && !RPI.isScalable() &&
1875 RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired() &&
1876 ByteOffset % 16 != 0) {
1877 ByteOffset += 8 * StackFillDir;
1883 NeedGapToAlignStack =
false;
1886 int OffsetPost = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
1887 assert(OffsetPost % Scale == 0);
1890 int Offset = NeedsWinCFI ? OffsetPre : OffsetPost;
1895 ((!IsWindows && RPI.Reg2 == AArch64::FP) ||
1896 (IsWindows && RPI.Reg2 == AArch64::LR)))
1898 RPI.Offset =
Offset / Scale;
1900 assert((!RPI.isPaired() ||
1901 (!RPI.isScalable() && RPI.Offset >= -64 && RPI.Offset <= 63) ||
1902 (RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) &&
1903 "Offset out of bounds for LDP/STP immediate");
1905 auto isFrameRecord = [&] {
1907 return IsWindows ? RPI.Reg1 == AArch64::FP && RPI.Reg2 == AArch64::LR
1908 : RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP;
1916 return i > 0 && RPI.Reg1 == AArch64::FP &&
1917 CSI[i - 1].getReg() == AArch64::LR;
1922 if (NeedsFrameRecord && isFrameRecord())
1939 std::reverse(RegPairs.
begin(), RegPairs.
end());
1958 MRI.freezeReservedRegs();
1960 if (homogeneousPrologEpilog(MF)) {
1964 for (
auto &RPI : RegPairs) {
1969 if (!
MRI.isReserved(RPI.Reg1))
1970 MBB.addLiveIn(RPI.Reg1);
1971 if (RPI.isPaired() && !
MRI.isReserved(RPI.Reg2))
1972 MBB.addLiveIn(RPI.Reg2);
1976 bool PTrueCreated =
false;
1992 unsigned Size =
TRI->getSpillSize(*RPI.RC);
1993 Align Alignment =
TRI->getSpillAlign(*RPI.RC);
1995 case RegPairInfo::GPR:
1996 StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
1998 case RegPairInfo::FPR64:
1999 StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
2001 case RegPairInfo::FPR128:
2002 StrOpc = RPI.isPaired() ? AArch64::STPQi : AArch64::STRQui;
2004 case RegPairInfo::ZPR:
2005 StrOpc = RPI.isPaired() ? AArch64::ST1B_2Z_IMM : AArch64::STR_ZXI;
2007 case RegPairInfo::PPR:
2008 StrOpc = AArch64::STR_PXI;
2010 case RegPairInfo::VG:
2011 StrOpc = AArch64::STRXui;
2017 if (X0Scratch != AArch64::NoRegister)
2023 if (Reg1 == AArch64::VG) {
2025 Reg1 = findScratchNonCalleeSaveRegister(&
MBB,
true);
2026 assert(Reg1 != AArch64::NoRegister);
2036 return STI.getRegisterInfo()->isSuperOrSubRegisterEq(
2037 AArch64::X0, LiveIn.PhysReg);
2045 RTLIB::Libcall LC = RTLIB::SMEABI_GET_CURRENT_VG;
2047 TRI->getCallPreservedMask(MF, TLI.getLibcallCallingConv(LC));
2061 dbgs() <<
") -> fi#(" << RPI.FrameIdx;
2063 dbgs() <<
", " << RPI.FrameIdx + 1;
2067 assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
2068 "Windows unwdinding requires a consecutive (FP,LR) pair");
2072 unsigned FrameIdxReg1 = RPI.FrameIdx;
2073 unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
2074 if (NeedsWinCFI && RPI.isPaired()) {
2079 if (RPI.isPaired() && RPI.isScalable()) {
2085 "Expects SVE2.1 or SME2 target and a predicate register");
2086#ifdef EXPENSIVE_CHECKS
2087 auto IsPPR = [](
const RegPairInfo &c) {
2088 return c.Reg1 == RegPairInfo::PPR;
2090 auto PPRBegin = std::find_if(RegPairs.
begin(), RegPairs.
end(), IsPPR);
2091 auto IsZPR = [](
const RegPairInfo &c) {
2092 return c.Type == RegPairInfo::ZPR;
2094 auto ZPRBegin = std::find_if(RegPairs.
begin(), RegPairs.
end(), IsZPR);
2095 assert(!(PPRBegin < ZPRBegin) &&
2096 "Expected callee save predicate to be handled first");
2098 if (!PTrueCreated) {
2099 PTrueCreated =
true;
2104 if (!
MRI.isReserved(Reg1))
2105 MBB.addLiveIn(Reg1);
2106 if (!
MRI.isReserved(Reg2))
2107 MBB.addLiveIn(Reg2);
2108 MIB.
addReg( AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0));
2124 if (!
MRI.isReserved(Reg1))
2125 MBB.addLiveIn(Reg1);
2126 if (RPI.isPaired()) {
2127 if (!
MRI.isReserved(Reg2))
2128 MBB.addLiveIn(Reg2);
2147 if (RPI.Type == RegPairInfo::ZPR) {
2151 }
else if (RPI.Type == RegPairInfo::PPR) {
2170 DL =
MBBI->getDebugLoc();
2173 if (homogeneousPrologEpilog(MF, &
MBB)) {
2176 for (
auto &RPI : RegPairs) {
2184 auto IsPPR = [](
const RegPairInfo &c) {
return c.Type == RegPairInfo::PPR; };
2186 auto PPREnd = std::find_if_not(PPRBegin, RegPairs.
end(), IsPPR);
2187 std::reverse(PPRBegin, PPREnd);
2188 auto IsZPR = [](
const RegPairInfo &c) {
return c.Type == RegPairInfo::ZPR; };
2190 auto ZPREnd = std::find_if_not(ZPRBegin, RegPairs.
end(), IsZPR);
2191 std::reverse(ZPRBegin, ZPREnd);
2193 bool PTrueCreated =
false;
2194 for (
const RegPairInfo &RPI : RegPairs) {
2207 unsigned Size =
TRI->getSpillSize(*RPI.RC);
2208 Align Alignment =
TRI->getSpillAlign(*RPI.RC);
2210 case RegPairInfo::GPR:
2211 LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
2213 case RegPairInfo::FPR64:
2214 LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
2216 case RegPairInfo::FPR128:
2217 LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui;
2219 case RegPairInfo::ZPR:
2220 LdrOpc = RPI.isPaired() ? AArch64::LD1B_2Z_IMM : AArch64::LDR_ZXI;
2222 case RegPairInfo::PPR:
2223 LdrOpc = AArch64::LDR_PXI;
2225 case RegPairInfo::VG:
2232 dbgs() <<
") -> fi#(" << RPI.FrameIdx;
2234 dbgs() <<
", " << RPI.FrameIdx + 1;
2241 unsigned FrameIdxReg1 = RPI.FrameIdx;
2242 unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
2243 if (NeedsWinCFI && RPI.isPaired()) {
2249 if (RPI.isPaired() && RPI.isScalable()) {
2254 "Expects SVE2.1 or SME2 target and a predicate register");
2255#ifdef EXPENSIVE_CHECKS
2256 assert(!(PPRBegin < ZPRBegin) &&
2257 "Expected callee save predicate to be handled first");
2259 if (!PTrueCreated) {
2260 PTrueCreated =
true;
2265 MIB.
addReg( AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0),
2282 if (RPI.isPaired()) {
2309 return std::optional<int>(PSV->getFrameIndex());
2320 return std::nullopt;
2326 if (!
MI.mayLoadOrStore() ||
MI.getNumMemOperands() < 1)
2327 return std::nullopt;
2334 return AArch64::PPRRegClass.contains(
MI.getOperand(0).getReg());
2340void AArch64FrameLowering::determineStackHazardSlot(
2343 auto *AFI = MF.
getInfo<AArch64FunctionInfo>();
2344 if (StackHazardSize == 0 || StackHazardSize % 16 != 0 ||
2358 return AArch64::FPR64RegClass.contains(Reg) ||
2359 AArch64::FPR128RegClass.contains(Reg) ||
2360 AArch64::ZPRRegClass.contains(Reg);
2363 return AArch64::PPRRegClass.contains(Reg);
2365 bool HasFPRStackObjects =
false;
2366 bool HasPPRStackObjects =
false;
2368 enum SlotType : uint8_t {
2379 for (
auto &
MBB : MF) {
2380 for (
auto &
MI :
MBB) {
2382 if (!FI || FI < 0 || FI >
int(SlotTypes.size()))
2389 ? SlotType::ZPRorFPR
2395 for (
int FI = 0; FI < int(SlotTypes.size()); ++FI) {
2396 HasFPRStackObjects |= SlotTypes[FI] == SlotType::ZPRorFPR;
2399 if (SlotTypes[FI] == SlotType::PPR) {
2401 HasPPRStackObjects =
true;
2406 if (HasFPRCSRs || HasFPRStackObjects) {
2409 << StackHazardSize <<
"\n");
2420 LLVM_DEBUG(
dbgs() <<
"Using SplitSVEObjects for SVE CC function\n");
2426 LLVM_DEBUG(
dbgs() <<
"Determining if SplitSVEObjects should be used in "
2427 "non-SVE CC function...\n");
2434 <<
"Calling convention is not supported with SplitSVEObjects\n");
2438 if (!HasPPRCSRs && !HasPPRStackObjects) {
2440 dbgs() <<
"Not using SplitSVEObjects as no PPRs are on the stack\n");
2444 if (!HasFPRCSRs && !HasFPRStackObjects) {
2447 <<
"Not using SplitSVEObjects as no FPRs or ZPRs are on the stack\n");
2451 [[maybe_unused]]
const AArch64Subtarget &Subtarget =
2452 MF.getSubtarget<AArch64Subtarget>();
2454 "Expected SVE to be available for PPRs");
2456 const TargetRegisterInfo *
TRI = MF.getSubtarget().getRegisterInfo();
2460 BitVector FPRZRegs(SavedRegs.
size());
2461 for (
size_t Reg = 0,
E = SavedRegs.
size(); HasFPRCSRs &&
Reg <
E; ++
Reg) {
2462 BitVector::reference RegBit = SavedRegs[
Reg];
2465 unsigned SubRegIdx = 0;
2467 SubRegIdx = AArch64::dsub;
2469 SubRegIdx = AArch64::zsub;
2476 TRI->getMatchingSuperReg(
Reg, SubRegIdx, &AArch64::ZPRRegClass);
2479 SavedRegs |= FPRZRegs;
2499 unsigned UnspilledCSGPR = AArch64::NoRegister;
2500 unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
2506 RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister() :
MCRegister();
2508 unsigned ExtraCSSpill = 0;
2509 bool HasUnpairedGPR64 =
false;
2510 bool HasPairZReg =
false;
2511 BitVector UserReservedRegs = RegInfo->getUserReservedRegs(MF);
2512 BitVector ReservedRegs = RegInfo->getReservedRegs(MF);
2515 for (
unsigned i = 0; CSRegs[i]; ++i) {
2519 if (Reg == BasePointerReg)
2524 if (UserReservedRegs[Reg]) {
2525 SavedRegs.
reset(Reg);
2529 bool RegUsed = SavedRegs.
test(Reg);
2531 const bool RegIsGPR64 = AArch64::GPR64RegClass.contains(Reg);
2532 if (RegIsGPR64 || AArch64::FPR64RegClass.
contains(Reg) ||
2533 AArch64::FPR128RegClass.
contains(Reg)) {
2536 if (HasUnpairedGPR64)
2537 PairedReg = CSRegs[i % 2 == 0 ? i - 1 : i + 1];
2539 PairedReg = CSRegs[i ^ 1];
2546 if (RegIsGPR64 && !AArch64::GPR64RegClass.
contains(PairedReg)) {
2547 PairedReg = AArch64::NoRegister;
2548 HasUnpairedGPR64 =
true;
2550 assert(PairedReg == AArch64::NoRegister ||
2551 AArch64::GPR64RegClass.
contains(Reg, PairedReg) ||
2552 AArch64::FPR64RegClass.
contains(Reg, PairedReg) ||
2553 AArch64::FPR128RegClass.
contains(Reg, PairedReg));
2556 if (AArch64::GPR64RegClass.
contains(Reg) && !ReservedRegs[Reg]) {
2557 UnspilledCSGPR = Reg;
2558 UnspilledCSGPRPaired = PairedReg;
2566 if (producePairRegisters(MF) && PairedReg != AArch64::NoRegister &&
2567 !SavedRegs.
test(PairedReg)) {
2568 SavedRegs.
set(PairedReg);
2569 if (AArch64::GPR64RegClass.
contains(PairedReg) &&
2570 !ReservedRegs[PairedReg])
2571 ExtraCSSpill = PairedReg;
2574 HasPairZReg |= (AArch64::ZPRRegClass.contains(Reg, CSRegs[i ^ 1]) &&
2575 SavedRegs.
test(CSRegs[i ^ 1]));
2583 if (PnReg.isValid())
2589 SavedRegs.
set(AArch64::P8);
2594 "Predicate cannot be a reserved register");
2604 SavedRegs.
set(AArch64::X18);
2610 determineStackHazardSlot(MF, SavedRegs);
2613 unsigned CSStackSize = 0;
2614 unsigned ZPRCSStackSize = 0;
2615 unsigned PPRCSStackSize = 0;
2617 for (
unsigned Reg : SavedRegs.
set_bits()) {
2619 assert(RC &&
"expected register class!");
2620 auto SpillSize =
TRI->getSpillSize(*RC);
2621 bool IsZPR = AArch64::ZPRRegClass.contains(Reg);
2622 bool IsPPR = !IsZPR && AArch64::PPRRegClass.contains(Reg);
2624 ZPRCSStackSize += SpillSize;
2626 PPRCSStackSize += SpillSize;
2628 CSStackSize += SpillSize;
2634 unsigned NumSavedRegs = SavedRegs.
count();
2647 SavedRegs.
set(AArch64::LR);
2652 windowsRequiresStackProbe(MF, EstimatedStackSize + CSStackSize + 16)) {
2653 SavedRegs.
set(AArch64::FP);
2654 SavedRegs.
set(AArch64::LR);
2658 dbgs() <<
"*** determineCalleeSaves\nSaved CSRs:";
2659 for (
unsigned Reg : SavedRegs.
set_bits())
2665 auto [ZPRLocalStackSize, PPRLocalStackSize] =
2667 uint64_t SVELocals = ZPRLocalStackSize + PPRLocalStackSize;
2669 alignTo(ZPRCSStackSize + PPRCSStackSize + SVELocals, 16);
2670 bool CanEliminateFrame = (SavedRegs.
count() == 0) && !SVEStackSize;
2679 int64_t CalleeStackUsed = 0;
2682 if (FixedOff > CalleeStackUsed)
2683 CalleeStackUsed = FixedOff;
2687 bool BigStack = SVEStackSize || (EstimatedStackSize + CSStackSize +
2688 CalleeStackUsed) > EstimatedStackSizeLimit;
2689 if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
2699 if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {
2701 <<
" to get a scratch register.\n");
2702 SavedRegs.
set(UnspilledCSGPR);
2703 ExtraCSSpill = UnspilledCSGPR;
2708 if (producePairRegisters(MF)) {
2709 if (UnspilledCSGPRPaired == AArch64::NoRegister) {
2712 SavedRegs.
reset(UnspilledCSGPR);
2713 ExtraCSSpill = AArch64::NoRegister;
2716 SavedRegs.
set(UnspilledCSGPRPaired);
2725 unsigned Size =
TRI->getSpillSize(RC);
2726 Align Alignment =
TRI->getSpillAlign(RC);
2728 RS->addScavengingFrameIndex(FI);
2729 LLVM_DEBUG(
dbgs() <<
"No available CS registers, allocated fi#" << FI
2730 <<
" as the emergency spill slot.\n");
2735 CSStackSize += 8 * (SavedRegs.
count() - NumSavedRegs);
2744 << EstimatedStackSize + AlignedCSStackSize <<
" bytes.\n");
2748 "Should not invalidate callee saved info");
2759 std::vector<CalleeSavedInfo> &CSI,
unsigned &MinCSFrameIndex,
2760 unsigned &MaxCSFrameIndex)
const {
2769 std::reverse(CSI.begin(), CSI.end());
2783 if ((
unsigned)FrameIdx < MinCSFrameIndex)
2784 MinCSFrameIndex = FrameIdx;
2785 if ((
unsigned)FrameIdx > MaxCSFrameIndex)
2786 MaxCSFrameIndex = FrameIdx;
2793 find_if(CSI, [](
auto &Info) {
return Info.getReg() == AArch64::LR; });
2794 if (It != CSI.end())
2795 CSI.insert(It, VGInfo);
2797 CSI.push_back(VGInfo);
2801 int HazardSlotIndex = std::numeric_limits<int>::max();
2802 for (
auto &CS : CSI) {
2810 assert(HazardSlotIndex == std::numeric_limits<int>::max() &&
2811 "Unexpected register order for hazard slot");
2813 LLVM_DEBUG(
dbgs() <<
"Created CSR Hazard at slot " << HazardSlotIndex
2816 if ((
unsigned)HazardSlotIndex < MinCSFrameIndex)
2817 MinCSFrameIndex = HazardSlotIndex;
2818 if ((
unsigned)HazardSlotIndex > MaxCSFrameIndex)
2819 MaxCSFrameIndex = HazardSlotIndex;
2822 unsigned Size = RegInfo->getSpillSize(*RC);
2823 Align Alignment(RegInfo->getSpillAlign(*RC));
2825 CS.setFrameIdx(FrameIdx);
2827 if ((
unsigned)FrameIdx < MinCSFrameIndex)
2828 MinCSFrameIndex = FrameIdx;
2829 if ((
unsigned)FrameIdx > MaxCSFrameIndex)
2830 MaxCSFrameIndex = FrameIdx;
2834 Reg == AArch64::FP) {
2837 if ((
unsigned)FrameIdx < MinCSFrameIndex)
2838 MinCSFrameIndex = FrameIdx;
2839 if ((
unsigned)FrameIdx > MaxCSFrameIndex)
2840 MaxCSFrameIndex = FrameIdx;
2847 HazardSlotIndex == std::numeric_limits<int>::max()) {
2849 LLVM_DEBUG(
dbgs() <<
"Created CSR Hazard at slot " << HazardSlotIndex
2852 if ((
unsigned)HazardSlotIndex < MinCSFrameIndex)
2853 MinCSFrameIndex = HazardSlotIndex;
2854 if ((
unsigned)HazardSlotIndex > MaxCSFrameIndex)
2855 MaxCSFrameIndex = HazardSlotIndex;
2879 int &Min,
int &Max) {
2880 Min = std::numeric_limits<int>::max();
2881 Max = std::numeric_limits<int>::min();
2887 for (
auto &CS : CSI) {
2888 if (AArch64::ZPRRegClass.
contains(CS.getReg()) ||
2889 AArch64::PPRRegClass.contains(CS.getReg())) {
2890 assert((Max == std::numeric_limits<int>::min() ||
2891 Max + 1 == CS.getFrameIdx()) &&
2892 "SVE CalleeSaves are not consecutive");
2893 Min = std::min(Min, CS.getFrameIdx());
2894 Max = std::max(Max, CS.getFrameIdx());
2897 return Min != std::numeric_limits<int>::max();
2910 uint64_t &ZPRStackTop = SVEStack.ZPRStackSize;
2918 "SVE vectors should never be passed on the stack by value, only by "
2922 auto AllocateObject = [&](
int FI) {
2931 if (Alignment >
Align(16))
2933 "Alignment of scalable vectors > 16 bytes is not yet supported");
2936 StackTop =
alignTo(StackTop, Alignment);
2938 assert(StackTop < (
uint64_t)std::numeric_limits<int64_t>::max() &&
2939 "SVE StackTop far too large?!");
2941 int64_t
Offset = -int64_t(StackTop);
2949 int MinCSFrameIndex, MaxCSFrameIndex;
2951 for (
int FI = MinCSFrameIndex; FI <= MaxCSFrameIndex; ++FI)
2964 int StackProtectorFI = -1;
2968 ObjectsToAllocate.
push_back(StackProtectorFI);
2974 if (MaxCSFrameIndex >= FI && FI >= MinCSFrameIndex)
2985 for (
unsigned FI : ObjectsToAllocate)
3000 "Upwards growing stack unsupported");
3015 int64_t CurrentOffset =
3019 int FrameIndex =
H.CatchObj.FrameIndex;
3020 if ((FrameIndex != INT_MAX) && MFI.
getObjectOffset(FrameIndex) == 0) {
3031 int64_t UnwindHelpOffset =
alignTo(CurrentOffset + 8,
Align(16));
3032 assert(UnwindHelpOffset == getFixedObjectSize(MF, AFI,
true,
3034 "UnwindHelpOffset must be at the start of the fixed object area");
3037 EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
3047 RS->enterBasicBlockEnd(
MBB);
3049 Register DstReg = RS->FindUnusedReg(&AArch64::GPR64commonRegClass);
3050 assert(DstReg &&
"There must be a free register after frame setup");
3060struct TagStoreInstr {
3068 MachineFunction *MF;
3069 MachineBasicBlock *
MBB;
3070 MachineRegisterInfo *
MRI;
3079 StackOffset FrameRegOffset;
3083 std::optional<int64_t> FrameRegUpdate;
3085 unsigned FrameRegUpdateFlags;
3095 TagStoreEdit(MachineBasicBlock *
MBB,
bool ZeroData)
3096 :
MBB(
MBB), ZeroData(ZeroData) {
3102 void addInstruction(TagStoreInstr
I) {
3104 TagStores.
back().Offset + TagStores.
back().Size ==
I.Offset) &&
3105 "Non-adjacent tag store instructions.");
3108 void clear() { TagStores.
clear(); }
3113 const AArch64FrameLowering *TFI,
bool TryMergeSPUpdate);
3120 const int64_t kMinOffset = -256 * 16;
3121 const int64_t kMaxOffset = 255 * 16;
3124 int64_t BaseRegOffsetBytes = FrameRegOffset.
getFixed();
3125 if (BaseRegOffsetBytes < kMinOffset ||
3126 BaseRegOffsetBytes + (
Size -
Size % 32) > kMaxOffset ||
3130 BaseRegOffsetBytes % 16 != 0) {
3131 Register ScratchReg =
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
3135 BaseRegOffsetBytes = 0;
3140 int64_t InstrSize = (
Size > 16) ? 32 : 16;
3143 ? (ZeroData ? AArch64::STZGi : AArch64::STGi)
3145 assert(BaseRegOffsetBytes % 16 == 0);
3149 .
addImm(BaseRegOffsetBytes / 16)
3153 if (BaseRegOffsetBytes == 0)
3155 BaseRegOffsetBytes += InstrSize;
3169 :
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
3170 Register SizeReg =
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
3174 int64_t LoopSize =
Size;
3177 if (FrameRegUpdate && *FrameRegUpdate)
3178 LoopSize -= LoopSize % 32;
3180 TII->get(ZeroData ? AArch64::STZGloop_wback
3181 : AArch64::STGloop_wback))
3188 LoopI->
setFlags(FrameRegUpdateFlags);
3190 int64_t ExtraBaseRegUpdate =
3191 FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.
getFixed() -
Size) : 0;
3192 LLVM_DEBUG(
dbgs() <<
"TagStoreEdit::emitLoop: LoopSize=" << LoopSize
3193 <<
", Size=" <<
Size
3194 <<
", ExtraBaseRegUpdate=" << ExtraBaseRegUpdate
3195 <<
", FrameRegUpdate=" << FrameRegUpdate
3196 <<
", FrameRegOffset.getFixed()="
3197 << FrameRegOffset.
getFixed() <<
"\n");
3198 if (LoopSize <
Size) {
3202 int64_t STGOffset = ExtraBaseRegUpdate + 16;
3203 assert(STGOffset % 16 == 0 && STGOffset >= -4096 && STGOffset <= 4080 &&
3204 "STG immediate out of range");
3206 TII->get(ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex))
3213 }
else if (ExtraBaseRegUpdate) {
3215 int64_t AddSubOffset = std::abs(ExtraBaseRegUpdate);
3216 assert(AddSubOffset <= 4095 &&
"ADD/SUB immediate out of range");
3219 TII->get(ExtraBaseRegUpdate > 0 ? AArch64::ADDXri : AArch64::SUBXri))
3232 int64_t
Size, int64_t *TotalOffset) {
3234 if ((
MI.getOpcode() == AArch64::ADDXri ||
3235 MI.getOpcode() == AArch64::SUBXri) &&
3236 MI.getOperand(0).getReg() ==
Reg &&
MI.getOperand(1).getReg() ==
Reg) {
3238 int64_t
Offset =
MI.getOperand(2).getImm() << Shift;
3239 if (
MI.getOpcode() == AArch64::SUBXri)
3250 const int64_t kMaxOffset = 4080 - 16;
3252 const int64_t kMinOffset = -4095;
3253 if (PostOffset <= kMaxOffset && PostOffset >= kMinOffset &&
3254 PostOffset % 16 == 0) {
3265 for (
auto &TS : TSE) {
3269 if (
MI->memoperands_empty()) {
3273 MemRefs.
append(
MI->memoperands_begin(),
MI->memoperands_end());
3279 bool TryMergeSPUpdate) {
3280 if (TagStores.
empty())
3282 TagStoreInstr &FirstTagStore = TagStores[0];
3283 TagStoreInstr &LastTagStore = TagStores[TagStores.
size() - 1];
3284 Size = LastTagStore.Offset - FirstTagStore.Offset + LastTagStore.Size;
3285 DL = TagStores[0].MI->getDebugLoc();
3289 *MF, FirstTagStore.Offset,
false ,
3293 FrameRegUpdate = std::nullopt;
3295 mergeMemRefs(TagStores, CombinedMemRefs);
3298 dbgs() <<
"Replacing adjacent STG instructions:\n";
3299 for (
const auto &Instr : TagStores) {
3308 if (TagStores.
size() < 2)
3310 emitUnrolled(InsertI);
3313 int64_t TotalOffset = 0;
3314 if (TryMergeSPUpdate) {
3320 if (InsertI !=
MBB->
end() &&
3321 canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.
getFixed() +
Size,
3323 UpdateInstr = &*InsertI++;
3329 if (!UpdateInstr && TagStores.
size() < 2)
3333 FrameRegUpdate = TotalOffset;
3334 FrameRegUpdateFlags = UpdateInstr->
getFlags();
3341 for (
auto &TS : TagStores)
3342 TS.MI->eraseFromParent();
3346 int64_t &
Size,
bool &ZeroData) {
3350 unsigned Opcode =
MI.getOpcode();
3351 ZeroData = (Opcode == AArch64::STZGloop || Opcode == AArch64::STZGi ||
3352 Opcode == AArch64::STZ2Gi);
3354 if (Opcode == AArch64::STGloop || Opcode == AArch64::STZGloop) {
3355 if (!
MI.getOperand(0).isDead() || !
MI.getOperand(1).isDead())
3357 if (!
MI.getOperand(2).isImm() || !
MI.getOperand(3).isFI())
3360 Size =
MI.getOperand(2).getImm();
3364 if (Opcode == AArch64::STGi || Opcode == AArch64::STZGi)
3366 else if (Opcode == AArch64::ST2Gi || Opcode == AArch64::STZ2Gi)
3371 if (
MI.getOperand(0).getReg() != AArch64::SP || !
MI.getOperand(1).isFI())
3375 16 *
MI.getOperand(2).getImm();
3395 if (!isMergeableStackTaggingInstruction(
MI,
Offset,
Size, FirstZeroData))
3401 constexpr int kScanLimit = 10;
3404 NextI !=
E &&
Count < kScanLimit; ++NextI) {
3413 if (isMergeableStackTaggingInstruction(
MI,
Offset,
Size, ZeroData)) {
3414 if (ZeroData != FirstZeroData)
3422 if (!
MI.isTransient())
3431 if (
MI.mayLoadOrStore() ||
MI.hasUnmodeledSideEffects() ||
MI.isCall())
3447 LiveRegs.addLiveOuts(*
MBB);
3452 LiveRegs.stepBackward(*
I);
3455 if (LiveRegs.contains(AArch64::NZCV))
3459 [](
const TagStoreInstr &
Left,
const TagStoreInstr &
Right) {
3464 int64_t CurOffset = Instrs[0].Offset;
3465 for (
auto &Instr : Instrs) {
3466 if (CurOffset >
Instr.Offset)
3473 TagStoreEdit TSE(
MBB, FirstZeroData);
3474 std::optional<int64_t> EndOffset;
3475 for (
auto &Instr : Instrs) {
3476 if (EndOffset && *EndOffset !=
Instr.Offset) {
3478 TSE.emitCode(InsertI, TFI,
false);
3482 TSE.addInstruction(Instr);
3501 II = tryMergeAdjacentSTG(
II,
this, RS);
3508 shouldSignReturnAddressEverywhere(MF))
3517 bool IgnoreSPUpdates)
const {
3519 if (IgnoreSPUpdates) {
3522 FrameReg = AArch64::SP;
3532 FrameReg = AArch64::SP;
3557 bool IsValid =
false;
3559 int ObjectIndex = 0;
3561 int GroupIndex = -1;
3563 bool ObjectFirst =
false;
3566 bool GroupFirst =
false;
3571 enum { AccessFPR = 1, AccessHazard = 2, AccessGPR = 4 };
3575 SmallVector<int, 8> CurrentMembers;
3576 int NextGroupIndex = 0;
3577 std::vector<FrameObject> &Objects;
3580 GroupBuilder(std::vector<FrameObject> &Objects) : Objects(Objects) {}
3581 void AddMember(
int Index) { CurrentMembers.
push_back(Index); }
3582 void EndCurrentGroup() {
3583 if (CurrentMembers.
size() > 1) {
3588 for (
int Index : CurrentMembers) {
3589 Objects[
Index].GroupIndex = NextGroupIndex;
3595 CurrentMembers.clear();
3599bool FrameObjectCompare(
const FrameObject &
A,
const FrameObject &
B) {
3621 return std::make_tuple(!
A.IsValid,
A.Accesses,
A.ObjectFirst,
A.GroupFirst,
3622 A.GroupIndex,
A.ObjectIndex) <
3623 std::make_tuple(!
B.IsValid,
B.Accesses,
B.ObjectFirst,
B.GroupFirst,
3624 B.GroupIndex,
B.ObjectIndex);
3633 ObjectsToAllocate.
empty())
3638 for (
auto &Obj : ObjectsToAllocate) {
3639 FrameObjects[Obj].IsValid =
true;
3640 FrameObjects[Obj].ObjectIndex = Obj;
3645 GroupBuilder GB(FrameObjects);
3646 for (
auto &
MBB : MF) {
3647 for (
auto &
MI :
MBB) {
3648 if (
MI.isDebugInstr())
3653 if (FI && *FI >= 0 && *FI < (
int)FrameObjects.size()) {
3656 FrameObjects[*FI].Accesses |= FrameObject::AccessFPR;
3658 FrameObjects[*FI].Accesses |= FrameObject::AccessGPR;
3663 switch (
MI.getOpcode()) {
3664 case AArch64::STGloop:
3665 case AArch64::STZGloop:
3669 case AArch64::STZGi:
3670 case AArch64::ST2Gi:
3671 case AArch64::STZ2Gi:
3684 FrameObjects[FI].IsValid)
3692 GB.AddMember(TaggedFI);
3694 GB.EndCurrentGroup();
3697 GB.EndCurrentGroup();
3702 FrameObject::AccessHazard;
3704 for (
auto &Obj : FrameObjects)
3705 if (!Obj.Accesses ||
3706 Obj.Accesses == (FrameObject::AccessGPR | FrameObject::AccessFPR))
3707 Obj.Accesses = FrameObject::AccessGPR;
3716 FrameObjects[*TBPI].ObjectFirst =
true;
3717 FrameObjects[*TBPI].GroupFirst =
true;
3718 int FirstGroupIndex = FrameObjects[*TBPI].GroupIndex;
3719 if (FirstGroupIndex >= 0)
3720 for (FrameObject &Object : FrameObjects)
3721 if (Object.GroupIndex == FirstGroupIndex)
3722 Object.GroupFirst =
true;
3728 for (
auto &Obj : FrameObjects) {
3732 ObjectsToAllocate[i++] = Obj.ObjectIndex;
3736 dbgs() <<
"Final frame order:\n";
3737 for (
auto &Obj : FrameObjects) {
3740 dbgs() <<
" " << Obj.ObjectIndex <<
": group " << Obj.GroupIndex;
3741 if (Obj.ObjectFirst)
3742 dbgs() <<
", first";
3744 dbgs() <<
", group-first";
3755AArch64FrameLowering::inlineStackProbeLoopExactMultiple(
3766 MF.
insert(MBBInsertPoint, LoopMBB);
3768 MF.
insert(MBBInsertPoint, ExitMBB);
3799 MBB.addSuccessor(LoopMBB);
3803 return ExitMBB->
begin();
3806void AArch64FrameLowering::inlineStackProbeFixed(
3811 const AArch64InstrInfo *
TII =
3813 AArch64FunctionInfo *AFI = MF.
getInfo<AArch64FunctionInfo>();
3818 int64_t ProbeSize = MF.
getInfo<AArch64FunctionInfo>()->getStackProbeSize();
3819 int64_t NumBlocks = FrameSize / ProbeSize;
3820 int64_t ResidualSize = FrameSize % ProbeSize;
3822 LLVM_DEBUG(
dbgs() <<
"Stack probing: total " << FrameSize <<
" bytes, "
3823 << NumBlocks <<
" blocks of " << ProbeSize
3824 <<
" bytes, plus " << ResidualSize <<
" bytes\n");
3829 for (
int i = 0; i < NumBlocks; ++i) {
3835 EmitAsyncCFI && !HasFP, CFAOffset);
3844 }
else if (NumBlocks != 0) {
3850 EmitAsyncCFI && !HasFP, CFAOffset);
3852 MBBI = inlineStackProbeLoopExactMultiple(
MBBI, ProbeSize, ScratchReg);
3854 if (EmitAsyncCFI && !HasFP) {
3857 .buildDefCFARegister(AArch64::SP);
3861 if (ResidualSize != 0) {
3867 EmitAsyncCFI && !HasFP, CFAOffset);
3884 SmallVector<MachineInstr *, 4> ToReplace;
3885 for (MachineInstr &
MI :
MBB)
3886 if (
MI.getOpcode() == AArch64::PROBED_STACKALLOC ||
3887 MI.getOpcode() == AArch64::PROBED_STACKALLOC_VAR)
3890 for (MachineInstr *
MI : ToReplace) {
3891 if (
MI->getOpcode() == AArch64::PROBED_STACKALLOC) {
3892 Register ScratchReg =
MI->getOperand(0).getReg();
3893 int64_t FrameSize =
MI->getOperand(1).getImm();
3895 MI->getOperand(3).getImm());
3896 inlineStackProbeFixed(
MI->getIterator(), ScratchReg, FrameSize,
3899 assert(
MI->getOpcode() == AArch64::PROBED_STACKALLOC_VAR &&
3900 "Stack probe pseudo-instruction expected");
3901 const AArch64InstrInfo *
TII =
3902 MI->getMF()->getSubtarget<AArch64Subtarget>().getInstrInfo();
3903 Register TargetReg =
MI->getOperand(0).getReg();
3904 (void)
TII->probedStackAlloc(
MI->getIterator(), TargetReg,
true);
3906 MI->eraseFromParent();
3926 return std::make_tuple(
start(),
Idx) <
3927 std::make_tuple(Rhs.
start(), Rhs.
Idx);
3957 << (
Offset.getFixed() < 0 ?
"" :
"+") <<
Offset.getFixed();
3958 if (
Offset.getScalable())
3959 OS << (
Offset.getScalable() < 0 ?
"" :
"+") <<
Offset.getScalable()
3970void AArch64FrameLowering::emitRemarks(
3973 auto *AFI = MF.
getInfo<AArch64FunctionInfo>();
3978 const uint64_t HazardSize =
3981 if (HazardSize == 0)
3989 std::vector<StackAccess> StackAccesses(MFI.
getNumObjects());
3991 size_t NumFPLdSt = 0;
3992 size_t NumNonFPLdSt = 0;
3995 for (
const MachineBasicBlock &
MBB : MF) {
3996 for (
const MachineInstr &
MI :
MBB) {
3997 if (!
MI.mayLoadOrStore() ||
MI.getNumMemOperands() < 1)
3999 for (MachineMemOperand *MMO :
MI.memoperands()) {
4006 StackAccesses[ArrIdx].Idx = FrameIdx;
4007 StackAccesses[ArrIdx].Offset =
4018 StackAccesses[ArrIdx].AccessTypes |= RegTy;
4029 if (NumFPLdSt == 0 || NumNonFPLdSt == 0)
4040 if (StackAccesses.front().isMixed())
4041 MixedObjects.push_back(&StackAccesses.front());
4043 for (
auto It = StackAccesses.begin(), End = std::prev(StackAccesses.end());
4045 const auto &
First = *It;
4046 const auto &Second = *(It + 1);
4048 if (Second.isMixed())
4049 MixedObjects.push_back(&Second);
4051 if ((
First.isSME() && Second.isCPU()) ||
4052 (
First.isCPU() && Second.isSME())) {
4053 uint64_t Distance =
static_cast<uint64_t
>(Second.start() -
First.end());
4054 if (Distance < HazardSize)
4059 auto EmitRemark = [&](llvm::StringRef Str) {
4061 auto R = MachineOptimizationRemarkAnalysis(
4062 "sme",
"StackHazard", MF.getFunction().getSubprogram(), &MF.front());
4063 return R <<
formatv(
"stack hazard in '{0}': ", MF.getName()).str() << Str;
4067 for (
const auto &
P : HazardPairs)
4068 EmitRemark(
formatv(
"{0} is too close to {1}", *
P.first, *
P.second).str());
4070 for (
const auto *Obj : MixedObjects)
4072 formatv(
"{0} accessed by both GP and FP instructions", *Obj).str());
unsigned const MachineRegisterInfo * MRI
static void getLiveRegsForEntryMBB(LivePhysRegs &LiveRegs, const MachineBasicBlock &MBB)
static const unsigned DefaultSafeSPDisplacement
This is the biggest offset to the stack pointer we can encode in aarch64 instructions (without using ...
static bool produceCompactUnwindFrame(const AArch64FrameLowering &, MachineFunction &MF)
static cl::opt< bool > StackTaggingMergeSetTag("stack-tagging-merge-settag", cl::desc("merge settag instruction in function epilog"), cl::init(true), cl::Hidden)
bool enableMultiVectorSpillFill(const AArch64Subtarget &Subtarget, MachineFunction &MF)
static std::optional< int > getLdStFrameID(const MachineInstr &MI, const MachineFrameInfo &MFI)
static cl::opt< bool > SplitSVEObjects("aarch64-split-sve-objects", cl::desc("Split allocation of ZPR & PPR objects"), cl::init(true), cl::Hidden)
static cl::opt< bool > StackHazardInNonStreaming("aarch64-stack-hazard-in-non-streaming", cl::init(false), cl::Hidden)
void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL, MachineFunction &MF, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI, SmallVectorImpl< RegPairInfo > &RegPairs, bool NeedsFrameRecord)
static cl::opt< bool > OrderFrameObjects("aarch64-order-frame-objects", cl::desc("sort stack allocations"), cl::init(true), cl::Hidden)
static bool invalidateRegisterPairing(bool SpillExtendedVolatile, unsigned SpillCount, unsigned Reg1, unsigned Reg2, bool UsesWinAAPCS, bool NeedsWinCFI, bool NeedsFrameRecord, bool IsFirst, const TargetRegisterInfo *TRI)
Returns true if Reg1 and Reg2 cannot be paired using a ldp/stp instruction.
static cl::opt< bool > DisableMultiVectorSpillFill("aarch64-disable-multivector-spill-fill", cl::desc("Disable use of LD/ST pairs for SME2 or SVE2p1"), cl::init(false), cl::Hidden)
static cl::opt< bool > EnableRedZone("aarch64-redzone", cl::desc("enable use of redzone on AArch64"), cl::init(false), cl::Hidden)
cl::opt< bool > EnableHomogeneousPrologEpilog("homogeneous-prolog-epilog", cl::Hidden, cl::desc("Emit homogeneous prologue and epilogue for the size " "optimization (default = off)"))
static bool isLikelyToHaveSVEStack(const AArch64FrameLowering &AFL, const MachineFunction &MF)
static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg)
static SVEStackSizes determineSVEStackSizes(MachineFunction &MF, AssignObjectOffsets AssignOffsets)
Process all the SVE stack objects and the SVE stack size and offsets for each object.
static bool isTargetWindows(const MachineFunction &MF)
static unsigned estimateRSStackSizeLimit(MachineFunction &MF)
Look at each instruction that references stack frames and return the stack size limit beyond which so...
static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI, int &Min, int &Max)
returns true if there are any SVE callee saves.
static cl::opt< unsigned > StackHazardRemarkSize("aarch64-stack-hazard-remark-size", cl::init(0), cl::Hidden)
static MCRegister getRegisterOrZero(MCRegister Reg, bool HasSVE)
static unsigned getStackHazardSize(const MachineFunction &MF)
static bool invalidateWindowsRegisterPairing(bool SpillExtendedVolatile, unsigned SpillCount, unsigned Reg1, unsigned Reg2, bool NeedsWinCFI, bool IsFirst, const TargetRegisterInfo *TRI)
MCRegister findFreePredicateReg(BitVector &SavedRegs)
static bool isPPRAccess(const MachineInstr &MI)
static std::optional< int > getMMOFrameID(MachineMemOperand *MMO, const MachineFrameInfo &MFI)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file contains the declaration of the AArch64PrologueEmitter and AArch64EpilogueEmitter classes,...
static const int kSetTagLoopThreshold
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
This file contains the simple types necessary to represent the attributes associated with functions a...
#define CASE(ATTRNAME, AANAME,...)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
DXIL Forward Handle Accesses
const HexagonInstrInfo * TII
static std::string getTypeString(Type *T)
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
Register const TargetRegisterInfo * TRI
Promote Memory to Register
uint64_t IntrinsicInst * II
This file declares the machine register scavenger class.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallVector class.
void emitEpilogue()
Emit the epilogue.
StackOffset getSVEStackSize(const MachineFunction &MF) const
Returns the size of the entire SVE stackframe (PPRs + ZPRs).
StackOffset getZPRStackSize(const MachineFunction &MF) const
Returns the size of the entire ZPR stackframe (calleesaves + spills).
void processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameIndicesReplaced - This method is called immediately before MO_FrameIndex op...
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...
bool canUseAsPrologue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as a prologue for the target.
bool enableStackSlotScavenging(const MachineFunction &MF) const override
Returns true if the stack slot holes in the fixed and callee-save stack area should be used when allo...
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
bool enableFullCFIFixup(const MachineFunction &MF) const override
enableFullCFIFixup - Returns true if we may need to fix the unwind information such that it is accura...
StackOffset getFrameIndexReferenceFromSP(const MachineFunction &MF, int FI) const override
getFrameIndexReferenceFromSP - This method returns the offset from the stack pointer to the slot of t...
bool enableCFIFixup(const MachineFunction &MF) const override
Returns true if we may need to fix the unwind information for the function.
StackOffset getNonLocalFrameIndexReference(const MachineFunction &MF, int FI) const override
getNonLocalFrameIndexReference - This method returns the offset used to reference a frame index locat...
TargetStackID::Value getStackIDForScalableVectors() const override
Returns the StackID that scalable vectors should be associated with.
friend class AArch64PrologueEmitter
bool hasFPImpl(const MachineFunction &MF) const override
hasFPImpl - Return true if the specified function should have a dedicated frame pointer register.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
friend class AArch64EpilogueEmitter
void resetCFIToInitialState(MachineBasicBlock &MBB) const override
Emit CFI instructions that recreate the state of the unwind information upon function entry.
bool hasReservedCallFrame(const MachineFunction &MF) const override
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
StackOffset resolveFrameOffsetReference(const MachineFunction &MF, int64_t ObjectOffset, bool isFixed, TargetStackID::Value StackID, Register &FrameReg, bool PreferFP, bool ForSimm) const
bool canUseRedZone(const MachineFunction &MF) const
Can this function use the red zone for local allocations.
bool needsWinCFI(const MachineFunction &MF) const
bool isFPReserved(const MachineFunction &MF) const
Should the Frame Pointer be reserved for the current function?
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
int getSEHFrameIndexOffset(const MachineFunction &MF, int FI) const
unsigned getWinEHFuncletFrameSize(const MachineFunction &MF) const
Funclets only need to account for space for the callee saved registers, as the locals are accounted f...
void orderFrameObjects(const MachineFunction &MF, SmallVectorImpl< int > &ObjectsToAllocate) const override
Order the symbols in the local stack frame.
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
StackOffset getPPRStackSize(const MachineFunction &MF) const
Returns the size of the entire PPR stackframe (calleesaves + spills + hazard padding).
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - Provide a base+offset reference to an FI slot for debug info.
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI, unsigned &MinCSFrameIndex, unsigned &MaxCSFrameIndex) const override
assignCalleeSavedSpillSlots - Allows target to override spill slot assignment logic.
StackOffset getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, Register &FrameReg, bool IgnoreSPUpdates) const override
For Win64 AArch64 EH, the offset to the Unwind object is from the SP before the update.
StackOffset resolveFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg, bool PreferFP, bool ForSimm) const
unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const override
The parent frame offset (aka dispFrame) is only used on X86_64 to retrieve the parent's frame pointer...
bool requiresSaveVG(const MachineFunction &MF) const
void emitPacRetPlusLeafHardening(MachineFunction &MF) const
Harden the entire function with pac-ret.
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
unsigned getPPRCalleeSavedStackSize() const
void setHasStackFrame(bool s)
void setSwiftAsyncContextFrameIdx(int FI)
unsigned getTailCallReservedStack() const
unsigned getCalleeSavedStackSize(const MachineFrameInfo &MFI) const
void setCalleeSaveBaseToFrameRecordOffset(int Offset)
bool hasStackProbing() const
unsigned getArgumentStackToRestore() const
void setCalleeSaveStackHasFreeSpace(bool s)
int getCalleeSaveBaseToFrameRecordOffset() const
bool hasStreamingModeChanges() const
bool shouldSignReturnAddress(const MachineFunction &MF) const
void setPredicateRegForFillSpill(unsigned Reg)
int getStackHazardSlotIndex() const
void setCalleeSavedStackSize(unsigned Size)
void setSplitSVEObjects(bool s)
bool hasStackFrame() const
void setStackSizeSVE(uint64_t ZPR, uint64_t PPR)
std::optional< int > getTaggedBasePointerIndex() const
SMEAttrs getSMEFnAttrs() const
uint64_t getLocalStackSize() const
bool needsDwarfUnwindInfo(const MachineFunction &MF) const
unsigned getVarArgsGPRSize() const
uint64_t getStackSizePPR() const
bool hasSwiftAsyncContext() const
bool hasStackHazardSlotIndex() const
void setStackHazardSlotIndex(int Index)
unsigned getZPRCalleeSavedStackSize() const
void setStackHazardCSRSlotIndex(int Index)
unsigned getPredicateRegForFillSpill() const
void setSVECalleeSavedStackSize(unsigned ZPR, unsigned PPR)
bool hasCalculatedStackSizeSVE() const
uint64_t getStackSizeZPR() const
bool hasSVEStackSize() const
bool isStackHazardIncludedInCalleeSaveArea() const
unsigned getSVECalleeSavedStackSize() const
bool hasSplitSVEObjects() const
bool needsAsyncDwarfUnwindInfo(const MachineFunction &MF) const
bool hasCalleeSaveStackFreeSpace() const
static bool isTailCallReturnInst(const MachineInstr &MI)
Returns true if MI is one of the TCRETURN* instructions.
static bool isFpOrNEON(Register Reg)
Returns whether the physical register is FP or NEON.
void emitPrologue()
Emit the prologue.
bool isTargetWindows() const
const AArch64RegisterInfo * getRegisterInfo() const override
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
const AArch64InstrInfo * getInstrInfo() const override
const AArch64TargetLowering * getTargetLowering() const override
bool isTargetMachO() const
bool isSVEorStreamingSVEAvailable() const
Returns true if the target has access to either the full range of SVE instructions,...
bool isStreaming() const
Returns true if the function has a streaming body.
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this functions.
unsigned getRedZoneSize(const Function &F) const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
bool test(unsigned Idx) const
size_type count() const
count - Returns the number of bits which are set.
iterator_range< const_set_bits_iterator > set_bits() const
size_type size() const
size - Returns the number of bits in this bitvector.
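The BitVector entries above are the core of the register-zeroing bookkeeping. A minimal runnable sketch of that API, assuming only the LLVM ADT headers; the bit indices here are arbitrary stand-ins for register numbers:

#include "llvm/ADT/BitVector.h"
#include <cstdio>

int main() {
  llvm::BitVector RegsToZero(64); // one bit per hypothetical register
  RegsToZero.set(3);
  RegsToZero.set(17);

  // count() is the number of set bits; size() is the total bit width.
  std::printf("set: %u of %u\n", unsigned(RegsToZero.count()),
              unsigned(RegsToZero.size()));

  // set_bits() visits only the set indices, the same pattern the
  // register-zeroing code uses for its GPR/FPR bit vectors.
  for (unsigned Idx : RegsToZero.set_bits())
    std::printf("bit %u: %d\n", Idx, RegsToZero.test(Idx));
  return 0;
}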
Helper class for creating CFI instructions and inserting them into MIR.
The CalleeSavedInfo class tracks the information needed to locate where a callee-saved register is in the current frame.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function.
AttributeList getAttributes() const
Return the attribute list for this Function.
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
A set of physical registers with utility functions to track liveness when walking backward/forward through a block.
bool usesWindowsCFI() const
Wrapper class representing physical registers. Should be passed by value.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor blocks which refer to FromMBB to refer to this block instead.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
MachineInstr & instr_back()
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
reverse_iterator rbegin()
iterator insertAfter(iterator I, MachineInstr *MI)
Insert MI into the instruction list after I.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before 'Where'.
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack frame for this function contains any variable sized objects.
const AllocaInst * getObjectAllocation(int ObjectIdx) const
Return the underlying Alloca of the specified stack object if it exists.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
bool hasCalls() const
Return true if the current function has any function calls.
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a call to @llvm.frameaddress in this function.
void setObjectOffset(int ObjectIdx, int64_t SPOffset)
Set the stack frame offset of the specified object.
uint64_t getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a call to the @llvm.experimental.patchpoint builtin.
bool hasScalableStackID(int ObjectIdx) const
int getStackProtectorIndex() const
Return the index for the stack protector object.
LLVM_ABI int CreateSpillStackObject(uint64_t Size, Align Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative identifier to represent it.
LLVM_ABI uint64_t estimateStackSize(const MachineFunction &MF) const
Estimate and return the size of the stack frame.
void setStackID(int ObjectIdx, uint8_t ID)
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool isMaxCallFrameSizeComputed() const
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a call to the @llvm.experimental.stackmap builtin.
const std::vector< CalleeSavedInfo > & getCalleeSavedInfo() const
Returns a reference to the callee-saved info vector for the current function.
unsigned getNumObjects() const
Return the number of objects.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
bool hasStackProtectorIndex() const
bool hasStackObjects() const
Return true if there are any stack objects in this function.
uint8_t getStackID(int ObjectIdx) const
unsigned getNumFixedObjects() const
Return the number of fixed objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
int getObjectIndexBegin() const
Return the minimum frame object index.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
bool isDeadObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a dead object.
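The MachineFrameInfo calls above share one lifecycle: an object is created, its size and alignment are queryable immediately, and its SP-relative offset is assigned later by prolog/epilog insertion. A sketch under those assumptions; the helper name is illustrative, not LLVM API:

#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Support/Alignment.h"
#include <cassert>

using namespace llvm;

// Illustrative only: create a 16-byte spill slot in MF's frame.
static int createSpillSlot(MachineFunction &MF) {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  int FI = MFI.CreateSpillStackObject(/*Size=*/16, Align(16));

  // Size and alignment are valid right away; getObjectOffset(FI) only
  // becomes meaningful once offsets are assigned during PEI.
  assert(MFI.getObjectSize(FI) == 16 && MFI.getObjectAlign(FI) == Align(16));
  return FI;
}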
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do so.
const MachineBasicBlock & front() const
bool hasEHFunclets() const
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
void setFlags(unsigned flags)
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
uint32_t getFlags() const
Return the MI flags bitvector.
A description of a memory reference used in the backend.
const PseudoSourceValue * getPseudoValue() const
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const Value * getValue() const
Return the base address of the memory access.
MachineOperand class - Representation of each machine instruction operand.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified register class.
LLVM_ABI bool isLiveIn(Register Reg) const
LLVM_ABI const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
LLVM_ABI bool isPhysRegUsed(MCRegister PhysReg, bool SkipRegMaskTest=false) const
Return true if the specified register is modified or read in this function.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasStreamingInterface() const
bool hasNonStreamingInterfaceAndBody() const
bool hasStreamingBody() const
bool insert(const value_type &X)
Insert a new element into the SetVector.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
reference emplace_back(ArgTypes &&... Args)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
int64_t getFixed() const
Returns the fixed component of the stack.
int64_t getScalable() const
Returns the scalable component of the stack.
static StackOffset get(int64_t Fixed, int64_t Scalable)
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
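Since SVE frames mix plain byte offsets with offsets scaled by the runtime vector granule (VG), StackOffset keeps the two components separate, and arithmetic combines them per component. A small sketch, assuming StackOffset's usual home in llvm/Support/TypeSize.h:

#include "llvm/Support/TypeSize.h"
#include <cstdio>

using namespace llvm;

int main() {
  StackOffset CalleeSaves = StackOffset::getFixed(-64); // plain bytes
  StackOffset SVEArea = StackOffset::getScalable(-32);  // bytes * VG

  StackOffset Total = CalleeSaves + SVEArea; // components add independently
  std::printf("fixed %lld, scalable %lld\n",
              (long long)Total.getFixed(), (long long)Total.getScalable());

  StackOffset Same = StackOffset::get(-64, -32); // equivalent construction
  (void)Same;
  return 0;
}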
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() should actually get saved.
int getOffsetOfLocalArea() const
getOffsetOfLocalArea - This method returns the offset of the local area from the stack pointer on entrance to a function.
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligned on entry to a function.
StackDirection getStackGrowthDirection() const
getStackGrowthDirection - Return the direction the stack grows
virtual bool enableCFIFixup(const MachineFunction &MF) const
Returns true if we may need to fix the unwind information for the function.
TargetInstrInfo - Interface to description of machine instruction set.
Primary interface to the complete machine description for the target machine.
const Triple & getTargetTriple() const
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
LLVM_ABI bool FramePointerIsReserved(const MachineFunction &MF) const
FramePointerIsReserved - This returns true if the frame pointer must always either point to a new frame record or be un-modified in a function.
LLVM_ABI bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disabled for the given machine function.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
bool hasStackRealignment(const MachineFunction &MF) const
True if stack realignment is required and still possible.
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Triple - Helper class for working with autoconf configuration names.
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit extend flag + 3-bit shift amount.
const unsigned StackProbeMaxLoopUnroll
Maximum number of iterations to unroll for a constant size probing loop.
const unsigned StackProbeMaxUnprobedStack
Maximum allowed number of unprobed bytes above SP at an ABI boundary.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ AArch64_SVE_VectorCall
Used between AArch64 SVE functions.
@ PreserveMost
Used for runtime calls that preserve most registers.
@ CXX_FAST_TLS
Used for access functions.
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
@ PreserveAll
Used for runtime calls that preserve (almost) all registers.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
@ PreserveNone
Used for runtime calls that preserve no general-purpose registers.
@ Win64
The C convention as implemented on Windows/x86-64 and AArch64.
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will be made by making the callee clean up their stack.
@ C
The default llvm calling convention, compatible with C.
@ Define
Register definition.
@ ScalablePredicateVector
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
void stable_sort(R &&Range)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
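BuildMI returns a MachineInstrBuilder, so the addReg/addImm/setMIFlag entries listed earlier chain onto it. A non-standalone sketch; the AArch64 opcode and register enums come from the target-internal AArch64InstrInfo.h, and the choice of ADDXri is illustrative:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

// Emit "add sp, sp, #16" before MBBI and tag it as frame teardown.
void emitAddSP16(llvm::MachineBasicBlock &MBB,
                 llvm::MachineBasicBlock::iterator MBBI,
                 const llvm::DebugLoc &DL,
                 const llvm::TargetInstrInfo *TII) {
  llvm::BuildMI(MBB, MBBI, DL, TII->get(llvm::AArch64::ADDXri),
                llvm::AArch64::SP)
      .addReg(llvm::AArch64::SP)
      .addImm(16) // unsigned 12-bit immediate
      .addImm(0)  // no left shift applied to the immediate
      .setMIFlag(llvm::MachineInstr::FrameDestroy);
}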
int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset, bool *OutUseUnscaledOp=nullptr, unsigned *OutUnscaledOp=nullptr, int64_t *EmittableOffset=nullptr)
Check if the Offset is a valid frame offset for MI.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
@ AArch64FrameOffsetCannotUpdate
Offset cannot apply.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag=MachineInstr::NoFlags, bool SetNZCV=false, bool NeedsWinCFI=false, bool *HasWinCFI=nullptr, bool EmitCFAOffset=false, StackOffset InitialOffset={}, unsigned FrameReg=AArch64::SP)
emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg plus Offset.
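Combining emitFrameOffset with StackOffset gives the usual SP-adjustment call site. A sketch of one such call, with MBB, MBBI, DL, and TII assumed from the enclosing function:

// Materializes one or more instructions computing
//   sp = sp - (48 + 16 * VG)
// and tags them as part of the prologue.
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                StackOffset::get(-48, -16), TII, MachineInstr::FrameSetup);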
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Reports a serious error, calling any installed error handler.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
@ LLVM_MARK_AS_BITMASK_ENUM
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
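alignTo is the rounding primitive behind padding computations such as aligning a fixed-object area to a 16-byte boundary. A runnable sketch:

#include "llvm/Support/Alignment.h"
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t FixedObjectSize = 24;
  // Round up to the next 16-byte boundary: 24 -> 32.
  uint64_t Padded = llvm::alignTo(FixedObjectSize, llvm::Align(16));
  std::printf("%llu -> %llu\n", (unsigned long long)FixedObjectSize,
              (unsigned long long)Padded);
  return 0;
}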
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent to: C.erase(remove_if(C, pred), C.end());
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
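The range helpers above (any_of, find_if, erase_if, is_contained) accept whole ranges instead of iterator pairs. A self-contained sketch over made-up frame offsets:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include <cstdio>

int main() {
  llvm::SmallVector<int, 8> Offsets = {16, -8, 32, -24};

  bool HasNegative = llvm::any_of(Offsets, [](int O) { return O < 0; });
  auto It = llvm::find_if(Offsets, [](int O) { return O > 16; });
  int FirstBig = It == Offsets.end() ? -1 : *It; // read before mutating

  llvm::erase_if(Offsets, [](int O) { return O < 0; }); // drop negatives

  std::printf("negative: %d, first >16: %d, contains 32: %d\n",
              HasNegative, FirstBig, llvm::is_contained(Offsets, 32));
  return 0;
}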
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal.address from the specified value, returning the original object being addressed.
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-in's for a set of MBBs until the computation converges.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
bool operator<(const StackAccess &Rhs) const
void print(raw_ostream &OS) const
std::string getTypeString() const
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Pair of physical register and lane mask.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
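getFixedStack pairs with MachineFunction::getMachineMemOperand (listed earlier) to describe a spill-slot access. A non-standalone sketch in which MF, MIB, and the frame index FI are assumed from context:

// Describe a 64-bit store into frame index FI and attach it to the
// instruction being built, so later passes know the access's size and
// address, enabling alias analysis and load/store optimization on MIR.
llvm::MachineMemOperand *MMO = MF.getMachineMemOperand(
    llvm::MachinePointerInfo::getFixedStack(MF, FI),
    llvm::MachineMemOperand::MOStore, llvm::LLT::scalar(64),
    llvm::Align(8));
MIB.addMemOperand(MMO);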
SmallVector< WinEHTryBlockMapEntry, 4 > TryBlockMap
SmallVector< WinEHHandlerType, 1 > HandlerArray