71#define DEBUG_TYPE "aarch64-machine-sme-abi"
109 Register StatusFlags = AArch64::NoRegister;
110 Register X0Save = AArch64::NoRegister;
116 ZAState NeededState{ZAState::ANY};
118 LiveRegs PhysLiveRegs = LiveRegs::None;
124 ZAState FixedEntryState{ZAState::ANY};
126 LiveRegs PhysLiveRegsAtEntry = LiveRegs::None;
127 LiveRegs PhysLiveRegsAtExit = LiveRegs::None;
133 std::optional<MachineBasicBlock::iterator> AfterSMEProloguePt;
134 LiveRegs PhysLiveRegsAfterSMEPrologue = LiveRegs::None;
/// Explicitly defaulted default constructor.
141 EmitContext() =
default;
146 return *TPIDR2BlockFI;
149 return *TPIDR2BlockFI;
154 if (AgnosticZABufferPtr != AArch64::NoRegister)
155 return AgnosticZABufferPtr;
158 AgnosticZABufferPtr =
159 BufferPtr != AArch64::NoRegister
162 return AgnosticZABufferPtr;
167 bool needsSaveBuffer()
const {
168 assert(!(TPIDR2BlockFI && AgnosticZABufferPtr) &&
169 "Cannot have both a TPIDR2 block and agnostic ZA buffer");
170 return TPIDR2BlockFI || AgnosticZABufferPtr != AArch64::NoRegister;
174 std::optional<int> TPIDR2BlockFI;
175 Register AgnosticZABufferPtr = AArch64::NoRegister;
178static bool isLegalEdgeBundleZAState(ZAState State) {
180 case ZAState::ACTIVE:
181 case ZAState::LOCAL_SAVED:
188StringRef getZAStateString(ZAState State) {
189#define MAKE_CASE(V) \
209 return AArch64::MPR128RegClass.contains(SR) ||
210 AArch64::ZTRRegClass.contains(SR);
216static std::pair<ZAState, MachineBasicBlock::iterator>
218 bool ZAOffAtReturn) {
221 if (
MI.getOpcode() == AArch64::InOutZAUsePseudo)
222 return {ZAState::ACTIVE, std::prev(InsertPt)};
224 if (
MI.getOpcode() == AArch64::RequiresZASavePseudo)
225 return {ZAState::LOCAL_SAVED, std::prev(InsertPt)};
228 return {ZAOffAtReturn ? ZAState::OFF : ZAState::ACTIVE, InsertPt};
230 for (
auto &MO :
MI.operands()) {
231 if (isZAorZTRegOp(
TRI, MO))
232 return {ZAState::ACTIVE, InsertPt};
235 return {ZAState::ANY, InsertPt};
239 inline static char ID = 0;
/// Returns the fixed, human-readable name of this pass
/// ("Machine SME ABI pass"); overrides the base-class hook.
245 StringRef getPassName()
const override {
return "Machine SME ABI pass"; }
257 FunctionInfo collectNeededZAStates(
SMEAttrs SMEFnAttrs);
262 const FunctionInfo &FnInfo);
266 void insertStateChanges(EmitContext &,
const FunctionInfo &FnInfo,
292 LiveRegs PhysLiveRegs,
bool IsSave);
300 std::pair<MachineBasicBlock::iterator, LiveRegs>
310 if (AFI->getSMEFnAttrs().hasAgnosticZAInterface())
311 return emitFullZASaveRestore(Context,
MBB,
MBBI, PhysLiveRegs,
313 return emitSetupLazySave(Context,
MBB,
MBBI);
317 if (AFI->getSMEFnAttrs().hasAgnosticZAInterface())
318 return emitFullZASaveRestore(Context,
MBB,
MBBI, PhysLiveRegs,
320 return emitRestoreLazySave(Context,
MBB,
MBBI, PhysLiveRegs);
325 if (AFI->getSMEFnAttrs().hasAgnosticZAInterface())
326 return emitAllocateFullZASaveBuffer(Context,
MBB,
MBBI, PhysLiveRegs);
327 return emitAllocateLazySaveBuffer(Context,
MBB,
MBBI);
347 LiveRegs PhysLiveRegs = LiveRegs::None;
349 PhysLiveRegs |= LiveRegs::NZCV;
353 PhysLiveRegs |= LiveRegs::W0;
354 if (!LiveUnits.
available(AArch64::W0_HI))
355 PhysLiveRegs |= LiveRegs::W0_HI;
360 if (PhysLiveRegs & LiveRegs::NZCV)
361 LiveUnits.
addReg(AArch64::NZCV);
362 if (PhysLiveRegs & LiveRegs::W0)
363 LiveUnits.
addReg(AArch64::W0);
364 if (PhysLiveRegs & LiveRegs::W0_HI)
365 LiveUnits.
addReg(AArch64::W0_HI);
368FunctionInfo MachineSMEABI::collectNeededZAStates(
SMEAttrs SMEFnAttrs) {
371 "Expected function to have ZA/ZT0 state!");
374 LiveRegs PhysLiveRegsAfterSMEPrologue = LiveRegs::None;
375 std::optional<MachineBasicBlock::iterator> AfterSMEProloguePt;
378 BlockInfo &
Block = Blocks[
MBB.getNumber()];
380 if (
MBB.isEntryBlock()) {
383 ? ZAState::CALLER_DORMANT
385 }
else if (
MBB.isEHPad()) {
387 Block.FixedEntryState = ZAState::LOCAL_SAVED;
393 Block.PhysLiveRegsAtExit = getPhysLiveRegs(LiveUnits);
394 auto FirstTerminatorInsertPt =
MBB.getFirstTerminator();
395 auto FirstNonPhiInsertPt =
MBB.getFirstNonPHI();
399 LiveRegs PhysLiveRegs = getPhysLiveRegs(LiveUnits);
404 if (
MI.getOpcode() == AArch64::SMEStateAllocPseudo) {
405 AfterSMEProloguePt =
MBBI;
406 PhysLiveRegsAfterSMEPrologue = PhysLiveRegs;
409 auto [NeededState, InsertPt] = getZAStateBeforeInst(
412 InsertPt->getOpcode() == AArch64::ADJCALLSTACKDOWN) &&
413 "Unexpected state change insertion point!");
415 if (
MBBI == FirstTerminatorInsertPt)
416 Block.PhysLiveRegsAtExit = PhysLiveRegs;
417 if (
MBBI == FirstNonPhiInsertPt)
418 Block.PhysLiveRegsAtEntry = PhysLiveRegs;
419 if (NeededState != ZAState::ANY)
420 Block.Insts.push_back({NeededState, InsertPt, PhysLiveRegs});
424 std::reverse(
Block.Insts.begin(),
Block.Insts.end());
427 return FunctionInfo{std::move(Blocks), AfterSMEProloguePt,
428 PhysLiveRegsAfterSMEPrologue};
434MachineSMEABI::assignBundleZAStates(
const EdgeBundles &Bundles,
435 const FunctionInfo &FnInfo) {
438 LLVM_DEBUG(
dbgs() <<
"Assigning ZA state for edge bundle: " <<
I <<
'\n');
445 int EdgeStateCounts[ZAState::NUM_ZA_STATE] = {0};
446 for (
unsigned BlockID : Bundles.
getBlocks(
I)) {
449 const BlockInfo &
Block = FnInfo.Blocks[BlockID];
450 if (
Block.Insts.empty()) {
454 bool InEdge = Bundles.
getBundle(BlockID,
false) ==
I;
455 bool OutEdge = Bundles.
getBundle(BlockID,
true) ==
I;
457 ZAState DesiredIncomingState =
Block.Insts.front().NeededState;
458 if (InEdge && isLegalEdgeBundleZAState(DesiredIncomingState)) {
459 EdgeStateCounts[DesiredIncomingState]++;
461 << getZAStateString(DesiredIncomingState));
463 ZAState DesiredOutgoingState =
Block.Insts.back().NeededState;
464 if (OutEdge && isLegalEdgeBundleZAState(DesiredOutgoingState)) {
465 EdgeStateCounts[DesiredOutgoingState]++;
467 << getZAStateString(DesiredOutgoingState));
472 ZAState BundleState =
473 ZAState(
max_element(EdgeStateCounts) - EdgeStateCounts);
477 if (BundleState == ZAState::ANY)
478 BundleState = ZAState::ACTIVE;
481 dbgs() <<
"Chosen ZA state: " << getZAStateString(BundleState) <<
'\n'
484 dbgs() <<
" " << getZAStateString(ZAState(State)) <<
": " <<
Count;
488 BundleStates[
I] = BundleState;
494std::pair<MachineBasicBlock::iterator, LiveRegs>
495MachineSMEABI::findStateChangeInsertionPoint(
500 if (Inst !=
Block.Insts.end()) {
501 InsertPt = Inst->InsertPt;
502 PhysLiveRegs = Inst->PhysLiveRegs;
504 InsertPt =
MBB.getFirstTerminator();
505 PhysLiveRegs =
Block.PhysLiveRegsAtExit;
508 if (!(PhysLiveRegs & LiveRegs::NZCV))
509 return {InsertPt, PhysLiveRegs};
513 if (Inst ==
Block.Insts.begin()) {
514 PrevStateChangeI =
MBB.begin();
520 PrevStateChangeI = std::prev(Inst)->InsertPt;
525 setPhysLiveRegs(LiveUnits, PhysLiveRegs);
528 if (
I->getOpcode() ==
TII->getCallFrameDestroyOpcode() ||
I->isCall())
532 return {
I, getPhysLiveRegs(LiveUnits)};
534 return {InsertPt, PhysLiveRegs};
537void MachineSMEABI::insertStateChanges(EmitContext &Context,
538 const FunctionInfo &FnInfo,
542 const BlockInfo &
Block = FnInfo.Blocks[
MBB.getNumber()];
543 ZAState InState = BundleStates[Bundles.
getBundle(
MBB.getNumber(),
546 ZAState CurrentState =
Block.FixedEntryState;
547 if (CurrentState == ZAState::ANY)
548 CurrentState = InState;
550 for (
auto &Inst :
Block.Insts) {
551 if (CurrentState != Inst.NeededState) {
552 auto [InsertPt, PhysLiveRegs] =
553 findStateChangeInsertionPoint(
MBB,
Block, &Inst);
554 emitStateChange(Context,
MBB, InsertPt, CurrentState, Inst.NeededState,
556 CurrentState = Inst.NeededState;
560 if (
MBB.succ_empty())
565 if (CurrentState != OutState) {
566 auto [InsertPt, PhysLiveRegs] =
567 findStateChangeInsertionPoint(
MBB,
Block,
Block.Insts.end());
568 emitStateChange(Context,
MBB, InsertPt, CurrentState, OutState,
577 return MBBI->getDebugLoc();
581void MachineSMEABI::emitSetupLazySave(EmitContext &Context,
587 Register TPIDR2 =
MRI->createVirtualRegister(&AArch64::GPR64spRegClass);
588 Register TPIDR2Ptr =
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
597 .
addImm(AArch64SysReg::TPIDR2_EL0)
601PhysRegSave MachineSMEABI::createPhysRegSave(
LiveRegs PhysLiveRegs,
605 PhysRegSave RegSave{PhysLiveRegs};
606 if (PhysLiveRegs & LiveRegs::NZCV) {
607 RegSave.StatusFlags =
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
609 .
addImm(AArch64SysReg::NZCV)
614 if (PhysLiveRegs & LiveRegs::W0) {
615 RegSave.X0Save =
MRI->createVirtualRegister(PhysLiveRegs & LiveRegs::W0_HI
616 ? &AArch64::GPR64RegClass
617 : &AArch64::GPR32RegClass);
619 .
addReg(PhysLiveRegs & LiveRegs::W0_HI ? AArch64::X0 : AArch64::W0);
624void MachineSMEABI::restorePhyRegSave(
const PhysRegSave &RegSave,
628 if (RegSave.StatusFlags != AArch64::NoRegister)
630 .
addImm(AArch64SysReg::NZCV)
631 .
addReg(RegSave.StatusFlags)
634 if (RegSave.X0Save != AArch64::NoRegister)
636 RegSave.PhysLiveRegs & LiveRegs::W0_HI ? AArch64::X0 : AArch64::W0)
640void MachineSMEABI::emitRestoreLazySave(EmitContext &Context,
646 Register TPIDR2EL0 =
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
650 PhysRegSave RegSave = createPhysRegSave(PhysLiveRegs,
MBB,
MBBI,
DL);
654 .
addImm(AArch64SVCR::SVCRZA)
658 .
addImm(AArch64SysReg::TPIDR2_EL0);
669 .
addRegMask(
TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
672 .
addImm(AArch64SysReg::TPIDR2_EL0)
675 restorePhyRegSave(RegSave,
MBB,
MBBI,
DL);
685 .
addImm(AArch64SysReg::TPIDR2_EL0)
690 .
addImm(AArch64SVCR::SVCRZA)
694void MachineSMEABI::emitAllocateLazySaveBuffer(
699 Register SP =
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
700 Register SVL =
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
707 if (Buffer == AArch64::NoRegister) {
715 "Lazy ZA save is not yet supported on Windows");
716 Buffer =
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
737 "TPIDR2 block initialization is not supported on big-endian targets");
755 Register TPIDR2EL0 =
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
758 .
addImm(AArch64SysReg::TPIDR2_EL0);
767 .
addRegMask(
TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
772 .
addImm(AArch64SVCR::SVCRZA)
776void MachineSMEABI::emitFullZASaveRestore(EmitContext &Context,
779 LiveRegs PhysLiveRegs,
bool IsSave) {
784 PhysRegSave RegSave = createPhysRegSave(PhysLiveRegs,
MBB,
MBBI,
DL);
788 .
addReg(Context.getAgnosticZABufferPtr(*MF));
794 IsSave ? RTLIB::SMEABI_SME_SAVE : RTLIB::SMEABI_SME_RESTORE))
799 restorePhyRegSave(RegSave,
MBB,
MBBI,
DL);
802void MachineSMEABI::emitAllocateFullZASaveBuffer(
810 Register BufferPtr = Context.getAgnosticZABufferPtr(*MF);
811 Register BufferSize =
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
813 PhysRegSave RegSave = createPhysRegSave(PhysLiveRegs,
MBB,
MBBI,
DL);
843 restorePhyRegSave(RegSave,
MBB,
MBBI,
DL);
846void MachineSMEABI::emitStateChange(EmitContext &Context,
849 ZAState From, ZAState To,
852 if (From == ZAState::ANY || To == ZAState::ANY)
857 if (From == ZAState::CALLER_DORMANT && To == ZAState::OFF)
862 if (From == ZAState::CALLER_DORMANT) {
864 "CALLER_DORMANT state requires private ZA interface");
866 "CALLER_DORMANT state only valid in entry block");
867 emitNewZAPrologue(
MBB,
MBB.getFirstNonPHI());
868 if (To == ZAState::ACTIVE)
874 From = ZAState::ACTIVE;
877 if (From == ZAState::ACTIVE && To == ZAState::LOCAL_SAVED)
878 emitZASave(Context,
MBB, InsertPt, PhysLiveRegs);
879 else if (From == ZAState::LOCAL_SAVED && To == ZAState::ACTIVE)
880 emitZARestore(Context,
MBB, InsertPt, PhysLiveRegs);
881 else if (To == ZAState::OFF) {
882 assert(From != ZAState::CALLER_DORMANT &&
883 "CALLER_DORMANT to OFF should have already been handled");
885 "Should not turn ZA off in agnostic ZA function");
886 emitZAOff(
MBB, InsertPt, From == ZAState::LOCAL_SAVED);
888 dbgs() <<
"Error: Transition from " << getZAStateString(From) <<
" to "
889 << getZAStateString(To) <<
'\n';
904 SMEAttrs SMEFnAttrs = AFI->getSMEFnAttrs();
905 if (!SMEFnAttrs.hasZAState() && !SMEFnAttrs.hasZT0State() &&
906 !SMEFnAttrs.hasAgnosticZAInterface())
909 assert(MF.getRegInfo().isSSA() &&
"Expected to be run on SSA form!");
913 TII = Subtarget->getInstrInfo();
914 TRI = Subtarget->getRegisterInfo();
915 MRI = &MF.getRegInfo();
918 getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles();
920 FunctionInfo FnInfo = collectNeededZAStates(SMEFnAttrs);
924 insertStateChanges(Context, FnInfo, Bundles, BundleStates);
926 if (Context.needsSaveBuffer()) {
927 if (FnInfo.AfterSMEProloguePt) {
931 emitAllocateZASaveBuffer(Context, *
MBBI->getParent(),
MBBI,
932 FnInfo.PhysLiveRegsAfterSMEPrologue);
935 emitAllocateZASaveBuffer(
937 FnInfo.Blocks[EntryBlock.
getNumber()].PhysLiveRegsAtEntry);
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
const HexagonInstrInfo * TII
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
Register const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
This file defines the SmallVector class.
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-specific information for each MachineFunction.
Register getEarlyAllocSMESaveBuffer() const
SMEAttrs getSMEFnAttrs() const
bool isTargetWindows() const
const AArch64RegisterInfo * getRegisterInfo() const override
const AArch64TargetLowering * getTargetLowering() const override
bool isLittleEndian() const
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
ArrayRef< unsigned > getBlocks(unsigned Bundle) const
getBlocks - Return an array of blocks that are connected to Bundle.
unsigned getBundle(unsigned N, bool Out) const
getBundle - Return the ingoing (Out = false) or outgoing (Out = true) bundle number for basic block N
unsigned getNumBundles() const
getNumBundles - Return the total number of bundles in the CFG.
FunctionPass class - This class is used to implement most global optimizations.
A set of register units used to track register liveness.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
LLVM_ABI void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
LLVM_ABI void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFunction yet, in which case this will return -1.
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
LLVM_ABI int CreateVariableSizedObject(Align Alignment, const AllocaInst *Alloca)
Notify the MachineFrameInfo object that a variable sized object has been created.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
unsigned getNumBlockIDs() const
getNumBlockIDs - Return the number of MBB ID's allocated.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do so.
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified register class.
Wrapper class representing virtual and physical registers.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasAgnosticZAInterface() const
bool hasPrivateZAInterface() const
typename SuperClass::const_iterator const_iterator
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
CallingConv Namespace - This namespace contains an enum with a value for the well-known calling conventions.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1
Preserve X1-X15, X19-X29, SP, Z0-Z31, P0-P15.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createMachineSMEABIPass()
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
LLVM_ABI char & MachineDominatorsID
MachineDominators - This pass is a machine dominators analysis pass.
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
LLVM_ABI char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
@ LLVM_MARK_AS_BITMASK_ENUM
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly.
This struct is a compact representation of a valid (non-zero power of two) alignment.