71#define DEBUG_TYPE "aarch64-machine-sme-abi"
108 LiveRegs PhysLiveRegs;
109 Register StatusFlags = AArch64::NoRegister;
110 Register X0Save = AArch64::NoRegister;
113static bool isLegalEdgeBundleZAState(ZAState State) {
115 case ZAState::ACTIVE:
116 case ZAState::LOCAL_SAVED:
126StringRef getZAStateString(ZAState State) {
127#define MAKE_CASE(V) \
147 return AArch64::MPR128RegClass.contains(SR) ||
148 AArch64::ZTRRegClass.contains(SR);
154static std::pair<ZAState, MachineBasicBlock::iterator>
156 bool ZAOffAtReturn) {
159 if (
MI.getOpcode() == AArch64::InOutZAUsePseudo)
160 return {ZAState::ACTIVE, std::prev(InsertPt)};
162 if (
MI.getOpcode() == AArch64::RequiresZASavePseudo)
163 return {ZAState::LOCAL_SAVED, std::prev(InsertPt)};
166 return {ZAOffAtReturn ? ZAState::OFF : ZAState::ACTIVE, InsertPt};
168 for (
auto &MO :
MI.operands()) {
169 if (isZAorZTRegOp(
TRI, MO))
170 return {ZAState::ACTIVE, InsertPt};
173 return {ZAState::ANY, InsertPt};
177 inline static char ID = 0;
/// Returns the human-readable name of this pass: "Machine SME ABI pass".
/// Declared `override`, so it replaces a virtual member of a base class that
/// is not visible in this excerpt.
183 StringRef getPassName()
const override {
return "Machine SME ABI pass"; }
195 void collectNeededZAStates(
SMEAttrs);
199 void assignBundleZAStates();
203 void insertStateChanges();
210 LiveRegs PhysLiveRegs);
221 LiveRegs PhysLiveRegs);
227 LiveRegs PhysLiveRegs,
bool IsSave);
230 LiveRegs PhysLiveRegs);
233 ZAState From, ZAState To, LiveRegs PhysLiveRegs);
237 LiveRegs PhysLiveRegs) {
238 if (AFI->getSMEFnAttrs().hasAgnosticZAInterface())
239 return emitFullZASaveRestore(
MBB,
MBBI, PhysLiveRegs,
true);
240 return emitSetupLazySave(
MBB,
MBBI);
243 LiveRegs PhysLiveRegs) {
244 if (AFI->getSMEFnAttrs().hasAgnosticZAInterface())
245 return emitFullZASaveRestore(
MBB,
MBBI, PhysLiveRegs,
false);
246 return emitRestoreLazySave(
MBB,
MBBI, PhysLiveRegs);
250 LiveRegs PhysLiveRegs) {
251 if (AFI->getSMEFnAttrs().hasAgnosticZAInterface())
252 return emitAllocateFullZASaveBuffer(
MBB,
MBBI, PhysLiveRegs);
253 return emitAllocateLazySaveBuffer(
MBB,
MBBI);
264 TPIDR2State getTPIDR2Block();
271 ZAState NeededState{ZAState::ANY};
273 LiveRegs PhysLiveRegs = LiveRegs::None;
279 ZAState FixedEntryState{ZAState::ANY};
281 LiveRegs PhysLiveRegsAtEntry = LiveRegs::None;
282 LiveRegs PhysLiveRegsAtExit = LiveRegs::None;
289 std::optional<TPIDR2State> TPIDR2Block;
290 std::optional<MachineBasicBlock::iterator> AfterSMEProloguePt;
291 Register AgnosticZABufferPtr = AArch64::NoRegister;
292 LiveRegs PhysLiveRegsAfterSMEPrologue = LiveRegs::None;
304void MachineSMEABI::collectNeededZAStates(
SMEAttrs SMEFnAttrs) {
307 "Expected function to have ZA/ZT0 state!");
311 BlockInfo &
Block = State.Blocks[
MBB.getNumber()];
312 if (
MBB.isEntryBlock()) {
315 ? ZAState::CALLER_DORMANT
317 }
else if (
MBB.isEHPad()) {
319 Block.FixedEntryState = ZAState::LOCAL_SAVED;
325 auto GetPhysLiveRegs = [&] {
326 LiveRegs PhysLiveRegs = LiveRegs::None;
328 PhysLiveRegs |= LiveRegs::NZCV;
332 PhysLiveRegs |= LiveRegs::W0;
333 if (!LiveUnits.
available(AArch64::W0_HI))
334 PhysLiveRegs |= LiveRegs::W0_HI;
338 Block.PhysLiveRegsAtExit = GetPhysLiveRegs();
339 auto FirstTerminatorInsertPt =
MBB.getFirstTerminator();
340 auto FirstNonPhiInsertPt =
MBB.getFirstNonPHI();
344 LiveRegs PhysLiveRegs = GetPhysLiveRegs();
349 if (
MI.getOpcode() == AArch64::SMEStateAllocPseudo) {
350 State.AfterSMEProloguePt =
MBBI;
351 State.PhysLiveRegsAfterSMEPrologue = PhysLiveRegs;
354 auto [NeededState, InsertPt] = getZAStateBeforeInst(
357 InsertPt->getOpcode() == AArch64::ADJCALLSTACKDOWN) &&
358 "Unexpected state change insertion point!");
360 if (
MBBI == FirstTerminatorInsertPt)
361 Block.PhysLiveRegsAtExit = PhysLiveRegs;
362 if (
MBBI == FirstNonPhiInsertPt)
363 Block.PhysLiveRegsAtEntry = PhysLiveRegs;
364 if (NeededState != ZAState::ANY)
365 Block.Insts.push_back({NeededState, InsertPt, PhysLiveRegs});
369 std::reverse(
Block.Insts.begin(),
Block.Insts.end());
373void MachineSMEABI::assignBundleZAStates() {
376 LLVM_DEBUG(
dbgs() <<
"Assigning ZA state for edge bundle: " <<
I <<
'\n');
383 int EdgeStateCounts[ZAState::NUM_ZA_STATE] = {0};
384 for (
unsigned BlockID : Bundles->
getBlocks(
I)) {
387 const BlockInfo &
Block = State.Blocks[BlockID];
388 if (
Block.Insts.empty()) {
392 bool InEdge = Bundles->
getBundle(BlockID,
false) ==
I;
393 bool OutEdge = Bundles->
getBundle(BlockID,
true) ==
I;
395 ZAState DesiredIncomingState =
Block.Insts.front().NeededState;
396 if (InEdge && isLegalEdgeBundleZAState(DesiredIncomingState)) {
397 EdgeStateCounts[DesiredIncomingState]++;
399 << getZAStateString(DesiredIncomingState));
401 ZAState DesiredOutgoingState =
Block.Insts.back().NeededState;
402 if (OutEdge && isLegalEdgeBundleZAState(DesiredOutgoingState)) {
403 EdgeStateCounts[DesiredOutgoingState]++;
405 << getZAStateString(DesiredOutgoingState));
410 ZAState BundleState =
411 ZAState(
max_element(EdgeStateCounts) - EdgeStateCounts);
415 if (BundleState == ZAState::ANY)
416 BundleState = ZAState::ACTIVE;
419 dbgs() <<
"Chosen ZA state: " << getZAStateString(BundleState) <<
'\n'
422 dbgs() <<
" " << getZAStateString(ZAState(State)) <<
": " <<
Count;
426 State.BundleStates[
I] = BundleState;
430void MachineSMEABI::insertStateChanges() {
432 const BlockInfo &
Block = State.Blocks[
MBB.getNumber()];
433 ZAState InState = State.BundleStates[Bundles->
getBundle(
MBB.getNumber(),
436 ZAState CurrentState =
Block.FixedEntryState;
437 if (CurrentState == ZAState::ANY)
438 CurrentState = InState;
440 for (
auto &Inst :
Block.Insts) {
441 if (CurrentState != Inst.NeededState)
442 emitStateChange(
MBB, Inst.InsertPt, CurrentState, Inst.NeededState,
444 CurrentState = Inst.NeededState;
447 if (
MBB.succ_empty())
451 State.BundleStates[Bundles->
getBundle(
MBB.getNumber(),
true)];
452 if (CurrentState != OutState)
453 emitStateChange(
MBB,
MBB.getFirstTerminator(), CurrentState, OutState,
454 Block.PhysLiveRegsAtExit);
458TPIDR2State MachineSMEABI::getTPIDR2Block() {
459 if (State.TPIDR2Block)
460 return *State.TPIDR2Block;
463 return *State.TPIDR2Block;
469 return MBBI->getDebugLoc();
478 Register TPIDR2 =
MRI->createVirtualRegister(&AArch64::GPR64spRegClass);
479 Register TPIDR2Ptr =
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
488 .
addImm(AArch64SysReg::TPIDR2_EL0)
492PhysRegSave MachineSMEABI::createPhysRegSave(LiveRegs PhysLiveRegs,
496 PhysRegSave RegSave{PhysLiveRegs};
497 if (PhysLiveRegs & LiveRegs::NZCV) {
498 RegSave.StatusFlags =
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
500 .
addImm(AArch64SysReg::NZCV)
505 if (PhysLiveRegs & LiveRegs::W0) {
506 RegSave.X0Save =
MRI->createVirtualRegister(PhysLiveRegs & LiveRegs::W0_HI
507 ? &AArch64::GPR64RegClass
508 : &AArch64::GPR32RegClass);
510 .
addReg(PhysLiveRegs & LiveRegs::W0_HI ? AArch64::X0 : AArch64::W0);
515void MachineSMEABI::restorePhyRegSave(PhysRegSave
const &RegSave,
519 if (RegSave.StatusFlags != AArch64::NoRegister)
521 .
addImm(AArch64SysReg::NZCV)
522 .
addReg(RegSave.StatusFlags)
525 if (RegSave.X0Save != AArch64::NoRegister)
527 RegSave.PhysLiveRegs & LiveRegs::W0_HI ? AArch64::X0 : AArch64::W0)
533 LiveRegs PhysLiveRegs) {
536 Register TPIDR2EL0 =
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
540 PhysRegSave RegSave = createPhysRegSave(PhysLiveRegs,
MBB,
MBBI,
DL);
544 .
addImm(AArch64SVCR::SVCRZA)
548 .
addImm(AArch64SysReg::TPIDR2_EL0);
559 .
addRegMask(
TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
562 .
addImm(AArch64SysReg::TPIDR2_EL0)
565 restorePhyRegSave(RegSave,
MBB,
MBBI,
DL);
575 .
addImm(AArch64SysReg::TPIDR2_EL0)
580 .
addImm(AArch64SVCR::SVCRZA)
584void MachineSMEABI::emitAllocateLazySaveBuffer(
588 Register SP =
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
589 Register SVL =
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
596 if (Buffer == AArch64::NoRegister) {
604 "Lazy ZA save is not yet supported on Windows");
605 Buffer =
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
626 "TPIDR2 block initialization is not supported on big-endian targets");
644 Register TPIDR2EL0 =
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
647 .
addImm(AArch64SysReg::TPIDR2_EL0);
656 .
addRegMask(
TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
661 .
addImm(AArch64SVCR::SVCRZA)
665Register MachineSMEABI::getAgnosticZABufferPtr() {
666 if (State.AgnosticZABufferPtr != AArch64::NoRegister)
667 return State.AgnosticZABufferPtr;
669 State.AgnosticZABufferPtr =
670 BufferPtr != AArch64::NoRegister
673 return State.AgnosticZABufferPtr;
678 LiveRegs PhysLiveRegs,
bool IsSave) {
683 PhysRegSave RegSave = createPhysRegSave(PhysLiveRegs,
MBB,
MBBI,
DL);
687 .
addReg(getAgnosticZABufferPtr());
693 IsSave ? RTLIB::SMEABI_SME_SAVE : RTLIB::SMEABI_SME_RESTORE))
698 restorePhyRegSave(RegSave,
MBB,
MBBI,
DL);
701void MachineSMEABI::emitAllocateFullZASaveBuffer(
703 LiveRegs PhysLiveRegs) {
709 Register BufferPtr = getAgnosticZABufferPtr();
710 Register BufferSize =
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
712 PhysRegSave RegSave = createPhysRegSave(PhysLiveRegs,
MBB,
MBBI,
DL);
742 restorePhyRegSave(RegSave,
MBB,
MBBI,
DL);
747 ZAState From, ZAState To,
748 LiveRegs PhysLiveRegs) {
751 if (From == ZAState::ANY || To == ZAState::ANY)
756 if (From == ZAState::CALLER_DORMANT && To == ZAState::OFF)
761 if (From == ZAState::CALLER_DORMANT) {
763 "CALLER_DORMANT state requires private ZA interface");
765 "CALLER_DORMANT state only valid in entry block");
766 emitNewZAPrologue(
MBB,
MBB.getFirstNonPHI());
767 if (To == ZAState::ACTIVE)
773 From = ZAState::ACTIVE;
776 if (From == ZAState::ACTIVE && To == ZAState::LOCAL_SAVED)
777 emitZASave(
MBB, InsertPt, PhysLiveRegs);
778 else if (From == ZAState::LOCAL_SAVED && To == ZAState::ACTIVE)
779 emitZARestore(
MBB, InsertPt, PhysLiveRegs);
780 else if (To == ZAState::OFF) {
781 assert(From != ZAState::CALLER_DORMANT &&
782 "CALLER_DORMANT to OFF should have already been handled");
784 "Should not turn ZA off in agnostic ZA function");
785 emitZAOff(
MBB, InsertPt, From == ZAState::LOCAL_SAVED);
787 dbgs() <<
"Error: Transition from " << getZAStateString(From) <<
" to "
788 << getZAStateString(To) <<
'\n';
803 SMEAttrs SMEFnAttrs = AFI->getSMEFnAttrs();
804 if (!SMEFnAttrs.hasZAState() && !SMEFnAttrs.hasZT0State() &&
805 !SMEFnAttrs.hasAgnosticZAInterface())
808 assert(MF.getRegInfo().isSSA() &&
"Expected to be run on SSA form!");
813 Bundles = &getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles();
815 TII = Subtarget->getInstrInfo();
816 TRI = Subtarget->getRegisterInfo();
817 MRI = &MF.getRegInfo();
819 collectNeededZAStates(SMEFnAttrs);
820 assignBundleZAStates();
821 insertStateChanges();
824 if (State.AgnosticZABufferPtr != AArch64::NoRegister || State.TPIDR2Block) {
825 if (State.AfterSMEProloguePt) {
828 emitAllocateZASaveBuffer(*(*State.AfterSMEProloguePt)->getParent(),
829 *State.AfterSMEProloguePt,
830 State.PhysLiveRegsAfterSMEPrologue);
833 emitAllocateZASaveBuffer(
835 State.Blocks[EntryBlock.
getNumber()].PhysLiveRegsAtEntry);
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
const HexagonInstrInfo * TII
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
Register const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
This file defines the SmallVector class.
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
Register getEarlyAllocSMESaveBuffer() const
SMEAttrs getSMEFnAttrs() const
bool isTargetWindows() const
const AArch64RegisterInfo * getRegisterInfo() const override
const AArch64TargetLowering * getTargetLowering() const override
bool isLittleEndian() const
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by a pass iff it does not (1) add or remove basic blocks from the function, or (2) modify terminator instructions in any way.
ArrayRef< unsigned > getBlocks(unsigned Bundle) const
getBlocks - Return an array of blocks that are connected to Bundle.
unsigned getBundle(unsigned N, bool Out) const
getBundle - Return the ingoing (Out = false) or outgoing (Out = true) bundle number for basic block N
unsigned getNumBundles() const
getNumBundles - Return the total number of bundles in the CFG.
FunctionPass class - This class is used to implement most global optimizations.
A set of register units used to track register liveness.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
LLVM_ABI void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
LLVM_ABI void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
LLVM_ABI iterator getFirstNonPHI()
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
LLVM_ABI int CreateVariableSizedObject(Align Alignment, const AllocaInst *Alloca)
Notify the MachineFrameInfo object that a variable sized object has been created.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
unsigned getNumBlockIDs() const
getNumBlockIDs - Return the number of MBB ID's allocated.
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new register operand (the register may be virtual or physical).
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Wrapper class representing virtual and physical registers.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasAgnosticZAInterface() const
bool hasPrivateZAInterface() const
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
CallingConv Namespace - This namespace contains an enum with a value for the well-known calling conve...
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1
Preserve X1-X15, X19-X29, SP, Z0-Z31, P0-P15.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createMachineSMEABIPass()
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
LLVM_ABI char & MachineDominatorsID
MachineDominators - This pass is a machine dominators analysis pass.
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
LLVM_ABI char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
@ LLVM_MARK_AS_BITMASK_ENUM
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
This struct is a compact representation of a valid (non-zero power of two) alignment.