71#define DEBUG_TYPE "aarch64-machine-sme-abi"
108 LiveRegs PhysLiveRegs;
109 Register StatusFlags = AArch64::NoRegister;
110 Register X0Save = AArch64::NoRegister;
113static bool isLegalEdgeBundleZAState(ZAState State) {
115 case ZAState::ACTIVE:
116 case ZAState::LOCAL_SAVED:
126StringRef getZAStateString(ZAState State) {
127#define MAKE_CASE(V) \
147 return AArch64::MPR128RegClass.contains(SR) ||
148 AArch64::ZTRRegClass.contains(SR);
154static std::pair<ZAState, MachineBasicBlock::iterator>
156 bool ZAOffAtReturn) {
159 if (
MI.getOpcode() == AArch64::InOutZAUsePseudo)
160 return {ZAState::ACTIVE, std::prev(InsertPt)};
162 if (
MI.getOpcode() == AArch64::RequiresZASavePseudo)
163 return {ZAState::LOCAL_SAVED, std::prev(InsertPt)};
166 return {ZAOffAtReturn ? ZAState::OFF : ZAState::ACTIVE, InsertPt};
168 for (
auto &MO :
MI.operands()) {
169 if (isZAorZTRegOp(
TRI, MO))
170 return {ZAState::ACTIVE, InsertPt};
173 return {ZAState::ANY, InsertPt};
177 inline static char ID = 0;
195 void collectNeededZAStates(
SMEAttrs);
199 void assignBundleZAStates();
203 void insertStateChanges();
210 LiveRegs PhysLiveRegs);
219 ZAState
From, ZAState To, LiveRegs PhysLiveRegs);
229 TPIDR2State getTPIDR2Block();
234 ZAState NeededState{ZAState::ANY};
236 LiveRegs PhysLiveRegs = LiveRegs::None;
242 ZAState FixedEntryState{ZAState::ANY};
244 LiveRegs PhysLiveRegsAtExit = LiveRegs::None;
251 std::optional<TPIDR2State> TPIDR2Block;
262void MachineSMEABI::collectNeededZAStates(
SMEAttrs SMEFnAttrs) {
264 "Expected function to have ZA/ZT0 state!");
266 State.Blocks.resize(MF->getNumBlockIDs());
272 ? ZAState::CALLER_DORMANT
276 Block.FixedEntryState = ZAState::LOCAL_SAVED;
282 auto GetPhysLiveRegs = [&] {
283 LiveRegs PhysLiveRegs = LiveRegs::None;
285 PhysLiveRegs |= LiveRegs::NZCV;
289 PhysLiveRegs |= LiveRegs::W0;
290 if (!LiveUnits.
available(AArch64::W0_HI))
291 PhysLiveRegs |= LiveRegs::W0_HI;
295 Block.PhysLiveRegsAtExit = GetPhysLiveRegs();
300 LiveRegs PhysLiveRegs = GetPhysLiveRegs();
301 auto [NeededState, InsertPt] = getZAStateBeforeInst(
304 InsertPt->getOpcode() == AArch64::ADJCALLSTACKDOWN) &&
305 "Unexpected state change insertion point!");
307 if (
MBBI == FirstTerminatorInsertPt)
308 Block.PhysLiveRegsAtExit = PhysLiveRegs;
309 if (NeededState != ZAState::ANY)
310 Block.Insts.push_back({NeededState, InsertPt, PhysLiveRegs});
314 std::reverse(
Block.Insts.begin(),
Block.Insts.end());
318void MachineSMEABI::assignBundleZAStates() {
319 State.BundleStates.resize(Bundles->getNumBundles());
320 for (
unsigned I = 0, E = Bundles->getNumBundles();
I != E; ++
I) {
321 LLVM_DEBUG(
dbgs() <<
"Assigning ZA state for edge bundle: " <<
I <<
'\n');
328 int EdgeStateCounts[ZAState::NUM_ZA_STATE] = {0};
329 for (
unsigned BlockID : Bundles->getBlocks(
I)) {
332 const BlockInfo &
Block = State.Blocks[BlockID];
333 if (
Block.Insts.empty()) {
337 bool InEdge = Bundles->getBundle(BlockID,
false) ==
I;
338 bool OutEdge = Bundles->getBundle(BlockID,
true) ==
I;
340 ZAState DesiredIncomingState =
Block.Insts.front().NeededState;
341 if (InEdge && isLegalEdgeBundleZAState(DesiredIncomingState)) {
342 EdgeStateCounts[DesiredIncomingState]++;
344 << getZAStateString(DesiredIncomingState));
346 ZAState DesiredOutgoingState =
Block.Insts.back().NeededState;
347 if (OutEdge && isLegalEdgeBundleZAState(DesiredOutgoingState)) {
348 EdgeStateCounts[DesiredOutgoingState]++;
350 << getZAStateString(DesiredOutgoingState));
355 ZAState BundleState =
356 ZAState(
max_element(EdgeStateCounts) - EdgeStateCounts);
360 if (BundleState == ZAState::ANY)
361 BundleState = ZAState::ACTIVE;
364 dbgs() <<
"Chosen ZA state: " << getZAStateString(BundleState) <<
'\n'
366 for (
auto [State, Count] :
enumerate(EdgeStateCounts))
367 dbgs() <<
" " << getZAStateString(ZAState(State)) <<
": " << Count;
371 State.BundleStates[
I] = BundleState;
375void MachineSMEABI::insertStateChanges() {
378 ZAState InState = State.BundleStates[Bundles->getBundle(
MBB.
getNumber(),
381 ZAState CurrentState =
Block.FixedEntryState;
382 if (CurrentState == ZAState::ANY)
383 CurrentState = InState;
385 for (
auto &Inst :
Block.Insts) {
386 if (CurrentState != Inst.NeededState)
387 emitStateChange(
MBB, Inst.InsertPt, CurrentState, Inst.NeededState,
389 CurrentState = Inst.NeededState;
396 State.BundleStates[Bundles->getBundle(
MBB.
getNumber(),
true)];
397 if (CurrentState != OutState)
399 Block.PhysLiveRegsAtExit);
403TPIDR2State MachineSMEABI::getTPIDR2Block() {
404 if (State.TPIDR2Block)
405 return *State.TPIDR2Block;
408 return *State.TPIDR2Block;
414 return MBBI->getDebugLoc();
423 Register TPIDR2 =
MRI->createVirtualRegister(&AArch64::GPR64spRegClass);
424 Register TPIDR2Ptr =
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
433 .
addImm(AArch64SysReg::TPIDR2_EL0)
437PhysRegSave MachineSMEABI::createPhysRegSave(LiveRegs PhysLiveRegs,
441 PhysRegSave RegSave{PhysLiveRegs};
442 if (PhysLiveRegs & LiveRegs::NZCV) {
443 RegSave.StatusFlags =
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
445 .
addImm(AArch64SysReg::NZCV)
450 if (PhysLiveRegs & LiveRegs::W0) {
451 RegSave.X0Save =
MRI->createVirtualRegister(PhysLiveRegs & LiveRegs::W0_HI
452 ? &AArch64::GPR64RegClass
453 : &AArch64::GPR32RegClass);
455 .
addReg(PhysLiveRegs & LiveRegs::W0_HI ? AArch64::X0 : AArch64::W0);
460void MachineSMEABI::restorePhyRegSave(PhysRegSave
const &RegSave,
464 if (RegSave.StatusFlags != AArch64::NoRegister)
466 .
addImm(AArch64SysReg::NZCV)
467 .
addReg(RegSave.StatusFlags)
470 if (RegSave.X0Save != AArch64::NoRegister)
472 RegSave.PhysLiveRegs & LiveRegs::W0_HI ? AArch64::X0 : AArch64::W0)
478 LiveRegs PhysLiveRegs) {
479 auto *TLI = Subtarget->getTargetLowering();
481 Register TPIDR2EL0 =
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
485 PhysRegSave RegSave = createPhysRegSave(PhysLiveRegs,
MBB,
MBBI,
DL);
489 .
addImm(AArch64SVCR::SVCRZA)
493 .
addImm(AArch64SysReg::TPIDR2_EL0);
504 .
addRegMask(
TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
507 .
addImm(AArch64SysReg::TPIDR2_EL0)
510 restorePhyRegSave(RegSave,
MBB,
MBBI,
DL);
520 .
addImm(AArch64SysReg::TPIDR2_EL0)
525 .
addImm(AArch64SVCR::SVCRZA)
529void MachineSMEABI::emitAllocateLazySaveBuffer(
534 Register SP =
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
535 Register SVL =
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
536 Register Buffer =
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
547 assert(!Subtarget->isTargetWindows() &&
548 "Lazy ZA save is not yet supported on Windows");
567 if (!Subtarget->isLittleEndian())
569 "TPIDR2 block initialization is not supported on big-endian targets");
583 auto *TLI = Subtarget->getTargetLowering();
587 Register TPIDR2EL0 =
MRI->createVirtualRegister(&AArch64::GPR64RegClass);
590 .
addImm(AArch64SysReg::TPIDR2_EL0);
600 .
addRegMask(
TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
605 .
addImm(AArch64SVCR::SVCRZA)
611 ZAState
From, ZAState To,
612 LiveRegs PhysLiveRegs) {
615 if (
From == ZAState::ANY || To == ZAState::ANY)
620 if (
From == ZAState::CALLER_DORMANT && To == ZAState::OFF)
625 if (
From == ZAState::CALLER_DORMANT) {
630 "CALLER_DORMANT state requires private ZA interface");
632 "CALLER_DORMANT state only valid in entry block");
634 if (To == ZAState::ACTIVE)
640 From = ZAState::ACTIVE;
643 if (
From == ZAState::ACTIVE && To == ZAState::LOCAL_SAVED)
644 emitSetupLazySave(
MBB, InsertPt);
645 else if (
From == ZAState::LOCAL_SAVED && To == ZAState::ACTIVE)
646 emitRestoreLazySave(
MBB, InsertPt, PhysLiveRegs);
647 else if (To == ZAState::OFF) {
649 "CALLER_DORMANT to OFF should have already been handled");
650 emitZAOff(
MBB, InsertPt,
From == ZAState::LOCAL_SAVED);
652 dbgs() <<
"Error: Transition from " << getZAStateString(
From) <<
" to "
653 << getZAStateString(To) <<
'\n';
668 SMEAttrs SMEFnAttrs = AFI->getSMEFnAttrs();
669 if (!SMEFnAttrs.hasZAState() && !SMEFnAttrs.hasZT0State())
672 assert(MF.getRegInfo().isSSA() &&
"Expected to be run on SSA form!");
677 Bundles = &getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles();
679 TII = Subtarget->getInstrInfo();
680 TRI = Subtarget->getRegisterInfo();
681 MRI = &MF.getRegInfo();
683 collectNeededZAStates(SMEFnAttrs);
684 assignBundleZAStates();
685 insertStateChanges();
688 if (State.TPIDR2Block) {
690 emitAllocateLazySaveBuffer(EntryBlock, EntryBlock.
getFirstNonPHI());
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
#define LLVM_MARK_AS_BITMASK_ENUM(LargestValue)
LLVM_MARK_AS_BITMASK_ENUM lets you opt in an individual enum type so you can perform bitwise operatio...
BlockVerifier::State From
DenseMap< Block *, BlockRelaxAux > Blocks
const HexagonInstrInfo * TII
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
Register const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
This file defines the SmallVector class.
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
SMEAttrs getSMEFnAttrs() const
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
FunctionPass class - This class is used to implement most global optimizations.
A set of register units used to track register liveness.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
LLVM_ABI void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
LLVM_ABI void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
bool isEHPad() const
Returns true if the block is a landing pad.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI bool isEntryBlock() const
Returns true if this is the entry block of the function.
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
LLVM_ABI int CreateVariableSizedObject(Align Alignment, const AllocaInst *Alloca)
Notify the MachineFrameInfo object that a variable sized object has been created.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Wrapper class representing virtual and physical registers.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasPrivateZAInterface() const
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createMachineSMEABIPass()
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
LLVM_ABI char & MachineDominatorsID
MachineDominators - This pass is a machine dominators analysis pass.
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
LLVM_ABI char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
This struct is a compact representation of a valid (non-zero power of two) alignment.