LLVM 22.0.0git
MachineSMEABIPass.cpp
Go to the documentation of this file.
1//===- MachineSMEABIPass.cpp ----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass implements the SME ABI requirements for ZA state. This includes
10// implementing the lazy ZA state save schemes around calls.
11//
12//===----------------------------------------------------------------------===//
13//
14// This pass works by collecting instructions that require ZA to be in a
15// specific state (e.g., "ACTIVE" or "SAVED") and inserting the necessary state
16// transitions to ensure ZA is in the required state before instructions. State
17// transitions represent actions such as setting up or restoring a lazy save.
18// Certain points within a function may also have predefined states independent
19// of any instructions, for example, a "shared_za" function is always entered
20// and exited in the "ACTIVE" state.
21//
22// To handle ZA state across control flow, we make use of edge bundling. This
23// assigns each block an "incoming" and "outgoing" edge bundle (representing
24// incoming and outgoing edges). Initially, these are unique to each block;
25// then, in the process of forming bundles, the outgoing bundle of a block is
26// joined with the incoming bundle of all successors. The result is that each
27// bundle can be assigned a single ZA state, which ensures the state required by
28// all of a block's successors is the same, and that each basic block will always
29// be entered with the same ZA state. This eliminates the need for splitting
30// edges to insert state transitions or "phi" nodes for ZA states.
31//
32// See below for a simple example of edge bundling.
33//
34// The following shows a conditionally executed basic block (BB1):
35//
36//  if (cond)
37//    BB1
38//  BB2
39//
40//      Initial Bundles                Joined Bundles
41//
42//          ┌──0──┐                ┌──0──┐
43//          │ BB0 │                │ BB0 │
44//          └──1──┘                └──1──┘
45//             ├───────┐              ├───────┐
46//             ▼       │              ▼       │
47//          ┌──2──┐    │   ─────►  ┌──1──┐    │
48//          │ BB1 │    ▼           │ BB1 │    ▼
49//          └──3──┘ ┌──4──┐        └──1──┘ ┌──1──┐
50//             └───►4 BB2 │           └───►1 BB2 │
51//                  └──5──┘                └──2──┘
52//
53// On the left are the initial per-block bundles, and on the right are the
54// joined bundles (which are the result of the EdgeBundles analysis).
55
56#include "AArch64InstrInfo.h"
58#include "AArch64Subtarget.h"
68
69using namespace llvm;
70
71#define DEBUG_TYPE "aarch64-machine-sme-abi"
72
73namespace {
74
// The ZA states tracked by this pass.
//
// The enumerators are also used as array indices (assignBundleZAStates sizes
// its per-state counters with NUM_ZA_STATE and indexes them by state), so ANY
// must stay at 0 and NUM_ZA_STATE must stay last. Only ACTIVE and LOCAL_SAVED
// are accepted as edge-bundle states (see isLegalEdgeBundleZAState).
75enum ZAState {
76 // Any/unknown state (not valid)
77 ANY = 0,
78
79 // ZA is in use and active (i.e. within the accumulator)
80 ACTIVE,
81
82 // A ZA save has been set up or committed (i.e. ZA is dormant or off)
83 LOCAL_SAVED,
84
85 // ZA is off or a lazy save has been set up by the caller
// (used as the fixed entry state of functions with a private ZA interface)
86 CALLER_DORMANT,
87
88 // ZA is off
// (required at returns of functions with a private ZA interface)
89 OFF,
90
91 // The number of ZA states (not a valid state)
92 NUM_ZA_STATE
93};
94
95/// A bitmask enum to record live physical registers that the "emit*" routines
96/// may need to preserve. Note: This only tracks registers we may clobber.
///
/// W0 and W0_HI are separate bits so that a live W0 can be distinguished from
/// a live X0 (both bits set); collectNeededZAStates queries liveness of
/// AArch64::W0 and AArch64::W0_HI independently.
97enum LiveRegs : uint8_t {
// No tracked register is live.
98 None = 0,
// The NZCV condition flags are live.
99 NZCV = 1 << 0,
// W0 is live.
100 W0 = 1 << 1,
// W0_HI is live (together with W0 this means all of X0 is live).
101 W0_HI = 1 << 2,
// Convenience mask: both parts of X0.
102 X0 = W0 | W0_HI,
// Opts this enum in to bitwise operators; W0_HI is the largest single bit.
103 LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ W0_HI)
104};
105
106/// Holds the virtual registers live physical registers have been saved to.
107struct PhysRegSave {
108 LiveRegs PhysLiveRegs;
109 Register StatusFlags = AArch64::NoRegister;
110 Register X0Save = AArch64::NoRegister;
111};
112
113static bool isLegalEdgeBundleZAState(ZAState State) {
114 switch (State) {
115 case ZAState::ACTIVE:
116 case ZAState::LOCAL_SAVED:
117 return true;
118 default:
119 return false;
120 }
121}
/// Describes the TPIDR2 block of the current function.
struct TPIDR2State {
  /// Frame index of the stack object backing the TPIDR2 block (set by
  /// MachineSMEABI::getTPIDR2Block); -1 by default.
  int FrameIndex{-1};
};
125
126StringRef getZAStateString(ZAState State) {
127#define MAKE_CASE(V) \
128 case V: \
129 return #V;
130 switch (State) {
131 MAKE_CASE(ZAState::ANY)
132 MAKE_CASE(ZAState::ACTIVE)
133 MAKE_CASE(ZAState::LOCAL_SAVED)
134 MAKE_CASE(ZAState::CALLER_DORMANT)
135 MAKE_CASE(ZAState::OFF)
136 default:
137 llvm_unreachable("Unexpected ZAState");
138 }
139#undef MAKE_CASE
140}
141
142static bool isZAorZTRegOp(const TargetRegisterInfo &TRI,
143 const MachineOperand &MO) {
144 if (!MO.isReg() || !MO.getReg().isPhysical())
145 return false;
146 return any_of(TRI.subregs_inclusive(MO.getReg()), [](const MCPhysReg &SR) {
147 return AArch64::MPR128RegClass.contains(SR) ||
148 AArch64::ZTRRegClass.contains(SR);
149 });
150}
151
152/// Returns the required ZA state needed before \p MI and an iterator pointing
153/// to where any code required to change the ZA state should be inserted.
///
/// \p ZAOffAtReturn selects the state required at return instructions: OFF
/// when true, ACTIVE when false. Instructions with no ZA requirement yield
/// ZAState::ANY.
154static std::pair<ZAState, MachineBasicBlock::iterator>
155getZAStateBeforeInst(const TargetRegisterInfo &TRI, MachineInstr &MI,
156 bool ZAOffAtReturn) {
158
// The two marker pseudos place their state change one instruction before
// InsertPt. NOTE(review): the caller asserts that such an insertion point is
// an ADJCALLSTACKDOWN, so these pseudos presumably sit directly after the
// call-frame setup -- confirm.
159 if (MI.getOpcode() == AArch64::InOutZAUsePseudo)
160 return {ZAState::ACTIVE, std::prev(InsertPt)};
161
162 if (MI.getOpcode() == AArch64::RequiresZASavePseudo)
163 return {ZAState::LOCAL_SAVED, std::prev(InsertPt)};
164
165 if (MI.isReturn())
166 return {ZAOffAtReturn ? ZAState::OFF : ZAState::ACTIVE, InsertPt};
167
// Any remaining instruction with a ZA/ZT register operand needs ZA ACTIVE.
168 for (auto &MO : MI.operands()) {
169 if (isZAorZTRegOp(TRI, MO))
170 return {ZAState::ACTIVE, InsertPt};
171 }
172
173 return {ZAState::ANY, InsertPt};
174}
175
// Machine function pass that inserts the ZA state transitions required by the
// SME ABI.
//
// runOnMachineFunction drives three phases in order: collectNeededZAStates,
// assignBundleZAStates and insertStateChanges. If any of them created a TPIDR2
// block, the lazy-save buffer is then allocated in the entry block.
176struct MachineSMEABI : public MachineFunctionPass {
177 inline static char ID = 0;
178
179 MachineSMEABI() : MachineFunctionPass(ID) {}
180
181 bool runOnMachineFunction(MachineFunction &MF) override;
182
183 StringRef getPassName() const override { return "Machine SME ABI pass"; }
184
185 void getAnalysisUsage(AnalysisUsage &AU) const override {
186 AU.setPreservesCFG();
191 }
192
193 /// Collects the needed ZA state (and live registers) before each instruction
194 /// within the machine function.
195 void collectNeededZAStates(SMEAttrs);
196
197 /// Assigns each edge bundle a ZA state based on the needed states of blocks
198 /// that have incoming or outgoing edges in that bundle.
199 void assignBundleZAStates();
200
201 /// Inserts code to handle changes between ZA states within the function.
202 /// E.g., ACTIVE -> LOCAL_SAVED will insert code required to save ZA.
203 void insertStateChanges();
204
205 // Emission routines for private and shared ZA functions (using lazy saves).
206 void emitNewZAPrologue(MachineBasicBlock &MBB,
208 void emitRestoreLazySave(MachineBasicBlock &MBB,
210 LiveRegs PhysLiveRegs);
211 void emitSetupLazySave(MachineBasicBlock &MBB,
213 void emitAllocateLazySaveBuffer(MachineBasicBlock &MBB,
216 bool ClearTPIDR2);
217
219 ZAState From, ZAState To, LiveRegs PhysLiveRegs);
220
221 /// Save live physical registers to virtual registers.
222 PhysRegSave createPhysRegSave(LiveRegs PhysLiveRegs, MachineBasicBlock &MBB,
224 /// Restore physical registers from a save of their previous values.
225 void restorePhyRegSave(PhysRegSave const &RegSave, MachineBasicBlock &MBB,
227
228 /// Get or create a TPIDR2 block in this function.
229 TPIDR2State getTPIDR2Block();
230
231private:
232 /// Contains the needed ZA state (and live registers) at an instruction.
233 struct InstInfo {
234 ZAState NeededState{ZAState::ANY};
236 LiveRegs PhysLiveRegs = LiveRegs::None;
237 };
238
239 /// Contains the needed ZA state for each instruction in a block.
240 /// Instructions that do not require a ZA state are not recorded.
241 struct BlockInfo {
// State the block is always entered in (ANY when not fixed); set for the
// entry block and for EH pads by collectNeededZAStates.
242 ZAState FixedEntryState{ZAState::ANY};
// Registers live at the block's first terminator; used when a state change
// has to be emitted at the end of the block.
244 LiveRegs PhysLiveRegsAtExit = LiveRegs::None;
245 };
246
247 // All pass state that must be cleared between functions.
248 struct PassState {
// The ZA state chosen for each edge bundle, indexed by bundle number.
250 SmallVector<ZAState> BundleStates;
// Set lazily by getTPIDR2Block the first time a TPIDR2 block is needed.
251 std::optional<TPIDR2State> TPIDR2Block;
252 } State;
253
// Per-function context; (re)assigned at the start of runOnMachineFunction.
254 MachineFunction *MF = nullptr;
255 EdgeBundles *Bundles = nullptr;
256 const AArch64Subtarget *Subtarget = nullptr;
257 const AArch64RegisterInfo *TRI = nullptr;
258 const TargetInstrInfo *TII = nullptr;
259 MachineRegisterInfo *MRI = nullptr;
260};
261
// Fills State.Blocks with, for every block: its fixed entry state (if any),
// the instructions that need a specific ZA state (in program order, each with
// its insertion point and the tracked registers live before it), and the
// tracked registers live at the block's exit.
262void MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
263 assert((SMEFnAttrs.hasZT0State() || SMEFnAttrs.hasZAState()) &&
264 "Expected function to have ZA/ZT0 state!");
265
266 State.Blocks.resize(MF->getNumBlockIDs());
267 for (MachineBasicBlock &MBB : *MF) {
268 BlockInfo &Block = State.Blocks[MBB.getNumber()];
269 if (MBB.isEntryBlock()) {
270 // Entry block:
271 Block.FixedEntryState = SMEFnAttrs.hasPrivateZAInterface()
272 ? ZAState::CALLER_DORMANT
273 : ZAState::ACTIVE;
274 } else if (MBB.isEHPad()) {
275 // EH entry block:
276 Block.FixedEntryState = ZAState::LOCAL_SAVED;
277 }
278
// Liveness is computed by walking the block backwards from its live-outs.
279 LiveRegUnits LiveUnits(*TRI);
280 LiveUnits.addLiveOuts(MBB);
281
// Snapshot of which tracked registers are live at the current position.
282 auto GetPhysLiveRegs = [&] {
283 LiveRegs PhysLiveRegs = LiveRegs::None;
284 if (!LiveUnits.available(AArch64::NZCV))
285 PhysLiveRegs |= LiveRegs::NZCV;
286 // We have to track W0 and X0 separately as otherwise things can get
287 // confused if we attempt to preserve X0 but only W0 was defined.
288 if (!LiveUnits.available(AArch64::W0))
289 PhysLiveRegs |= LiveRegs::W0;
290 if (!LiveUnits.available(AArch64::W0_HI))
291 PhysLiveRegs |= LiveRegs::W0_HI;
292 return PhysLiveRegs;
293 };
294
// Start from the live-out set; this is refined below if the block has a
// terminator (liveness before the first terminator is used instead).
295 Block.PhysLiveRegsAtExit = GetPhysLiveRegs();
296 auto FirstTerminatorInsertPt = MBB.getFirstTerminator();
297 for (MachineInstr &MI : reverse(MBB)) {
// After stepping backward, LiveUnits describes liveness just before MI.
299 LiveUnits.stepBackward(MI);
300 LiveRegs PhysLiveRegs = GetPhysLiveRegs();
301 auto [NeededState, InsertPt] = getZAStateBeforeInst(
302 *TRI, MI, /*ZAOffAtReturn=*/SMEFnAttrs.hasPrivateZAInterface());
303 assert((InsertPt == MBBI ||
304 InsertPt->getOpcode() == AArch64::ADJCALLSTACKDOWN) &&
305 "Unexpected state change insertion point!");
306 // TODO: Do something to avoid state changes where NZCV is live.
307 if (MBBI == FirstTerminatorInsertPt)
308 Block.PhysLiveRegsAtExit = PhysLiveRegs;
// Instructions with no ZA requirement are not recorded.
309 if (NeededState != ZAState::ANY)
310 Block.Insts.push_back({NeededState, InsertPt, PhysLiveRegs});
311 }
312
313 // Reverse vector (as we had to iterate backwards for liveness).
314 std::reverse(Block.Insts.begin(), Block.Insts.end());
315 }
316}
317
318void MachineSMEABI::assignBundleZAStates() {
319 State.BundleStates.resize(Bundles->getNumBundles());
320 for (unsigned I = 0, E = Bundles->getNumBundles(); I != E; ++I) {
321 LLVM_DEBUG(dbgs() << "Assigning ZA state for edge bundle: " << I << '\n');
322
323 // Attempt to assign a ZA state for this bundle that minimizes state
324 // transitions. Edges within loops are given a higher weight as we assume
325 // they will be executed more than once.
326 // TODO: We should propagate desired incoming/outgoing states through blocks
327 // that have the "ANY" state first to make better global decisions.
328 int EdgeStateCounts[ZAState::NUM_ZA_STATE] = {0};
329 for (unsigned BlockID : Bundles->getBlocks(I)) {
330 LLVM_DEBUG(dbgs() << "- bb." << BlockID);
331
332 const BlockInfo &Block = State.Blocks[BlockID];
333 if (Block.Insts.empty()) {
334 LLVM_DEBUG(dbgs() << " (no state preference)\n");
335 continue;
336 }
337 bool InEdge = Bundles->getBundle(BlockID, /*Out=*/false) == I;
338 bool OutEdge = Bundles->getBundle(BlockID, /*Out=*/true) == I;
339
340 ZAState DesiredIncomingState = Block.Insts.front().NeededState;
341 if (InEdge && isLegalEdgeBundleZAState(DesiredIncomingState)) {
342 EdgeStateCounts[DesiredIncomingState]++;
343 LLVM_DEBUG(dbgs() << " DesiredIncomingState: "
344 << getZAStateString(DesiredIncomingState));
345 }
346 ZAState DesiredOutgoingState = Block.Insts.back().NeededState;
347 if (OutEdge && isLegalEdgeBundleZAState(DesiredOutgoingState)) {
348 EdgeStateCounts[DesiredOutgoingState]++;
349 LLVM_DEBUG(dbgs() << " DesiredOutgoingState: "
350 << getZAStateString(DesiredOutgoingState));
351 }
352 LLVM_DEBUG(dbgs() << '\n');
353 }
354
355 ZAState BundleState =
356 ZAState(max_element(EdgeStateCounts) - EdgeStateCounts);
357
358 // Force ZA to be active in bundles that don't have a preferred state.
359 // TODO: Something better here (to avoid extra mode switches).
360 if (BundleState == ZAState::ANY)
361 BundleState = ZAState::ACTIVE;
362
363 LLVM_DEBUG({
364 dbgs() << "Chosen ZA state: " << getZAStateString(BundleState) << '\n'
365 << "Edge counts:";
366 for (auto [State, Count] : enumerate(EdgeStateCounts))
367 dbgs() << " " << getZAStateString(ZAState(State)) << ": " << Count;
368 dbgs() << "\n\n";
369 });
370
371 State.BundleStates[I] = BundleState;
372 }
373}
374
375void MachineSMEABI::insertStateChanges() {
376 for (MachineBasicBlock &MBB : *MF) {
377 const BlockInfo &Block = State.Blocks[MBB.getNumber()];
378 ZAState InState = State.BundleStates[Bundles->getBundle(MBB.getNumber(),
379 /*Out=*/false)];
380
381 ZAState CurrentState = Block.FixedEntryState;
382 if (CurrentState == ZAState::ANY)
383 CurrentState = InState;
384
385 for (auto &Inst : Block.Insts) {
386 if (CurrentState != Inst.NeededState)
387 emitStateChange(MBB, Inst.InsertPt, CurrentState, Inst.NeededState,
388 Inst.PhysLiveRegs);
389 CurrentState = Inst.NeededState;
390 }
391
392 if (MBB.succ_empty())
393 continue;
394
395 ZAState OutState =
396 State.BundleStates[Bundles->getBundle(MBB.getNumber(), /*Out=*/true)];
397 if (CurrentState != OutState)
398 emitStateChange(MBB, MBB.getFirstTerminator(), CurrentState, OutState,
399 Block.PhysLiveRegsAtExit);
400 }
401}
402
403TPIDR2State MachineSMEABI::getTPIDR2Block() {
404 if (State.TPIDR2Block)
405 return *State.TPIDR2Block;
406 MachineFrameInfo &MFI = MF->getFrameInfo();
407 State.TPIDR2Block = TPIDR2State{MFI.CreateStackObject(16, Align(16), false)};
408 return *State.TPIDR2Block;
409}
410
413 if (MBBI != MBB.end())
414 return MBBI->getDebugLoc();
415 return DebugLoc();
416}
417
// Sets up a lazy save: emits code before MBBI that computes the address of
// the function's TPIDR2 block (creating the block if needed) and writes that
// address to TPIDR2_EL0.
418void MachineSMEABI::emitSetupLazySave(MachineBasicBlock &MBB,
421
422 // Get pointer to TPIDR2 block.
423 Register TPIDR2 = MRI->createVirtualRegister(&AArch64::GPR64spRegClass);
424 Register TPIDR2Ptr = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
425 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), TPIDR2)
426 .addFrameIndex(getTPIDR2Block().FrameIndex)
427 .addImm(0)
428 .addImm(0);
// NOTE(review): the COPY presumably moves the address from the GPR64sp class
// produced by ADDXri into the GPR64 class expected by MSR -- confirm.
429 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), TPIDR2Ptr)
430 .addReg(TPIDR2);
431 // Set TPIDR2_EL0 to point to TPIDR2 block.
432 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSR))
433 .addImm(AArch64SysReg::TPIDR2_EL0)
434 .addReg(TPIDR2Ptr);
435}
436
// Emits code before MBBI that copies the live registers named in PhysLiveRegs
// into new virtual registers, and returns a record of those registers for
// restorePhyRegSave.
437PhysRegSave MachineSMEABI::createPhysRegSave(LiveRegs PhysLiveRegs,
440 DebugLoc DL) {
441 PhysRegSave RegSave{PhysLiveRegs};
// NZCV is read into a 64-bit virtual register with MRS.
442 if (PhysLiveRegs & LiveRegs::NZCV) {
443 RegSave.StatusFlags = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
444 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MRS), RegSave.StatusFlags)
445 .addImm(AArch64SysReg::NZCV)
446 .addReg(AArch64::NZCV, RegState::Implicit);
447 }
448 // Note: Preserving X0 is "free" as this is before register allocation, so
449 // the register allocator is still able to optimize these copies.
// The save is 64-bit (X0) when W0_HI is also live, otherwise 32-bit (W0).
// NOTE(review): if only W0_HI is set (without W0) nothing is saved here --
// confirm that combination cannot occur.
450 if (PhysLiveRegs & LiveRegs::W0) {
451 RegSave.X0Save = MRI->createVirtualRegister(PhysLiveRegs & LiveRegs::W0_HI
452 ? &AArch64::GPR64RegClass
453 : &AArch64::GPR32RegClass);
454 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), RegSave.X0Save)
455 .addReg(PhysLiveRegs & LiveRegs::W0_HI ? AArch64::X0 : AArch64::W0);
456 }
457 return RegSave;
458}
459
// Emits code before MBBI that restores the physical registers recorded in
// RegSave. Registers whose save slot is AArch64::NoRegister are left alone.
460void MachineSMEABI::restorePhyRegSave(PhysRegSave const &RegSave,
463 DebugLoc DL) {
// Write the saved flags back to NZCV.
464 if (RegSave.StatusFlags != AArch64::NoRegister)
465 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSR))
466 .addImm(AArch64SysReg::NZCV)
467 .addReg(RegSave.StatusFlags)
468 .addReg(AArch64::NZCV, RegState::ImplicitDefine);
469
// Copy back into X0 or W0, using the same width choice as the save.
470 if (RegSave.X0Save != AArch64::NoRegister)
471 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY),
472 RegSave.PhysLiveRegs & LiveRegs::W0_HI ? AArch64::X0 : AArch64::W0)
473 .addReg(RegSave.X0Save);
474}
475
// Emits the sequence that returns from the lazily-saved state to active ZA,
// inserted before MBBI: save the live registers in PhysLiveRegs, enable ZA,
// read TPIDR2_EL0, place the TPIDR2 block address in X0, emit the
// RestoreZAPseudo (which references the SMEABI_TPIDR2_RESTORE libcall), zero
// TPIDR2_EL0, and finally restore the saved registers.
476void MachineSMEABI::emitRestoreLazySave(MachineBasicBlock &MBB,
478 LiveRegs PhysLiveRegs) {
479 auto *TLI = Subtarget->getTargetLowering();
481 Register TPIDR2EL0 = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
// The TPIDR2 block pointer is materialized directly in physical X0, which is
// why X0 (and NZCV) may need preserving around this sequence.
482 Register TPIDR2 = AArch64::X0;
483
484 // TODO: Emit these within the restore MBB to prevent unnecessary saves.
485 PhysRegSave RegSave = createPhysRegSave(PhysLiveRegs, MBB, MBBI, DL);
486
487 // Enable ZA.
488 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSRpstatesvcrImm1))
489 .addImm(AArch64SVCR::SVCRZA)
490 .addImm(1);
491 // Get current TPIDR2_EL0.
492 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MRS), TPIDR2EL0)
493 .addImm(AArch64SysReg::TPIDR2_EL0);
494 // Get pointer to TPIDR2 block.
495 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), TPIDR2)
496 .addFrameIndex(getTPIDR2Block().FrameIndex)
497 .addImm(0)
498 .addImm(0);
499 // (Conditionally) restore ZA state.
500 BuildMI(MBB, MBBI, DL, TII->get(AArch64::RestoreZAPseudo))
501 .addReg(TPIDR2EL0)
502 .addReg(TPIDR2)
503 .addExternalSymbol(TLI->getLibcallName(RTLIB::SMEABI_TPIDR2_RESTORE))
504 .addRegMask(TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
505 // Zero TPIDR2_EL0.
506 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSR))
507 .addImm(AArch64SysReg::TPIDR2_EL0)
508 .addReg(AArch64::XZR);
509
510 restorePhyRegSave(RegSave, MBB, MBBI, DL);
511}
512
// Emits code before MBBI that turns ZA off. When ClearTPIDR2 is set,
// TPIDR2_EL0 is zeroed first (emitStateChange requests this when leaving the
// LOCAL_SAVED state).
513void MachineSMEABI::emitZAOff(MachineBasicBlock &MBB,
515 bool ClearTPIDR2) {
517
518 if (ClearTPIDR2)
519 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSR))
520 .addImm(AArch64SysReg::TPIDR2_EL0)
521 .addReg(AArch64::XZR);
522
523 // Disable ZA.
524 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSRpstatesvcrImm1))
525 .addImm(AArch64SVCR::SVCRZA)
526 .addImm(0);
527}
528
// Emits code before MBBI that (1) allocates the lazy-save buffer on the stack
// by lowering SP, and (2) initializes the function's TPIDR2 block with the
// buffer pointer and the SVL value. runOnMachineFunction calls this for the
// entry block, and only when a TPIDR2 block was created.
529void MachineSMEABI::emitAllocateLazySaveBuffer(
531 MachineFrameInfo &MFI = MF->getFrameInfo();
532
534 Register SP = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
535 Register SVL = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
536 Register Buffer = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
537
538 // Calculate SVL.
539 BuildMI(MBB, MBBI, DL, TII->get(AArch64::RDSVLI_XI), SVL).addImm(1);
540
541 // 1. Allocate the lazy save buffer.
542 {
543 // TODO This function grows the stack with a subtraction, which doesn't work
544 // on Windows. Some refactoring to share the functionality in
545 // LowerWindowsDYNAMIC_STACKALLOC will be required once the Windows ABI
546 // supports SME
547 assert(!Subtarget->isTargetWindows() &&
548 "Lazy ZA save is not yet supported on Windows");
549 // Get original stack pointer.
550 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), SP)
551 .addReg(AArch64::SP);
552 // Allocate a lazy-save buffer object of the size given, normally SVL * SVL
// (the multiply-subtract yields the new, lower stack pointer in Buffer)
553 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSUBXrrr), Buffer)
554 .addReg(SVL)
555 .addReg(SVL)
556 .addReg(SP);
557 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::SP)
558 .addReg(Buffer);
559 // We have just allocated a variable sized object, tell this to PEI.
560 MFI.CreateVariableSizedObject(Align(16), nullptr);
561 }
562
563 // 2. Setup the TPIDR2 block.
564 {
565 // Note: This case just needs to do `SVL << 48`. It is not implemented as we
566 // generally don't support big-endian SVE/SME.
567 if (!Subtarget->isLittleEndian())
569 "TPIDR2 block initialization is not supported on big-endian targets");
570
571 // Store buffer pointer and num_za_save_slices.
572 // Bytes 10-15 are implicitly zeroed.
// (a single store-pair writes Buffer and then SVL into the TPIDR2 block)
573 BuildMI(MBB, MBBI, DL, TII->get(AArch64::STPXi))
574 .addReg(Buffer)
575 .addReg(SVL)
576 .addFrameIndex(getTPIDR2Block().FrameIndex)
577 .addImm(0);
578 }
579}
580
// Emits the prologue that moves from the CALLER_DORMANT entry state to active
// ZA, inserted before MBBI: read TPIDR2_EL0, emit CommitZASavePseudo (which
// references the SMEABI_TPIDR2_SAVE libcall and carries a flag requesting
// that ZA be zeroed), then enable ZA.
581void MachineSMEABI::emitNewZAPrologue(MachineBasicBlock &MBB,
583 auto *TLI = Subtarget->getTargetLowering();
585
586 // Get current TPIDR2_EL0.
587 Register TPIDR2EL0 = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
588 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MRS))
589 .addReg(TPIDR2EL0, RegState::Define)
590 .addImm(AArch64SysReg::TPIDR2_EL0);
591 // If TPIDR2_EL0 is non-zero, commit the lazy save.
592 // NOTE: Functions that only use ZT0 don't need to zero ZA.
593 bool ZeroZA =
594 MF->getInfo<AArch64FunctionInfo>()->getSMEFnAttrs().hasZAState();
595 auto CommitZASave =
596 BuildMI(MBB, MBBI, DL, TII->get(AArch64::CommitZASavePseudo))
597 .addReg(TPIDR2EL0)
598 .addImm(ZeroZA ? 1 : 0)
599 .addExternalSymbol(TLI->getLibcallName(RTLIB::SMEABI_TPIDR2_SAVE))
600 .addRegMask(TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
// When ZA is zeroed, record that the pseudo (implicitly) defines ZAB0.
601 if (ZeroZA)
602 CommitZASave.addDef(AArch64::ZAB0, RegState::ImplicitDefine);
603 // Enable ZA (as ZA could have previously been in the OFF state).
604 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSRpstatesvcrImm1))
605 .addImm(AArch64SVCR::SVCRZA)
606 .addImm(1);
607}
608
// Emits the code for a single ZA state transition From -> To in MBB.
//
// Handled transitions:
//   - either side ANY:             nothing is emitted
//   - CALLER_DORMANT -> OFF:       nothing is emitted
//   - CALLER_DORMANT -> other:     new-ZA prologue, then continues as ACTIVE
//   - ACTIVE -> LOCAL_SAVED:       set up a lazy save
//   - LOCAL_SAVED -> ACTIVE:       restore the lazy save
//   - anything else -> OFF:        turn ZA off (clearing TPIDR2_EL0 if coming
//                                  from LOCAL_SAVED)
// Any other combination reaches llvm_unreachable.
//
// PhysLiveRegs is only consulted for the LOCAL_SAVED -> ACTIVE restore.
609void MachineSMEABI::emitStateChange(MachineBasicBlock &MBB,
611 ZAState From, ZAState To,
612 LiveRegs PhysLiveRegs) {
613
614 // ZA not used.
615 if (From == ZAState::ANY || To == ZAState::ANY)
616 return;
617
618 // If we're exiting from the CALLER_DORMANT state that means this new ZA
619 // function did not touch ZA (so ZA was never turned on).
620 if (From == ZAState::CALLER_DORMANT && To == ZAState::OFF)
621 return;
622
623 // TODO: Avoid setting up the save buffer if there's no transition to
624 // LOCAL_SAVED.
625 if (From == ZAState::CALLER_DORMANT) {
628 ->getSMEFnAttrs()
630 "CALLER_DORMANT state requires private ZA interface");
631 assert(&MBB == &MBB.getParent()->front() &&
632 "CALLER_DORMANT state only valid in entry block");
// Note: the prologue is placed at the first non-PHI instruction of the
// block rather than at InsertPt.
633 emitNewZAPrologue(MBB, MBB.getFirstNonPHI());
634 if (To == ZAState::ACTIVE)
635 return; // Nothing more to do (ZA is active after the prologue).
636
637 // Note: "emitNewZAPrologue" zeros ZA, so we may need to setup a lazy save
638 // if "To" is "ZAState::LOCAL_SAVED". It may be possible to improve this
639 // case by changing the placement of the zero instruction.
640 From = ZAState::ACTIVE;
641 }
642
643 if (From == ZAState::ACTIVE && To == ZAState::LOCAL_SAVED)
644 emitSetupLazySave(MBB, InsertPt);
645 else if (From == ZAState::LOCAL_SAVED && To == ZAState::ACTIVE)
646 emitRestoreLazySave(MBB, InsertPt, PhysLiveRegs);
647 else if (To == ZAState::OFF) {
648 assert(From != ZAState::CALLER_DORMANT &&
649 "CALLER_DORMANT to OFF should have already been handled");
650 emitZAOff(MBB, InsertPt, /*ClearTPIDR2=*/From == ZAState::LOCAL_SAVED);
651 } else {
// Printed unconditionally (not under LLVM_DEBUG) right before aborting.
652 dbgs() << "Error: Transition from " << getZAStateString(From) << " to "
653 << getZAStateString(To) << '\n';
654 llvm_unreachable("Unimplemented state transition");
655 }
656}
657
658} // end anonymous namespace
659
660INITIALIZE_PASS(MachineSMEABI, "aarch64-machine-sme-abi", "Machine SME ABI",
661 false, false)
662
663bool MachineSMEABI::runOnMachineFunction(MachineFunction &MF) {
664 if (!MF.getSubtarget<AArch64Subtarget>().hasSME())
665 return false;
666
667 auto *AFI = MF.getInfo<AArch64FunctionInfo>();
668 SMEAttrs SMEFnAttrs = AFI->getSMEFnAttrs();
669 if (!SMEFnAttrs.hasZAState() && !SMEFnAttrs.hasZT0State())
670 return false;
671
672 assert(MF.getRegInfo().isSSA() && "Expected to be run on SSA form!");
673
674 // Reset pass state.
675 State = PassState{};
676 this->MF = &MF;
677 Bundles = &getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles();
678 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
679 TII = Subtarget->getInstrInfo();
680 TRI = Subtarget->getRegisterInfo();
681 MRI = &MF.getRegInfo();
682
683 collectNeededZAStates(SMEFnAttrs);
684 assignBundleZAStates();
685 insertStateChanges();
686
687 // Allocate save buffer (if needed).
688 if (State.TPIDR2Block) {
689 MachineBasicBlock &EntryBlock = MF.front();
690 emitAllocateLazySaveBuffer(EntryBlock, EntryBlock.getFirstNonPHI());
691 }
692
693 return true;
694}
695
696FunctionPass *llvm::createMachineSMEABIPass() { return new MachineSMEABI(); }
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
#define LLVM_MARK_AS_BITMASK_ENUM(LargestValue)
LLVM_MARK_AS_BITMASK_ENUM lets you opt in an individual enum type so you can perform bitwise operatio...
Definition: BitmaskEnum.h:42
BlockVerifier::State From
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define I(x, y, z)
Definition: MD5.cpp:58
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
#define MAKE_CASE(V)
Register const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:56
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition: Debug.h:119
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:270
A debug info location.
Definition: DebugLoc.h:124
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:314
A set of register units used to track register liveness.
Definition: LiveRegUnits.h:31
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
Definition: LiveRegUnits.h:117
LLVM_ABI void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
LLVM_ABI void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
bool isEHPad() const
Returns true if the block is a landing pad.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI bool isEntryBlock() const
Returns true if this is the entry block of the function.
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
LLVM_ABI int CreateVariableSizedObject(Align Alignment, const AllocaInst *Alloca)
Notify the MachineFrameInfo object that a variable sized object has been created.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:72
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:85
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:78
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasPrivateZAInterface() const
bool hasZAState() const
bool hasZT0State() const
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createMachineSMEABIPass()
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2491
LLVM_ABI char & MachineDominatorsID
MachineDominators - This pass is a machine dominators analysis pass.
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition: Error.cpp:177
LLVM_ABI char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
@ None
Definition: CodeGenData.h:107
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:2049
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39