LLVM 22.0.0git
MachineSMEABIPass.cpp
Go to the documentation of this file.
1//===- MachineSMEABIPass.cpp ----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass implements the SME ABI requirements for ZA state. This includes
10// implementing the lazy (and agnostic) ZA state save schemes around calls.
11//
12//===----------------------------------------------------------------------===//
13//
14// This pass works by collecting instructions that require ZA to be in a
15// specific state (e.g., "ACTIVE" or "SAVED") and inserting the necessary state
16// transitions to ensure ZA is in the required state before instructions. State
17// transitions represent actions such as setting up or restoring a lazy save.
18// Certain points within a function may also have predefined states independent
19// of any instructions, for example, a "shared_za" function is always entered
20// and exited in the "ACTIVE" state.
21//
22// To handle ZA state across control flow, we make use of edge bundling. This
23// assigns each block an "incoming" and "outgoing" edge bundle (representing
24// incoming and outgoing edges). Initially, these are unique to each block;
25// then, in the process of forming bundles, the outgoing bundle of a block is
26// joined with the incoming bundle of all successors. The result is that each
27// bundle can be assigned a single ZA state, which ensures the state required by
 28// all of a block's successors is the same, and that each basic block will always
29// be entered with the same ZA state. This eliminates the need for splitting
30// edges to insert state transitions or "phi" nodes for ZA states.
31//
32// See below for a simple example of edge bundling.
33//
34// The following shows a conditionally executed basic block (BB1):
35//
36// if (cond)
37// BB1
38// BB2
39//
40// Initial Bundles Joined Bundles
41//
42// ┌──0──┐ ┌──0──┐
43// │ BB0 │ │ BB0 │
44// └──1──┘ └──1──┘
45// ├───────┐ ├───────┐
46// ▼ │ ▼ │
47// ┌──2──┐ │ ─────► ┌──1──┐ │
48// │ BB1 │ ▼ │ BB1 │ ▼
49// └──3──┘ ┌──4──┐ └──1──┘ ┌──1──┐
50// └───►4 BB2 │ └───►1 BB2 │
51// └──5──┘ └──2──┘
52//
53// On the left are the initial per-block bundles, and on the right are the
54// joined bundles (which are the result of the EdgeBundles analysis).
55
56#include "AArch64InstrInfo.h"
58#include "AArch64Subtarget.h"
68
69using namespace llvm;
70
71#define DEBUG_TYPE "aarch64-machine-sme-abi"
72
73namespace {
74
/// The tracked states of ZA (and ZT0). Used both per-instruction (the state
/// required before an instruction) and per-edge-bundle. NUM_ZA_STATE gives the
/// number of states for array sizing; it is not itself a valid state.
enum ZAState {
  // Any/unknown state (not valid)
  ANY = 0,

  // ZA is in use and active (i.e. within the accumulator)
  ACTIVE,

  // A ZA save has been set up or committed (i.e. ZA is dormant or off)
  LOCAL_SAVED,

  // The ZA/ZT0 state on entry to the function.
  ENTRY,

  // ZA is off
  OFF,

  // The number of ZA states (not a valid state)
  NUM_ZA_STATE
};
94
/// A bitmask enum to record live physical registers that the "emit*" routines
/// may need to preserve. Note: This only tracks registers we may clobber.
enum LiveRegs : uint8_t {
  None = 0,
  NZCV = 1 << 0,   // Status flags.
  W0 = 1 << 1,     // Low half of X0.
  W0_HI = 1 << 2,  // Upper half of X0 (tracked separately from W0).
  X0 = W0 | W0_HI, // Full X0 (both halves live).
  LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ W0_HI)
};
105
/// Holds the virtual registers live physical registers have been saved to.
struct PhysRegSave {
  LiveRegs PhysLiveRegs;                      // Which physical registers were saved.
  Register StatusFlags = AArch64::NoRegister; // Virtual register holding NZCV (if saved).
  Register X0Save = AArch64::NoRegister;      // Virtual register holding W0/X0 (if saved).
};
112
/// Contains the needed ZA state (and live registers) at an instruction. That is
/// the state ZA must be in _before_ "InsertPt".
struct InstInfo {
  ZAState NeededState{ZAState::ANY};
  // NOTE(review): an `InsertPt` iterator member appears to be missing from
  // this view of the source (it is read by findStateChangeInsertionPoint and
  // written by collectNeededZAStates) — confirm against upstream.
  LiveRegs PhysLiveRegs = LiveRegs::None;
};
120
/// Contains the needed ZA state for each instruction in a block. Instructions
/// that do not require a ZA state are not recorded.
struct BlockInfo {
  // Fixed state on entry to the block: ENTRY for the function's entry block,
  // LOCAL_SAVED for EH pads (set in collectNeededZAStates); ANY otherwise.
  ZAState FixedEntryState{ZAState::ANY};
  // States that would avoid a transition at block entry/exit — taken from the
  // first/last recorded instruction of the block.
  ZAState DesiredIncomingState{ZAState::ANY};
  ZAState DesiredOutgoingState{ZAState::ANY};
  LiveRegs PhysLiveRegsAtEntry = LiveRegs::None;
  LiveRegs PhysLiveRegsAtExit = LiveRegs::None;
};
131
/// Contains the needed ZA state information for all blocks within a function.
struct FunctionInfo {
  // NOTE(review): a `Blocks` container member appears to be missing from this
  // view of the source (it is initialized via std::move(Blocks) in
  // collectNeededZAStates) — confirm against upstream.
  // Point just after the SMEStateAllocPseudo marker (if present); a safe spot
  // for TPIDR2 block setup. std::nullopt when no marker exists.
  std::optional<MachineBasicBlock::iterator> AfterSMEProloguePt;
  LiveRegs PhysLiveRegsAfterSMEPrologue = LiveRegs::None;
};
138
139/// State/helpers that is only needed when emitting code to handle
140/// saving/restoring ZA.
141class EmitContext {
142public:
143 EmitContext() = default;
144
145 /// Get or create a TPIDR2 block in \p MF.
146 int getTPIDR2Block(MachineFunction &MF) {
147 if (TPIDR2BlockFI)
148 return *TPIDR2BlockFI;
149 MachineFrameInfo &MFI = MF.getFrameInfo();
150 TPIDR2BlockFI = MFI.CreateStackObject(16, Align(16), false);
151 return *TPIDR2BlockFI;
152 }
153
154 /// Get or create agnostic ZA buffer pointer in \p MF.
155 Register getAgnosticZABufferPtr(MachineFunction &MF) {
156 if (AgnosticZABufferPtr != AArch64::NoRegister)
157 return AgnosticZABufferPtr;
158 Register BufferPtr =
159 MF.getInfo<AArch64FunctionInfo>()->getEarlyAllocSMESaveBuffer();
160 AgnosticZABufferPtr =
161 BufferPtr != AArch64::NoRegister
162 ? BufferPtr
163 : MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
164 return AgnosticZABufferPtr;
165 }
166
167 /// Returns true if the function must allocate a ZA save buffer on entry. This
168 /// will be the case if, at any point in the function, a ZA save was emitted.
169 bool needsSaveBuffer() const {
170 assert(!(TPIDR2BlockFI && AgnosticZABufferPtr) &&
171 "Cannot have both a TPIDR2 block and agnostic ZA buffer");
172 return TPIDR2BlockFI || AgnosticZABufferPtr != AArch64::NoRegister;
173 }
174
175private:
176 std::optional<int> TPIDR2BlockFI;
177 Register AgnosticZABufferPtr = AArch64::NoRegister;
178};
179
180/// Checks if \p State is a legal edge bundle state. For a state to be a legal
181/// bundle state, it must be possible to transition from it to any other bundle
182/// state without losing any ZA state. This is the case for ACTIVE/LOCAL_SAVED,
183/// as you can transition between those states by saving/restoring ZA. The OFF
184/// state would not be legal, as transitioning to it drops the content of ZA.
185static bool isLegalEdgeBundleZAState(ZAState State) {
186 switch (State) {
187 case ZAState::ACTIVE: // ZA state within the accumulator/ZT0.
188 case ZAState::LOCAL_SAVED: // ZA state is saved on the stack.
189 return true;
190 default:
191 return false;
192 }
193}
194
195StringRef getZAStateString(ZAState State) {
196#define MAKE_CASE(V) \
197 case V: \
198 return #V;
199 switch (State) {
200 MAKE_CASE(ZAState::ANY)
201 MAKE_CASE(ZAState::ACTIVE)
202 MAKE_CASE(ZAState::LOCAL_SAVED)
203 MAKE_CASE(ZAState::ENTRY)
204 MAKE_CASE(ZAState::OFF)
205 default:
206 llvm_unreachable("Unexpected ZAState");
207 }
208#undef MAKE_CASE
209}
210
211static bool isZAorZTRegOp(const TargetRegisterInfo &TRI,
212 const MachineOperand &MO) {
213 if (!MO.isReg() || !MO.getReg().isPhysical())
214 return false;
215 return any_of(TRI.subregs_inclusive(MO.getReg()), [](const MCPhysReg &SR) {
216 return AArch64::MPR128RegClass.contains(SR) ||
217 AArch64::ZTRRegClass.contains(SR);
218 });
219}
220
/// Returns the required ZA state needed before \p MI and an iterator pointing
/// to where any code required to change the ZA state should be inserted.
static std::pair<ZAState, MachineBasicBlock::iterator>
getZAStateBeforeInst(const TargetRegisterInfo &TRI, MachineInstr &MI,
                     bool ZAOffAtReturn) {
  // NOTE(review): the declaration of `InsertPt` (presumably MI's iterator) is
  // missing from this view of the source — confirm against upstream.

  // ZA-using pseudos need ZA ACTIVE; the transition is inserted before the
  // instruction preceding the pseudo (std::prev of the insert point).
  if (MI.getOpcode() == AArch64::InOutZAUsePseudo)
    return {ZAState::ACTIVE, std::prev(InsertPt)};

  if (MI.getOpcode() == AArch64::RequiresZASavePseudo)
    return {ZAState::LOCAL_SAVED, std::prev(InsertPt)};

  // Returns: exit with ZA off when requested (private-ZA), else ACTIVE.
  if (MI.isReturn())
    return {ZAOffAtReturn ? ZAState::OFF : ZAState::ACTIVE, InsertPt};

  // Any explicit use/def of a ZA or ZT0 register requires ACTIVE.
  for (auto &MO : MI.operands()) {
    if (isZAorZTRegOp(TRI, MO))
      return {ZAState::ACTIVE, InsertPt};
  }

  return {ZAState::ANY, InsertPt};
}
244
/// Legacy machine-function pass implementing the SME ABI for ZA state (lazy
/// and agnostic ZA save schemes).
///
/// NOTE(review): several declarations below are missing parameter lines in
/// this extracted view of the source — confirm full signatures upstream.
struct MachineSMEABI : public MachineFunctionPass {
  inline static char ID = 0;

  MachineSMEABI(CodeGenOptLevel OptLevel = CodeGenOptLevel::Default)
      : MachineFunctionPass(ID), OptLevel(OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "Machine SME ABI pass"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
  }

  /// Collects the needed ZA state (and live registers) before each instruction
  /// within the machine function.
  FunctionInfo collectNeededZAStates(SMEAttrs SMEFnAttrs);

  /// Assigns each edge bundle a ZA state based on the needed states of blocks
  /// that have incoming or outgoing edges in that bundle.
  SmallVector<ZAState> assignBundleZAStates(const EdgeBundles &Bundles,
                                            const FunctionInfo &FnInfo);

  /// Inserts code to handle changes between ZA states within the function.
  /// E.g., ACTIVE -> LOCAL_SAVED will insert code required to save ZA.
  void insertStateChanges(EmitContext &, const FunctionInfo &FnInfo,
                          const EdgeBundles &Bundles,
                          ArrayRef<ZAState> BundleStates);

  /// Propagates desired states forwards (from predecessors -> successors) if
  /// \p Forwards, otherwise, propagates backwards (from successors ->
  /// predecessors).
  void propagateDesiredStates(FunctionInfo &FnInfo, bool Forwards = true);

  // Emission routines for private and shared ZA functions (using lazy saves).
  void emitSMEPrologue(MachineBasicBlock &MBB,
  void emitRestoreLazySave(EmitContext &, MachineBasicBlock &MBB,
                           LiveRegs PhysLiveRegs);
  void emitSetupLazySave(EmitContext &, MachineBasicBlock &MBB,
  void emitAllocateLazySaveBuffer(EmitContext &, MachineBasicBlock &MBB,
                                  bool ClearTPIDR2);

  // Emission routines for agnostic ZA functions.
  void emitSetupFullZASave(MachineBasicBlock &MBB,
                           LiveRegs PhysLiveRegs);
  // Emit a "full" ZA save or restore. It is "full" in the sense that this
  // function will emit a call to __arm_sme_save or __arm_sme_restore, which
  // handles saving and restoring both ZA and ZT0.
  void emitFullZASaveRestore(EmitContext &, MachineBasicBlock &MBB,
                             LiveRegs PhysLiveRegs, bool IsSave);
  void emitAllocateFullZASaveBuffer(EmitContext &, MachineBasicBlock &MBB,
                                    LiveRegs PhysLiveRegs);

  /// Attempts to find an insertion point before \p Inst where the status flags
  /// are not live. If \p Inst is `Block.Insts.end()` a point before the end of
  /// the block is found.
  std::pair<MachineBasicBlock::iterator, LiveRegs>
  findStateChangeInsertionPoint(MachineBasicBlock &MBB, const BlockInfo &Block,
  void emitStateChange(EmitContext &, MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator MBBI, ZAState From,
                       ZAState To, LiveRegs PhysLiveRegs);

  // Helpers for switching between lazy/full ZA save/restore routines.
  void emitZASave(EmitContext &Context, MachineBasicBlock &MBB,
    if (AFI->getSMEFnAttrs().hasAgnosticZAInterface())
      return emitFullZASaveRestore(Context, MBB, MBBI, PhysLiveRegs,
                                   /*IsSave=*/true);
    return emitSetupLazySave(Context, MBB, MBBI);
  }
  void emitZARestore(EmitContext &Context, MachineBasicBlock &MBB,
    if (AFI->getSMEFnAttrs().hasAgnosticZAInterface())
      return emitFullZASaveRestore(Context, MBB, MBBI, PhysLiveRegs,
                                   /*IsSave=*/false);
    return emitRestoreLazySave(Context, MBB, MBBI, PhysLiveRegs);
  }
  void emitAllocateZASaveBuffer(EmitContext &Context, MachineBasicBlock &MBB,
                                LiveRegs PhysLiveRegs) {
    if (AFI->getSMEFnAttrs().hasAgnosticZAInterface())
      return emitAllocateFullZASaveBuffer(Context, MBB, MBBI, PhysLiveRegs);
    return emitAllocateLazySaveBuffer(Context, MBB, MBBI);
  }

  /// Save live physical registers to virtual registers.
  PhysRegSave createPhysRegSave(LiveRegs PhysLiveRegs, MachineBasicBlock &MBB,
  /// Restore physical registers from a save of their previous values.
  void restorePhyRegSave(const PhysRegSave &RegSave, MachineBasicBlock &MBB,

private:
  // Cached per-function state, populated in runOnMachineFunction.

  MachineFunction *MF = nullptr;
  const AArch64Subtarget *Subtarget = nullptr;
  const AArch64RegisterInfo *TRI = nullptr;
  const AArch64FunctionInfo *AFI = nullptr;
  const TargetInstrInfo *TII = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  MachineLoopInfo *MLI = nullptr;
};
361
362static LiveRegs getPhysLiveRegs(LiveRegUnits const &LiveUnits) {
363 LiveRegs PhysLiveRegs = LiveRegs::None;
364 if (!LiveUnits.available(AArch64::NZCV))
365 PhysLiveRegs |= LiveRegs::NZCV;
366 // We have to track W0 and X0 separately as otherwise things can get
367 // confused if we attempt to preserve X0 but only W0 was defined.
368 if (!LiveUnits.available(AArch64::W0))
369 PhysLiveRegs |= LiveRegs::W0;
370 if (!LiveUnits.available(AArch64::W0_HI))
371 PhysLiveRegs |= LiveRegs::W0_HI;
372 return PhysLiveRegs;
373}
374
375static void setPhysLiveRegs(LiveRegUnits &LiveUnits, LiveRegs PhysLiveRegs) {
376 if (PhysLiveRegs & LiveRegs::NZCV)
377 LiveUnits.addReg(AArch64::NZCV);
378 if (PhysLiveRegs & LiveRegs::W0)
379 LiveUnits.addReg(AArch64::W0);
380 if (PhysLiveRegs & LiveRegs::W0_HI)
381 LiveUnits.addReg(AArch64::W0_HI);
382}
383
384[[maybe_unused]] bool isCallStartOpcode(unsigned Opc) {
385 switch (Opc) {
386 case AArch64::TLSDESC_CALLSEQ:
387 case AArch64::TLSDESC_AUTH_CALLSEQ:
388 case AArch64::ADJCALLSTACKDOWN:
389 return true;
390 default:
391 return false;
392 }
393}
394
FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
  assert((SMEFnAttrs.hasAgnosticZAInterface() || SMEFnAttrs.hasZT0State() ||
          SMEFnAttrs.hasZAState()) &&
         "Expected function to have ZA/ZT0 state!");

  // NOTE(review): the declaration of `Blocks` (per-block info, indexed by
  // block number) is missing from this view of the source — confirm upstream.
  LiveRegs PhysLiveRegsAfterSMEPrologue = LiveRegs::None;
  std::optional<MachineBasicBlock::iterator> AfterSMEProloguePt;

  for (MachineBasicBlock &MBB : *MF) {
    BlockInfo &Block = Blocks[MBB.getNumber()];

    if (MBB.isEntryBlock()) {
      // Entry block:
      Block.FixedEntryState = ZAState::ENTRY;
    } else if (MBB.isEHPad()) {
      // EH entry block:
      Block.FixedEntryState = ZAState::LOCAL_SAVED;
    }

    // Track physical-register liveness bottom-up, seeded from live-outs.
    LiveRegUnits LiveUnits(*TRI);
    LiveUnits.addLiveOuts(MBB);

    Block.PhysLiveRegsAtExit = getPhysLiveRegs(LiveUnits);
    auto FirstTerminatorInsertPt = MBB.getFirstTerminator();
    auto FirstNonPhiInsertPt = MBB.getFirstNonPHI();
    for (MachineInstr &MI : reverse(MBB)) {
      // NOTE(review): the declaration of `MBBI` (MI's iterator) is missing
      // from this view of the source — confirm upstream.
      LiveUnits.stepBackward(MI);
      LiveRegs PhysLiveRegs = getPhysLiveRegs(LiveUnits);
      // The SMEStateAllocPseudo marker is added to a function if the save
      // buffer was allocated in SelectionDAG. It marks the end of the
      // allocation -- which is a safe point for this pass to insert any TPIDR2
      // block setup.
      if (MI.getOpcode() == AArch64::SMEStateAllocPseudo) {
        AfterSMEProloguePt = MBBI;
        PhysLiveRegsAfterSMEPrologue = PhysLiveRegs;
      }
      // Note: We treat Agnostic ZA as inout_za with an alternate save/restore.
      auto [NeededState, InsertPt] = getZAStateBeforeInst(
          *TRI, MI, /*ZAOffAtReturn=*/SMEFnAttrs.hasPrivateZAInterface());
      assert((InsertPt == MBBI || isCallStartOpcode(InsertPt->getOpcode())) &&
             "Unexpected state change insertion point!");
      // TODO: Do something to avoid state changes where NZCV is live.
      if (MBBI == FirstTerminatorInsertPt)
        Block.PhysLiveRegsAtExit = PhysLiveRegs;
      if (MBBI == FirstNonPhiInsertPt)
        Block.PhysLiveRegsAtEntry = PhysLiveRegs;
      if (NeededState != ZAState::ANY)
        Block.Insts.push_back({NeededState, InsertPt, PhysLiveRegs});
    }

    // Reverse vector (as we had to iterate backwards for liveness).
    std::reverse(Block.Insts.begin(), Block.Insts.end());

    // Record the desired states on entry/exit of this block. These are the
    // states that would not incur a state transition.
    if (!Block.Insts.empty()) {
      Block.DesiredIncomingState = Block.Insts.front().NeededState;
      Block.DesiredOutgoingState = Block.Insts.back().NeededState;
    }
  }

  return FunctionInfo{std::move(Blocks), AfterSMEProloguePt,
                      PhysLiveRegsAfterSMEPrologue};
}
461
void MachineSMEABI::propagateDesiredStates(FunctionInfo &FnInfo,
                                           bool Forwards) {
  // If `Forwards`, this propagates desired states from predecessors to
  // successors, otherwise, this propagates states from successors to
  // predecessors.
  auto GetBlockState = [](BlockInfo &Block, bool Incoming) -> ZAState & {
    return Incoming ? Block.DesiredIncomingState : Block.DesiredOutgoingState;
  };

  // NOTE(review): the declaration of `Worklist` (of MachineBasicBlock *) is
  // missing from this view of the source — confirm upstream.
  // Seed the worklist with all blocks whose state is not a legal bundle state.
  for (auto [BlockID, BlockInfo] : enumerate(FnInfo.Blocks)) {
    if (!isLegalEdgeBundleZAState(GetBlockState(BlockInfo, Forwards)))
      Worklist.push_back(MF->getBlockNumbered(BlockID));
  }

  while (!Worklist.empty()) {
    MachineBasicBlock *MBB = Worklist.pop_back_val();
    BlockInfo &Block = FnInfo.Blocks[MBB->getNumber()];

    // Pick a legal edge bundle state that matches the majority of
    // predecessors/successors.
    int StateCounts[ZAState::NUM_ZA_STATE] = {0};
    for (MachineBasicBlock *PredOrSucc :
         Forwards ? predecessors(MBB) : successors(MBB)) {
      BlockInfo &PredOrSuccBlock = FnInfo.Blocks[PredOrSucc->getNumber()];
      ZAState ZAState = GetBlockState(PredOrSuccBlock, !Forwards);
      if (isLegalEdgeBundleZAState(ZAState))
        StateCounts[ZAState]++;
    }

    // max_element - base = index of the highest count, i.e. majority state.
    ZAState PropagatedState = ZAState(max_element(StateCounts) - StateCounts);
    ZAState &CurrentState = GetBlockState(Block, Forwards);
    if (PropagatedState != CurrentState) {
      CurrentState = PropagatedState;
      ZAState &OtherState = GetBlockState(Block, !Forwards);
      // Propagate to the incoming/outgoing state if that is also "ANY".
      if (OtherState == ZAState::ANY)
        OtherState = PropagatedState;
      // Push any successors/predecessors that may need updating to the
      // worklist.
      for (MachineBasicBlock *SuccOrPred :
           Forwards ? successors(MBB) : predecessors(MBB)) {
        BlockInfo &SuccOrPredBlock = FnInfo.Blocks[SuccOrPred->getNumber()];
        if (!isLegalEdgeBundleZAState(GetBlockState(SuccOrPredBlock, Forwards)))
          Worklist.push_back(SuccOrPred);
      }
    }
  }
}
511
/// Assigns each edge bundle a ZA state based on the needed states of blocks
/// that have incoming or outgoing edges in that bundle.
// NOTE(review): the return-type line of this definition (declared as
// SmallVector<ZAState> in the class) is missing from this view — confirm.
MachineSMEABI::assignBundleZAStates(const EdgeBundles &Bundles,
                                    const FunctionInfo &FnInfo) {
  SmallVector<ZAState> BundleStates(Bundles.getNumBundles());
  for (unsigned I = 0, E = Bundles.getNumBundles(); I != E; ++I) {
    LLVM_DEBUG(dbgs() << "Assigning ZA state for edge bundle: " << I << '\n');

    // Attempt to assign a ZA state for this bundle that minimizes state
    // transitions. Edges within loops are given a higher weight as we assume
    // they will be executed more than once.
    int EdgeStateCounts[ZAState::NUM_ZA_STATE] = {0};
    for (unsigned BlockID : Bundles.getBlocks(I)) {
      LLVM_DEBUG(dbgs() << "- bb." << BlockID);

      const BlockInfo &Block = FnInfo.Blocks[BlockID];
      // Determine which side(s) of this block participate in bundle I.
      bool InEdge = Bundles.getBundle(BlockID, /*Out=*/false) == I;
      bool OutEdge = Bundles.getBundle(BlockID, /*Out=*/true) == I;

      bool LegalInEdge =
          InEdge && isLegalEdgeBundleZAState(Block.DesiredIncomingState);
      bool LegalOutEgde =
          OutEdge && isLegalEdgeBundleZAState(Block.DesiredOutgoingState);
      if (LegalInEdge) {
        LLVM_DEBUG(dbgs() << " DesiredIncomingState: "
                          << getZAStateString(Block.DesiredIncomingState));
        EdgeStateCounts[Block.DesiredIncomingState]++;
      }
      if (LegalOutEgde) {
        LLVM_DEBUG(dbgs() << " DesiredOutgoingState: "
                          << getZAStateString(Block.DesiredOutgoingState));
        EdgeStateCounts[Block.DesiredOutgoingState]++;
      }
      if (!LegalInEdge && !LegalOutEgde)
        LLVM_DEBUG(dbgs() << " (no state preference)");
      LLVM_DEBUG(dbgs() << '\n');
    }

    // Majority vote over the desired states (first max wins on ties).
    ZAState BundleState =
        ZAState(max_element(EdgeStateCounts) - EdgeStateCounts);

    // No block expressed a preference: default to ACTIVE.
    if (BundleState == ZAState::ANY)
      BundleState = ZAState::ACTIVE;

    LLVM_DEBUG({
      dbgs() << "Chosen ZA state: " << getZAStateString(BundleState) << '\n'
             << "Edge counts:";
      for (auto [State, Count] : enumerate(EdgeStateCounts))
        dbgs() << " " << getZAStateString(ZAState(State)) << ": " << Count;
      dbgs() << "\n\n";
    });

    BundleStates[I] = BundleState;
  }

  return BundleStates;
}
570
std::pair<MachineBasicBlock::iterator, LiveRegs>
MachineSMEABI::findStateChangeInsertionPoint(
    MachineBasicBlock &MBB, const BlockInfo &Block,
  // NOTE(review): the final parameter (`Inst`, an iterator into Block.Insts)
  // and the declaration of `InsertPt` are missing from this view — confirm.
  LiveRegs PhysLiveRegs;
  if (Inst != Block.Insts.end()) {
    InsertPt = Inst->InsertPt;
    PhysLiveRegs = Inst->PhysLiveRegs;
  } else {
    // No specific instruction: insert before the block's terminators.
    InsertPt = MBB.getFirstTerminator();
    PhysLiveRegs = Block.PhysLiveRegsAtExit;
  }

  if (!(PhysLiveRegs & LiveRegs::NZCV))
    return {InsertPt, PhysLiveRegs}; // Nothing to do (no live flags).

  // Find the previous state change. We can not move before this point.
  MachineBasicBlock::iterator PrevStateChangeI;
  if (Inst == Block.Insts.begin()) {
    PrevStateChangeI = MBB.begin();
  } else {
    // Note: `std::prev(Inst)` is the previous InstInfo. We only create an
    // InstInfo object for instructions that require a specific ZA state, so the
    // InstInfo is the site of the previous state change in the block (which can
    // be several MIs earlier).
    PrevStateChangeI = std::prev(Inst)->InsertPt;
  }

  // Note: LiveUnits will only accurately track X0 and NZCV.
  LiveRegUnits LiveUnits(*TRI);
  setPhysLiveRegs(LiveUnits, PhysLiveRegs);
  // Scan backwards for a point where NZCV is no longer live.
  for (MachineBasicBlock::iterator I = InsertPt; I != PrevStateChangeI; --I) {
    // Don't move before/into a call (which may have a state change before it).
    if (I->getOpcode() == TII->getCallFrameDestroyOpcode() || I->isCall())
      break;
    LiveUnits.stepBackward(*I);
    if (LiveUnits.available(AArch64::NZCV))
      return {I, getPhysLiveRegs(LiveUnits)};
  }
  // No flag-free point found: fall back to the original insertion point.
  return {InsertPt, PhysLiveRegs};
}
613
614void MachineSMEABI::insertStateChanges(EmitContext &Context,
615 const FunctionInfo &FnInfo,
616 const EdgeBundles &Bundles,
617 ArrayRef<ZAState> BundleStates) {
618 for (MachineBasicBlock &MBB : *MF) {
619 const BlockInfo &Block = FnInfo.Blocks[MBB.getNumber()];
620 ZAState InState = BundleStates[Bundles.getBundle(MBB.getNumber(),
621 /*Out=*/false)];
622
623 ZAState CurrentState = Block.FixedEntryState;
624 if (CurrentState == ZAState::ANY)
625 CurrentState = InState;
626
627 for (auto &Inst : Block.Insts) {
628 if (CurrentState != Inst.NeededState) {
629 auto [InsertPt, PhysLiveRegs] =
630 findStateChangeInsertionPoint(MBB, Block, &Inst);
631 emitStateChange(Context, MBB, InsertPt, CurrentState, Inst.NeededState,
632 PhysLiveRegs);
633 CurrentState = Inst.NeededState;
634 }
635 }
636
637 if (MBB.succ_empty())
638 continue;
639
640 ZAState OutState =
641 BundleStates[Bundles.getBundle(MBB.getNumber(), /*Out=*/true)];
642 if (CurrentState != OutState) {
643 auto [InsertPt, PhysLiveRegs] =
644 findStateChangeInsertionPoint(MBB, Block, Block.Insts.end());
645 emitStateChange(Context, MBB, InsertPt, CurrentState, OutState,
646 PhysLiveRegs);
647 }
648 }
649}
650
// NOTE(review): the header of this helper is missing from this view of the
// source. The visible body returns MBBI's DebugLoc, or an empty DebugLoc when
// MBBI is the block end — confirm the signature upstream.
  if (MBBI != MBB.end())
    return MBBI->getDebugLoc();
  return DebugLoc();
}
657
void MachineSMEABI::emitSetupLazySave(EmitContext &Context,
  // NOTE(review): the remaining parameters and DebugLoc setup are missing
  // from this view of the source — confirm the signature upstream.

  // Get pointer to TPIDR2 block.
  Register TPIDR2 = MRI->createVirtualRegister(&AArch64::GPR64spRegClass);
  Register TPIDR2Ptr = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), TPIDR2)
      .addFrameIndex(Context.getTPIDR2Block(*MF))
      .addImm(0)
      .addImm(0);
  // Copy the address into a plain GPR64 so it can be moved into TPIDR2_EL0.
  BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), TPIDR2Ptr)
      .addReg(TPIDR2);
  // Set TPIDR2_EL0 to point to TPIDR2 block.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSR))
      .addImm(AArch64SysReg::TPIDR2_EL0)
      .addReg(TPIDR2Ptr);
}
677
/// Saves the live physical registers recorded in \p PhysLiveRegs into fresh
/// virtual registers and returns the mapping so they can be restored later.
PhysRegSave MachineSMEABI::createPhysRegSave(LiveRegs PhysLiveRegs,
                                             DebugLoc DL) {
  // NOTE(review): the MBB/MBBI parameter lines are missing from this view of
  // the source — confirm the signature upstream.
  PhysRegSave RegSave{PhysLiveRegs};
  // Save NZCV via MRS into a GPR64.
  if (PhysLiveRegs & LiveRegs::NZCV) {
    RegSave.StatusFlags = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MRS), RegSave.StatusFlags)
        .addImm(AArch64SysReg::NZCV)
        .addReg(AArch64::NZCV, RegState::Implicit);
  }
  // Note: Preserving X0 is "free" as this is before register allocation, so
  // the register allocator is still able to optimize these copies.
  if (PhysLiveRegs & LiveRegs::W0) {
    // Copy the full X0 only when its upper half is also live; else just W0.
    RegSave.X0Save = MRI->createVirtualRegister(PhysLiveRegs & LiveRegs::W0_HI
                                                    ? &AArch64::GPR64RegClass
                                                    : &AArch64::GPR32RegClass);
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), RegSave.X0Save)
        .addReg(PhysLiveRegs & LiveRegs::W0_HI ? AArch64::X0 : AArch64::W0);
  }
  return RegSave;
}
700
/// Restores the physical registers recorded in \p RegSave from the virtual
/// registers they were saved to by createPhysRegSave.
void MachineSMEABI::restorePhyRegSave(const PhysRegSave &RegSave,
                                      DebugLoc DL) {
  // NOTE(review): the MBB/MBBI parameter lines are missing from this view of
  // the source — confirm the signature upstream.
  if (RegSave.StatusFlags != AArch64::NoRegister)
    // Write the saved flags back into NZCV via MSR.
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSR))
        .addImm(AArch64SysReg::NZCV)
        .addReg(RegSave.StatusFlags)
        .addReg(AArch64::NZCV, RegState::ImplicitDefine);

  if (RegSave.X0Save != AArch64::NoRegister)
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY),
            RegSave.PhysLiveRegs & LiveRegs::W0_HI ? AArch64::X0 : AArch64::W0)
        .addReg(RegSave.X0Save);
}
716
void MachineSMEABI::emitRestoreLazySave(EmitContext &Context,
                                        LiveRegs PhysLiveRegs) {
  // NOTE(review): the MBBI/DebugLoc parameter lines are missing from this
  // view of the source — confirm the signature upstream.
  auto *TLI = Subtarget->getTargetLowering();
  Register TPIDR2EL0 = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
  Register TPIDR2 = AArch64::X0;

  // TODO: Emit these within the restore MBB to prevent unnecessary saves.
  PhysRegSave RegSave = createPhysRegSave(PhysLiveRegs, MBB, MBBI, DL);

  // Enable ZA.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSRpstatesvcrImm1))
      .addImm(AArch64SVCR::SVCRZA)
      .addImm(1);
  // Get current TPIDR2_EL0.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MRS), TPIDR2EL0)
      .addImm(AArch64SysReg::TPIDR2_EL0);
  // Get pointer to TPIDR2 block.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), TPIDR2)
      .addFrameIndex(Context.getTPIDR2Block(*MF))
      .addImm(0)
      .addImm(0);
  // (Conditionally) restore ZA state.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::RestoreZAPseudo))
      .addReg(TPIDR2EL0)
      .addReg(TPIDR2)
      .addExternalSymbol(TLI->getLibcallName(RTLIB::SMEABI_TPIDR2_RESTORE))
      .addRegMask(TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
  // Zero TPIDR2_EL0.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSR))
      .addImm(AArch64SysReg::TPIDR2_EL0)
      .addReg(AArch64::XZR);

  restorePhyRegSave(RegSave, MBB, MBBI, DL);
}
754
/// Turns ZA off, optionally clearing TPIDR2_EL0 first (to abandon a lazy save
/// that was set up).
void MachineSMEABI::emitZAOff(MachineBasicBlock &MBB,
                              bool ClearTPIDR2) {
  // NOTE(review): the MBBI/DebugLoc lines are missing from this view of the
  // source — confirm the signature upstream.

  if (ClearTPIDR2)
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSR))
        .addImm(AArch64SysReg::TPIDR2_EL0)
        .addReg(AArch64::XZR);

  // Disable ZA.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSRpstatesvcrImm1))
      .addImm(AArch64SVCR::SVCRZA)
      .addImm(0);
}
770
void MachineSMEABI::emitAllocateLazySaveBuffer(
    EmitContext &Context, MachineBasicBlock &MBB,
  // NOTE(review): the MBBI parameter and DebugLoc lines are missing from this
  // view of the source — confirm the signature upstream.
  MachineFrameInfo &MFI = MF->getFrameInfo();
  Register SP = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
  Register SVL = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
  Register Buffer = AFI->getEarlyAllocSMESaveBuffer();

  // Calculate SVL.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::RDSVLI_XI), SVL).addImm(1);

  // 1. Allocate the lazy save buffer.
  if (Buffer == AArch64::NoRegister) {
    // TODO: On Windows, we allocate the lazy save buffer in SelectionDAG (so
    // Buffer != AArch64::NoRegister). This is done to reuse the existing
    // expansions (which can insert stack checks). This works, but it means we
    // will always allocate the lazy save buffer (even if the function contains
    // no lazy saves). If we want to handle Windows here, we'll need to
    // implement something similar to LowerWindowsDYNAMIC_STACKALLOC.
    assert(!Subtarget->isTargetWindows() &&
           "Lazy ZA save is not yet supported on Windows");
    Buffer = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
    // Get original stack pointer.
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), SP)
        .addReg(AArch64::SP);
    // Allocate a lazy-save buffer object of the size given, normally SVL * SVL
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSUBXrrr), Buffer)
        .addReg(SVL)
        .addReg(SVL)
        .addReg(SP);
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::SP)
        .addReg(Buffer);
    // We have just allocated a variable sized object, tell this to PEI.
    MFI.CreateVariableSizedObject(Align(16), nullptr);
  }

  // 2. Setup the TPIDR2 block.
  {
    // Note: This case just needs to do `SVL << 48`. It is not implemented as we
    // generally don't support big-endian SVE/SME.
    if (!Subtarget->isLittleEndian())
      // NOTE(review): the opening of this call (likely report_fatal_error) is
      // missing from this view of the source — confirm upstream.
        "TPIDR2 block initialization is not supported on big-endian targets");

    // Store buffer pointer and num_za_save_slices.
    // Bytes 10-15 are implicitly zeroed.
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::STPXi))
        .addReg(Buffer)
        .addReg(SVL)
        .addFrameIndex(Context.getTPIDR2Block(*MF))
        .addImm(0);
  }
}
825
// All eight bits set — presumably the tile mask selecting every ZA tile for
// the ZERO instruction; confirm against the ZERO_M operand encoding.
static constexpr unsigned ZERO_ALL_ZA_MASK = 0b11111111;
827
void MachineSMEABI::emitSMEPrologue(MachineBasicBlock &MBB,
  // NOTE(review): the remaining parameters, the DebugLoc setup, and the
  // opening `if` of the first branch below are missing from this view of the
  // source — confirm the full definition upstream.
  auto *TLI = Subtarget->getTargetLowering();

  // "new" ZA/ZT0 functions must zero their state on entry.
  bool ZeroZA = AFI->getSMEFnAttrs().isNewZA();
  bool ZeroZT0 = AFI->getSMEFnAttrs().isNewZT0();
    // Get current TPIDR2_EL0.
    Register TPIDR2EL0 = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MRS))
        .addReg(TPIDR2EL0, RegState::Define)
        .addImm(AArch64SysReg::TPIDR2_EL0);
    // If TPIDR2_EL0 is non-zero, commit the lazy save.
    // NOTE: Functions that only use ZT0 don't need to zero ZA.
    auto CommitZASave =
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::CommitZASavePseudo))
            .addReg(TPIDR2EL0)
            .addImm(ZeroZA)
            .addImm(ZeroZT0)
            .addExternalSymbol(TLI->getLibcallName(RTLIB::SMEABI_TPIDR2_SAVE))
            .addRegMask(TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
    if (ZeroZA)
      CommitZASave.addDef(AArch64::ZAB0, RegState::ImplicitDefine);
    if (ZeroZT0)
      CommitZASave.addDef(AArch64::ZT0, RegState::ImplicitDefine);
    // Enable ZA (as ZA could have previously been in the OFF state).
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSRpstatesvcrImm1))
        .addImm(AArch64SVCR::SVCRZA)
        .addImm(1);
  } else if (AFI->getSMEFnAttrs().hasSharedZAInterface()) {
    if (ZeroZA)
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ZERO_M))
          // NOTE(review): an operand line (likely .addImm(ZERO_ALL_ZA_MASK))
          // is missing from this view of the source.
          .addDef(AArch64::ZAB0, RegState::ImplicitDefine);
    if (ZeroZT0)
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ZERO_T)).addDef(AArch64::ZT0);
  }
}
867
void MachineSMEABI::emitFullZASaveRestore(EmitContext &Context,
                                          LiveRegs PhysLiveRegs, bool IsSave) {
  // NOTE(review): the MBBI/DebugLoc parameter lines are missing from this
  // view of the source — confirm the signature upstream.
  auto *TLI = Subtarget->getTargetLowering();
  Register BufferPtr = AArch64::X0;

  PhysRegSave RegSave = createPhysRegSave(PhysLiveRegs, MBB, MBBI, DL);

  // Copy the buffer pointer into X0.
  BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), BufferPtr)
      .addReg(Context.getAgnosticZABufferPtr(*MF));

  // Call __arm_sme_save/__arm_sme_restore.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
      .addReg(BufferPtr, RegState::Implicit)
      .addExternalSymbol(TLI->getLibcallName(
          IsSave ? RTLIB::SMEABI_SME_SAVE : RTLIB::SMEABI_SME_RESTORE))
      .addRegMask(TRI->getCallPreservedMask(
          *MF,
          // NOTE(review): the calling-convention argument and closing of this
          // call are missing from this view of the source.

  restorePhyRegSave(RegSave, MBB, MBBI, DL);
}
893
894void MachineSMEABI::emitAllocateFullZASaveBuffer(
895 EmitContext &Context, MachineBasicBlock &MBB,
897 // Buffer already allocated in SelectionDAG.
899 return;
900
902 Register BufferPtr = Context.getAgnosticZABufferPtr(*MF);
903 Register BufferSize = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
904
905 PhysRegSave RegSave = createPhysRegSave(PhysLiveRegs, MBB, MBBI, DL);
906
907 // Calculate the SME state size.
908 {
909 auto *TLI = Subtarget->getTargetLowering();
910 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
911 BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
912 .addExternalSymbol(TLI->getLibcallName(RTLIB::SMEABI_SME_STATE_SIZE))
913 .addReg(AArch64::X0, RegState::ImplicitDefine)
914 .addRegMask(TRI->getCallPreservedMask(
915 *MF, CallingConv::
917 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), BufferSize)
918 .addReg(AArch64::X0);
919 }
920
921 // Allocate a buffer object of the size given __arm_sme_state_size.
922 {
923 MachineFrameInfo &MFI = MF->getFrameInfo();
924 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
925 .addReg(AArch64::SP)
926 .addReg(BufferSize)
928 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), BufferPtr)
929 .addReg(AArch64::SP);
930
931 // We have just allocated a variable sized object, tell this to PEI.
932 MFI.CreateVariableSizedObject(Align(16), nullptr);
933 }
934
935 restorePhyRegSave(RegSave, MBB, MBBI, DL);
936}
937
938void MachineSMEABI::emitStateChange(EmitContext &Context,
941 ZAState From, ZAState To,
942 LiveRegs PhysLiveRegs) {
943 // ZA not used.
944 if (From == ZAState::ANY || To == ZAState::ANY)
945 return;
946
947 // If we're exiting from the ENTRY state that means that the function has not
948 // used ZA, so in the case of private ZA/ZT0 functions we can omit any set up.
949 if (From == ZAState::ENTRY && To == ZAState::OFF)
950 return;
951
952 [[maybe_unused]] SMEAttrs SMEFnAttrs = AFI->getSMEFnAttrs();
953
954 // TODO: Avoid setting up the save buffer if there's no transition to
955 // LOCAL_SAVED.
956 if (From == ZAState::ENTRY) {
957 assert(&MBB == &MBB.getParent()->front() &&
958 "ENTRY state only valid in entry block");
959 emitSMEPrologue(MBB, MBB.getFirstNonPHI());
960 if (To == ZAState::ACTIVE)
961 return; // Nothing more to do (ZA is active after the prologue).
962
963 // Note: "emitNewZAPrologue" zeros ZA, so we may need to setup a lazy save
964 // if "To" is "ZAState::LOCAL_SAVED". It may be possible to improve this
965 // case by changing the placement of the zero instruction.
966 From = ZAState::ACTIVE;
967 }
968
969 if (From == ZAState::ACTIVE && To == ZAState::LOCAL_SAVED)
970 emitZASave(Context, MBB, InsertPt, PhysLiveRegs);
971 else if (From == ZAState::LOCAL_SAVED && To == ZAState::ACTIVE)
972 emitZARestore(Context, MBB, InsertPt, PhysLiveRegs);
973 else if (To == ZAState::OFF) {
974 assert(From != ZAState::ENTRY &&
975 "ENTRY to OFF should have already been handled");
976 assert(!SMEFnAttrs.hasAgnosticZAInterface() &&
977 "Should not turn ZA off in agnostic ZA function");
978 emitZAOff(MBB, InsertPt, /*ClearTPIDR2=*/From == ZAState::LOCAL_SAVED);
979 } else {
980 dbgs() << "Error: Transition from " << getZAStateString(From) << " to "
981 << getZAStateString(To) << '\n';
982 llvm_unreachable("Unimplemented state transition");
983 }
984}
985
986} // end anonymous namespace
987
988INITIALIZE_PASS(MachineSMEABI, "aarch64-machine-sme-abi", "Machine SME ABI",
989 false, false)
990
991bool MachineSMEABI::runOnMachineFunction(MachineFunction &MF) {
992 if (!MF.getSubtarget<AArch64Subtarget>().hasSME())
993 return false;
994
995 AFI = MF.getInfo<AArch64FunctionInfo>();
996 SMEAttrs SMEFnAttrs = AFI->getSMEFnAttrs();
997 if (!SMEFnAttrs.hasZAState() && !SMEFnAttrs.hasZT0State() &&
998 !SMEFnAttrs.hasAgnosticZAInterface())
999 return false;
1000
1001 assert(MF.getRegInfo().isSSA() && "Expected to be run on SSA form!");
1002
1003 this->MF = &MF;
1004 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
1005 TII = Subtarget->getInstrInfo();
1006 TRI = Subtarget->getRegisterInfo();
1007 MRI = &MF.getRegInfo();
1008
1009 const EdgeBundles &Bundles =
1010 getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles();
1011
1012 FunctionInfo FnInfo = collectNeededZAStates(SMEFnAttrs);
1013
1014 if (OptLevel != CodeGenOptLevel::None) {
1015 // Propagate desired states forward, then backwards. Most of the propagation
1016 // should be done in the forward step, and backwards propagation is then
1017 // used to fill in the gaps. Note: Doing both in one step can give poor
1018 // results. For example, consider this subgraph:
1019 //
1020 // ┌─────┐
1021 // ┌─┤ BB0 ◄───┐
1022 // │ └─┬───┘ │
1023 // │ ┌─▼───◄──┐│
1024 // │ │ BB1 │ ││
1025 // │ └─┬┬──┘ ││
1026 // │ │└─────┘│
1027 // │ ┌─▼───┐ │
1028 // │ │ BB2 ├───┘
1029 // │ └─┬───┘
1030 // │ ┌─▼───┐
1031 // └─► BB3 │
1032 // └─────┘
1033 //
1034 // If:
1035 // - "BB0" and "BB2" (outer loop) has no state preference
1036 // - "BB1" (inner loop) desires the ACTIVE state on entry/exit
1037 // - "BB3" desires the LOCAL_SAVED state on entry
1038 //
1039 // If we propagate forwards first, ACTIVE is propagated from BB1 to BB2,
1040 // then from BB2 to BB0. Which results in the inner and outer loops having
1041 // the "ACTIVE" state. This avoids any state changes in the loops.
1042 //
1043 // If we propagate backwards first, we _could_ propagate LOCAL_SAVED from
1044 // BB3 to BB0, which would result in a transition from ACTIVE -> LOCAL_SAVED
1045 // in the outer loop.
1046 for (bool Forwards : {true, false})
1047 propagateDesiredStates(FnInfo, Forwards);
1048 }
1049
1050 SmallVector<ZAState> BundleStates = assignBundleZAStates(Bundles, FnInfo);
1051
1052 EmitContext Context;
1053 insertStateChanges(Context, FnInfo, Bundles, BundleStates);
1054
1055 if (Context.needsSaveBuffer()) {
1056 if (FnInfo.AfterSMEProloguePt) {
1057 // Note: With inline stack probes the AfterSMEProloguePt may not be in the
1058 // entry block (due to the probing loop).
1059 MachineBasicBlock::iterator MBBI = *FnInfo.AfterSMEProloguePt;
1060 emitAllocateZASaveBuffer(Context, *MBBI->getParent(), MBBI,
1061 FnInfo.PhysLiveRegsAfterSMEPrologue);
1062 } else {
1063 MachineBasicBlock &EntryBlock = MF.front();
1064 emitAllocateZASaveBuffer(
1065 Context, EntryBlock, EntryBlock.getFirstNonPHI(),
1066 FnInfo.Blocks[EntryBlock.getNumber()].PhysLiveRegsAtEntry);
1067 }
1068 }
1069
1070 return true;
1071}
1072
1074 return new MachineSMEABI(OptLevel);
1075}
unsigned const MachineRegisterInfo * MRI
static constexpr unsigned ZERO_ALL_ZA_MASK
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define ENTRY(ASMNAME, ENUM)
#define I(x, y, z)
Definition MD5.cpp:57
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
#define MAKE_CASE(V)
Register const TargetRegisterInfo * TRI
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
const AArch64RegisterInfo * getRegisterInfo() const override
const AArch64TargetLowering * getTargetLowering() const override
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
A debug info location.
Definition DebugLoc.h:124
ArrayRef< unsigned > getBlocks(unsigned Bundle) const
getBlocks - Return an array of blocks that are connected to Bundle.
Definition EdgeBundles.h:53
unsigned getBundle(unsigned N, bool Out) const
getBundle - Return the ingoing (Out = false) or outgoing (Out = true) bundle number for basic block N
Definition EdgeBundles.h:47
unsigned getNumBundles() const
getNumBundles - Return the total number of bundles in the CFG.
Definition EdgeBundles.h:50
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
A set of register units used to track register liveness.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
LLVM_ABI void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
LLVM_ABI void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
LLVM_ABI int CreateVariableSizedObject(Align Alignment, const AllocaInst *Alloca)
Notify the MachineFrameInfo object that a variable sized object has been created.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
MachineBasicBlock * getBlockNumbered(unsigned N) const
getBlockNumbered - MachineBasicBlocks are automatically numbered when they are inserted into the mach...
unsigned getNumBlockIDs() const
getNumBlockIDs - Return the number of MBB ID's allocated.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasAgnosticZAInterface() const
bool hasPrivateZAInterface() const
bool hasSharedZAInterface() const
typename SuperClass::const_iterator const_iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
CallingConv Namespace - This namespace contains an enum with a value for the well-known calling conve...
Definition CallingConv.h:21
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1
Preserve X1-X15, X19-X29, SP, Z0-Z31, P0-P15.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2472
auto successors(const MachineBasicBlock *BB)
FunctionPass * createMachineSMEABIPass(CodeGenOptLevel)
LLVM_ABI char & MachineDominatorsID
MachineDominators - This pass is a machine dominators analysis pass.
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition Error.cpp:177
LLVM_ABI char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
@ Default
-O2, -Os, -Oz
Definition CodeGen.h:85
@ LLVM_MARK_AS_BITMASK_ENUM
Definition ModRef.h:37
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:2030
auto predecessors(const MachineBasicBlock *BB)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...