LLVM 20.0.0git
AArch64CallingConvention.cpp
Go to the documentation of this file.
//=== AArch64CallingConvention.cpp - AArch64 CC impl ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the table-generated and custom routines for the AArch64
// Calling Convention.
//
//===----------------------------------------------------------------------===//

#include "AArch64CallingConvention.h"
#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
using namespace llvm;

22static const MCPhysReg XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2,
23 AArch64::X3, AArch64::X4, AArch64::X5,
24 AArch64::X6, AArch64::X7};
25static const MCPhysReg HRegList[] = {AArch64::H0, AArch64::H1, AArch64::H2,
26 AArch64::H3, AArch64::H4, AArch64::H5,
27 AArch64::H6, AArch64::H7};
28static const MCPhysReg SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2,
29 AArch64::S3, AArch64::S4, AArch64::S5,
30 AArch64::S6, AArch64::S7};
31static const MCPhysReg DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2,
32 AArch64::D3, AArch64::D4, AArch64::D5,
33 AArch64::D6, AArch64::D7};
34static const MCPhysReg QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
35 AArch64::Q3, AArch64::Q4, AArch64::Q5,
36 AArch64::Q6, AArch64::Q7};
37static const MCPhysReg ZRegList[] = {AArch64::Z0, AArch64::Z1, AArch64::Z2,
38 AArch64::Z3, AArch64::Z4, AArch64::Z5,
39 AArch64::Z6, AArch64::Z7};
40static const MCPhysReg PRegList[] = {AArch64::P0, AArch64::P1, AArch64::P2,
41 AArch64::P3};
42
44 MVT LocVT, ISD::ArgFlagsTy &ArgFlags,
45 CCState &State, Align SlotAlign) {
46 if (LocVT.isScalableVector()) {
47 const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
49 const AArch64TargetLowering *TLI = Subtarget.getTargetLowering();
50
51 // We are about to reinvoke the CCAssignFn auto-generated handler. If we
52 // don't unset these flags we will get stuck in an infinite loop forever
53 // invoking the custom handler.
54 ArgFlags.setInConsecutiveRegs(false);
55 ArgFlags.setInConsecutiveRegsLast(false);
56
57 // The calling convention for passing SVE tuples states that in the event
58 // we cannot allocate enough registers for the tuple we should still leave
59 // any remaining registers unallocated. However, when we call the
60 // CCAssignFn again we want it to behave as if all remaining registers are
61 // allocated. This will force the code to pass the tuple indirectly in
62 // accordance with the PCS.
63 bool ZRegsAllocated[8];
64 for (int I = 0; I < 8; I++) {
65 ZRegsAllocated[I] = State.isAllocated(ZRegList[I]);
66 State.AllocateReg(ZRegList[I]);
67 }
68 // The same applies to P registers.
69 bool PRegsAllocated[4];
70 for (int I = 0; I < 4; I++) {
71 PRegsAllocated[I] = State.isAllocated(PRegList[I]);
72 State.AllocateReg(PRegList[I]);
73 }
74
75 auto &It = PendingMembers[0];
76 CCAssignFn *AssignFn =
77 TLI->CCAssignFnForCall(State.getCallingConv(), /*IsVarArg=*/false);
78 if (AssignFn(It.getValNo(), It.getValVT(), It.getValVT(), CCValAssign::Full,
79 ArgFlags, State))
80 llvm_unreachable("Call operand has unhandled type");
81
82 // Return the flags to how they were before.
83 ArgFlags.setInConsecutiveRegs(true);
84 ArgFlags.setInConsecutiveRegsLast(true);
85
86 // Return the register state back to how it was before, leaving any
87 // unallocated registers available for other smaller types.
88 for (int I = 0; I < 8; I++)
89 if (!ZRegsAllocated[I])
90 State.DeallocateReg(ZRegList[I]);
91 for (int I = 0; I < 4; I++)
92 if (!PRegsAllocated[I])
93 State.DeallocateReg(PRegList[I]);
94
95 // All pending members have now been allocated
96 PendingMembers.clear();
97 return true;
98 }
99
100 unsigned Size = LocVT.getSizeInBits() / 8;
101 for (auto &It : PendingMembers) {
102 It.convertToMem(State.AllocateStack(Size, SlotAlign));
103 State.addLoc(It);
104 SlotAlign = Align(1);
105 }
106
107 // All pending members have now been allocated
108 PendingMembers.clear();
109 return true;
110}
111
112/// The Darwin variadic PCS places anonymous arguments in 8-byte stack slots. An
113/// [N x Ty] type must still be contiguous in memory though.
115 unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo,
116 ISD::ArgFlagsTy &ArgFlags, CCState &State) {
117 SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
118
119 // Add the argument to the list to be allocated once we know the size of the
120 // block.
121 PendingMembers.push_back(
122 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
123
124 if (!ArgFlags.isInConsecutiveRegsLast())
125 return true;
126
127 return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, Align(8));
128}
129
130/// Given an [N x Ty] block, it should be passed in a consecutive sequence of
131/// registers. If no such sequence is available, mark the rest of the registers
132/// of that type as used and place the argument on the stack.
133static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
134 CCValAssign::LocInfo &LocInfo,
135 ISD::ArgFlagsTy &ArgFlags, CCState &State) {
136 const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
138 bool IsDarwinILP32 = Subtarget.isTargetILP32() && Subtarget.isTargetMachO();
139
140 // Try to allocate a contiguous block of registers, each of the correct
141 // size to hold one member.
142 ArrayRef<MCPhysReg> RegList;
143 if (LocVT.SimpleTy == MVT::i64 || (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32))
144 RegList = XRegList;
145 else if (LocVT.SimpleTy == MVT::f16)
146 RegList = HRegList;
147 else if (LocVT.SimpleTy == MVT::f32 || LocVT.is32BitVector())
148 RegList = SRegList;
149 else if (LocVT.SimpleTy == MVT::f64 || LocVT.is64BitVector())
150 RegList = DRegList;
151 else if (LocVT.SimpleTy == MVT::f128 || LocVT.is128BitVector())
152 RegList = QRegList;
153 else if (LocVT.isScalableVector()) {
154 // Scalable masks should be pass by Predicate registers.
155 if (LocVT == MVT::nxv1i1 || LocVT == MVT::nxv2i1 || LocVT == MVT::nxv4i1 ||
156 LocVT == MVT::nxv8i1 || LocVT == MVT::nxv16i1 ||
157 LocVT == MVT::aarch64svcount)
158 RegList = PRegList;
159 else
160 RegList = ZRegList;
161 } else {
162 // Not an array we want to split up after all.
163 return false;
164 }
165
166 SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
167
168 // Add the argument to the list to be allocated once we know the size of the
169 // block.
170 PendingMembers.push_back(
171 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
172
173 if (!ArgFlags.isInConsecutiveRegsLast())
174 return true;
175
176 // [N x i32] arguments get packed into x-registers on Darwin's arm64_32
177 // because that's how the armv7k Clang front-end emits small structs.
178 unsigned EltsPerReg = (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32) ? 2 : 1;
179 unsigned RegResult = State.AllocateRegBlock(
180 RegList, alignTo(PendingMembers.size(), EltsPerReg) / EltsPerReg);
181 if (RegResult && EltsPerReg == 1) {
182 for (auto &It : PendingMembers) {
183 It.convertToReg(RegResult);
184 State.addLoc(It);
185 ++RegResult;
186 }
187 PendingMembers.clear();
188 return true;
189 } else if (RegResult) {
190 assert(EltsPerReg == 2 && "unexpected ABI");
191 bool UseHigh = false;
193 for (auto &It : PendingMembers) {
195 State.addLoc(CCValAssign::getReg(It.getValNo(), MVT::i32, RegResult,
196 MVT::i64, Info));
197 UseHigh = !UseHigh;
198 if (!UseHigh)
199 ++RegResult;
200 }
201 PendingMembers.clear();
202 return true;
203 }
204
205 if (!LocVT.isScalableVector()) {
206 // Mark all regs in the class as unavailable
207 for (auto Reg : RegList)
208 State.AllocateReg(Reg);
209 }
210
211 const MaybeAlign StackAlign =
213 assert(StackAlign && "data layout string is missing stack alignment");
214 const Align MemAlign = ArgFlags.getNonZeroMemAlign();
215 Align SlotAlign = std::min(MemAlign, *StackAlign);
216 if (!Subtarget.isTargetDarwin())
217 SlotAlign = std::max(SlotAlign, Align(8));
218
219 return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign);
220}
221
222// TableGen provides definitions of the calling convention analysis entry
223// points.
224#include "AArch64GenCallingConv.inc"
static bool finishStackBlock(SmallVectorImpl< CCValAssign > &PendingMembers, MVT LocVT, ISD::ArgFlagsTy &ArgFlags, CCState &State, Align SlotAlign)
static const MCPhysReg XRegList[]
static const MCPhysReg SRegList[]
static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State)
Given an [N x Ty] block, it should be passed in a consecutive sequence of registers.
static const MCPhysReg ZRegList[]
static const MCPhysReg DRegList[]
static const MCPhysReg HRegList[]
static const MCPhysReg QRegList[]
static const MCPhysReg PRegList[]
static bool CC_AArch64_Custom_Stack_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State)
The Darwin variadic PCS places anonymous arguments in 8-byte stack slots.
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
uint64_t Size
#define I(x, y, z)
Definition: MD5.cpp:58
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
const AArch64TargetLowering * getTargetLowering() const override
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const
Selects the correct CCAssignFn for a given CallingConvention value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
CallingConv::ID getCallingConv() const
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
MCPhysReg AllocateRegBlock(ArrayRef< MCPhysReg > Regs, unsigned RegsRequired)
AllocateRegBlock - Attempt to allocate a block of RegsRequired consecutive registers.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
void DeallocateReg(MCPhysReg Reg)
SmallVectorImpl< CCValAssign > & getPendingLocs()
bool isAllocated(MCRegister Reg) const
isAllocated - Return true if the specified register (or an alias) is allocated.
void addLoc(const CCValAssign &V)
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
MaybeAlign getStackAlignment() const
Returns the natural stack alignment, or MaybeAlign() if one wasn't specified.
Definition: DataLayout.h:227
Machine Value Type.
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
bool is32BitVector() const
Return true if this is a 32-bit vector type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool is64BitVector() const
Return true if this is a 64-bit vector type.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void push_back(const T &Elt)
Definition: SmallVector.h:413
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
void setInConsecutiveRegs(bool Flag=true)
bool isInConsecutiveRegsLast() const
Align getNonZeroMemAlign() const
void setInConsecutiveRegsLast(bool Flag=true)
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117