LLVM 20.0.0git
AArch64StackTaggingPreRA.cpp
Go to the documentation of this file.
1//===-- AArch64StackTaggingPreRA.cpp --- Stack Tagging for AArch64 -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9
10#include "AArch64.h"
12#include "AArch64InstrInfo.h"
13#include "llvm/ADT/SetVector.h"
14#include "llvm/ADT/Statistic.h"
23#include "llvm/CodeGen/Passes.h"
28#include "llvm/Support/Debug.h"
30
31using namespace llvm;
32
33#define DEBUG_TYPE "aarch64-stack-tagging-pre-ra"
34
36
38 "stack-tagging-unchecked-ld-st", cl::Hidden,
41 "Unconditionally apply unchecked-ld-st optimization (even for large "
42 "stack frames, or in the presence of variable sized allocas)."),
44 clEnumValN(UncheckedNever, "never", "never apply unchecked-ld-st"),
46 UncheckedSafe, "safe",
47 "apply unchecked-ld-st when the target is definitely within range"),
48 clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st")));
49
50static cl::opt<bool>
51 ClFirstSlot("stack-tagging-first-slot-opt", cl::Hidden, cl::init(true),
52 cl::desc("Apply first slot optimization for stack tagging "
53 "(eliminate ADDG Rt, Rn, 0, 0)."));
54
55namespace {
56
57class AArch64StackTaggingPreRA : public MachineFunctionPass {
63 const AArch64InstrInfo *TII;
64
66
67public:
68 static char ID;
69 AArch64StackTaggingPreRA() : MachineFunctionPass(ID) {
71 }
72
73 bool mayUseUncheckedLoadStore();
74 void uncheckUsesOf(unsigned TaggedReg, int FI);
75 void uncheckLoadsAndStores();
76 std::optional<int> findFirstSlotCandidate();
77
78 bool runOnMachineFunction(MachineFunction &Func) override;
79 StringRef getPassName() const override {
80 return "AArch64 Stack Tagging PreRA";
81 }
82
83 void getAnalysisUsage(AnalysisUsage &AU) const override {
84 AU.setPreservesCFG();
86 }
87};
88} // end anonymous namespace
89
90char AArch64StackTaggingPreRA::ID = 0;
91
92INITIALIZE_PASS_BEGIN(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
93 "AArch64 Stack Tagging PreRA Pass", false, false)
94INITIALIZE_PASS_END(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
95 "AArch64 Stack Tagging PreRA Pass", false, false)
96
98 return new AArch64StackTaggingPreRA();
99}
100
101static bool isUncheckedLoadOrStoreOpcode(unsigned Opcode) {
102 switch (Opcode) {
103 case AArch64::LDRBBui:
104 case AArch64::LDRHHui:
105 case AArch64::LDRWui:
106 case AArch64::LDRXui:
107
108 case AArch64::LDRBui:
109 case AArch64::LDRHui:
110 case AArch64::LDRSui:
111 case AArch64::LDRDui:
112 case AArch64::LDRQui:
113
114 case AArch64::LDRSHWui:
115 case AArch64::LDRSHXui:
116
117 case AArch64::LDRSBWui:
118 case AArch64::LDRSBXui:
119
120 case AArch64::LDRSWui:
121
122 case AArch64::STRBBui:
123 case AArch64::STRHHui:
124 case AArch64::STRWui:
125 case AArch64::STRXui:
126
127 case AArch64::STRBui:
128 case AArch64::STRHui:
129 case AArch64::STRSui:
130 case AArch64::STRDui:
131 case AArch64::STRQui:
132
133 case AArch64::LDPWi:
134 case AArch64::LDPXi:
135 case AArch64::LDPSi:
136 case AArch64::LDPDi:
137 case AArch64::LDPQi:
138
139 case AArch64::LDPSWi:
140
141 case AArch64::STPWi:
142 case AArch64::STPXi:
143 case AArch64::STPSi:
144 case AArch64::STPDi:
145 case AArch64::STPQi:
146 return true;
147 default:
148 return false;
149 }
150}
151
152bool AArch64StackTaggingPreRA::mayUseUncheckedLoadStore() {
154 return false;
156 return true;
157
158 // This estimate can be improved if we had harder guarantees about stack frame
159 // layout. With LocalStackAllocation we can estimate SP offset to any
160 // preallocated slot. AArch64FrameLowering::orderFrameObjects could put tagged
161 // objects ahead of non-tagged ones, but that's not always desirable.
162 //
163 // Underestimating SP offset here may require the use of LDG to materialize
164 // the tagged address of the stack slot, along with a scratch register
165 // allocation (post-regalloc!).
166 //
167 // For now we do the safe thing here and require that the entire stack frame
168 // is within range of the shortest of the unchecked instructions.
169 unsigned FrameSize = 0;
170 for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i)
171 FrameSize += MFI->getObjectSize(i);
172 bool EntireFrameReachableFromSP = FrameSize < 0xf00;
173 return !MFI->hasVarSizedObjects() && EntireFrameReachableFromSP;
174}
175
176void AArch64StackTaggingPreRA::uncheckUsesOf(unsigned TaggedReg, int FI) {
177 for (MachineInstr &UseI :
178 llvm::make_early_inc_range(MRI->use_instructions(TaggedReg))) {
179 if (isUncheckedLoadOrStoreOpcode(UseI.getOpcode())) {
180 // FI operand is always the one before the immediate offset.
181 unsigned OpIdx = TII->getLoadStoreImmIdx(UseI.getOpcode()) - 1;
182 if (UseI.getOperand(OpIdx).isReg() &&
183 UseI.getOperand(OpIdx).getReg() == TaggedReg) {
184 UseI.getOperand(OpIdx).ChangeToFrameIndex(FI);
185 UseI.getOperand(OpIdx).setTargetFlags(AArch64II::MO_TAGGED);
186 }
187 } else if (UseI.isCopy() && UseI.getOperand(0).getReg().isVirtual()) {
188 uncheckUsesOf(UseI.getOperand(0).getReg(), FI);
189 }
190 }
191}
192
193void AArch64StackTaggingPreRA::uncheckLoadsAndStores() {
194 for (auto *I : ReTags) {
195 Register TaggedReg = I->getOperand(0).getReg();
196 int FI = I->getOperand(1).getIndex();
197 uncheckUsesOf(TaggedReg, FI);
198 }
199}
200
201namespace {
202struct SlotWithTag {
203 int FI;
204 int Tag;
205 SlotWithTag(int FI, int Tag) : FI(FI), Tag(Tag) {}
206 explicit SlotWithTag(const MachineInstr &MI)
207 : FI(MI.getOperand(1).getIndex()), Tag(MI.getOperand(4).getImm()) {}
208 bool operator==(const SlotWithTag &Other) const {
209 return FI == Other.FI && Tag == Other.Tag;
210 }
211};
212} // namespace
213
214namespace llvm {
215template <> struct DenseMapInfo<SlotWithTag> {
216 static inline SlotWithTag getEmptyKey() { return {-2, -2}; }
217 static inline SlotWithTag getTombstoneKey() { return {-3, -3}; }
218 static unsigned getHashValue(const SlotWithTag &V) {
221 }
222 static bool isEqual(const SlotWithTag &A, const SlotWithTag &B) {
223 return A == B;
224 }
225};
226} // namespace llvm
227
228static bool isSlotPreAllocated(MachineFrameInfo *MFI, int FI) {
229 return MFI->getUseLocalStackAllocationBlock() &&
230 MFI->isObjectPreAllocated(FI);
231}
232
233// Pin one of the tagged slots to offset 0 from the tagged base pointer.
234// This would make its address available in a virtual register (IRG's def), as
235// opposed to requiring an ADDG instruction to materialize. This effectively
236// eliminates a vreg (by replacing it with direct uses of IRG, which is usually
237// live almost everywhere anyway), and therefore needs to happen before
238// regalloc.
239std::optional<int> AArch64StackTaggingPreRA::findFirstSlotCandidate() {
240 // Find the best (FI, Tag) pair to pin to offset 0.
241 // Looking at the possible uses of a tagged address, the advantage of pinning
242 // is:
243 // - COPY to physical register.
244 // Does not matter, this would trade a MOV instruction for an ADDG.
245 // - ST*G matter, but those mostly appear near the function prologue where all
246 // the tagged addresses need to be materialized anyway; also, counting ST*G
247 // uses would overweight large allocas that require more than one ST*G
248 // instruction.
249 // - Load/Store instructions in the address operand do not require a tagged
250 // pointer, so they also do not benefit. These operands have already been
251 // eliminated (see uncheckLoadsAndStores) so all remaining load/store
252 // instructions count.
253 // - Any other instruction may benefit from being pinned to offset 0.
254 LLVM_DEBUG(dbgs() << "AArch64StackTaggingPreRA::findFirstSlotCandidate\n");
255 if (!ClFirstSlot)
256 return std::nullopt;
257
259 SlotWithTag MaxScoreST{-1, -1};
260 int MaxScore = -1;
261 for (auto *I : ReTags) {
262 SlotWithTag ST{*I};
263 if (isSlotPreAllocated(MFI, ST.FI))
264 continue;
265
266 Register RetagReg = I->getOperand(0).getReg();
267 if (!RetagReg.isVirtual())
268 continue;
269
270 int Score = 0;
272 WorkList.push_back(RetagReg);
273
274 while (!WorkList.empty()) {
275 Register UseReg = WorkList.pop_back_val();
276 for (auto &UseI : MRI->use_instructions(UseReg)) {
277 unsigned Opcode = UseI.getOpcode();
278 if (Opcode == AArch64::STGi || Opcode == AArch64::ST2Gi ||
279 Opcode == AArch64::STZGi || Opcode == AArch64::STZ2Gi ||
280 Opcode == AArch64::STGPi || Opcode == AArch64::STGloop ||
281 Opcode == AArch64::STZGloop || Opcode == AArch64::STGloop_wback ||
282 Opcode == AArch64::STZGloop_wback)
283 continue;
284 if (UseI.isCopy()) {
285 Register DstReg = UseI.getOperand(0).getReg();
286 if (DstReg.isVirtual())
287 WorkList.push_back(DstReg);
288 continue;
289 }
290 LLVM_DEBUG(dbgs() << "[" << ST.FI << ":" << ST.Tag << "] use of %"
291 << Register::virtReg2Index(UseReg) << " in " << UseI
292 << "\n");
293 Score++;
294 }
295 }
296
297 int TotalScore = RetagScore[ST] += Score;
298 if (TotalScore > MaxScore ||
299 (TotalScore == MaxScore && ST.FI > MaxScoreST.FI)) {
300 MaxScore = TotalScore;
301 MaxScoreST = ST;
302 }
303 }
304
305 if (MaxScoreST.FI < 0)
306 return std::nullopt;
307
308 // If FI's tag is already 0, we are done.
309 if (MaxScoreST.Tag == 0)
310 return MaxScoreST.FI;
311
312 // Otherwise, find a random victim pair (FI, Tag) where Tag == 0.
313 SlotWithTag SwapST{-1, -1};
314 for (auto *I : ReTags) {
315 SlotWithTag ST{*I};
316 if (ST.Tag == 0) {
317 SwapST = ST;
318 break;
319 }
320 }
321
322 // Swap tags between the victim and the highest scoring pair.
323 // If SwapWith is still (-1, -1), that's fine, too - we'll simply take tag for
324 // the highest score slot without changing anything else.
325 for (auto *&I : ReTags) {
326 SlotWithTag ST{*I};
327 MachineOperand &TagOp = I->getOperand(4);
328 if (ST == MaxScoreST) {
329 TagOp.setImm(0);
330 } else if (ST == SwapST) {
331 TagOp.setImm(MaxScoreST.Tag);
332 }
333 }
334 return MaxScoreST.FI;
335}
336
337bool AArch64StackTaggingPreRA::runOnMachineFunction(MachineFunction &Func) {
338 MF = &Func;
339 MRI = &MF->getRegInfo();
340 AFI = MF->getInfo<AArch64FunctionInfo>();
341 TII = static_cast<const AArch64InstrInfo *>(MF->getSubtarget().getInstrInfo());
342 TRI = static_cast<const AArch64RegisterInfo *>(
344 MFI = &MF->getFrameInfo();
345 ReTags.clear();
346
347 assert(MRI->isSSA());
348
349 LLVM_DEBUG(dbgs() << "********** AArch64 Stack Tagging PreRA **********\n"
350 << "********** Function: " << MF->getName() << '\n');
351
352 SmallSetVector<int, 8> TaggedSlots;
353 for (auto &BB : *MF) {
354 for (auto &I : BB) {
355 if (I.getOpcode() == AArch64::TAGPstack) {
356 ReTags.push_back(&I);
357 int FI = I.getOperand(1).getIndex();
358 TaggedSlots.insert(FI);
359 // There should be no offsets in TAGP yet.
360 assert(I.getOperand(2).getImm() == 0);
361 }
362 }
363 }
364
365 // Take over from SSP. It does nothing for tagged slots, and should not really
366 // have been enabled in the first place.
367 for (int FI : TaggedSlots)
369
370 if (ReTags.empty())
371 return false;
372
373 if (mayUseUncheckedLoadStore())
374 uncheckLoadsAndStores();
375
376 // Find a slot that is used with zero tag offset, like ADDG #fi, 0.
377 // If the base tagged pointer is set up to the address of this slot,
378 // the ADDG instruction can be eliminated.
379 std::optional<int> BaseSlot = findFirstSlotCandidate();
380 if (BaseSlot)
381 AFI->setTaggedBasePointerIndex(*BaseSlot);
382
383 for (auto *I : ReTags) {
384 int FI = I->getOperand(1).getIndex();
385 int Tag = I->getOperand(4).getImm();
386 Register Base = I->getOperand(3).getReg();
387 if (Tag == 0 && FI == BaseSlot) {
388 BuildMI(*I->getParent(), I, {}, TII->get(AArch64::COPY),
389 I->getOperand(0).getReg())
390 .addReg(Base);
391 I->eraseFromParent();
392 }
393 }
394
395 return true;
396}
unsigned const MachineRegisterInfo * MRI
aarch64 globals tagging
aarch64 stack tagging pre ra
static bool isSlotPreAllocated(MachineFrameInfo *MFI, int FI)
cl::opt< UncheckedLdStMode > ClUncheckedLdSt("stack-tagging-unchecked-ld-st", cl::Hidden, cl::init(UncheckedSafe), cl::desc("Unconditionally apply unchecked-ld-st optimization (even for large " "stack frames, or in the presence of variable sized allocas)."), cl::values(clEnumValN(UncheckedNever, "never", "never apply unchecked-ld-st"), clEnumValN(UncheckedSafe, "safe", "apply unchecked-ld-st when the target is definitely within range"), clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st")))
static cl::opt< bool > ClFirstSlot("stack-tagging-first-slot-opt", cl::Hidden, cl::init(true), cl::desc("Apply first slot optimization for stack tagging " "(eliminate ADDG Rt, Rn, 0, 0)."))
static bool isUncheckedLoadOrStoreOpcode(unsigned Opcode)
AArch64 Stack Tagging
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:686
#define LLVM_DEBUG(X)
Definition: Debug.h:101
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1309
static Register UseReg(const MachineOperand &MO)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
Represent the analysis usage information of a pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
bool isObjectPreAllocated(int ObjectIdx) const
Return true if the object was pre-allocated into the local block.
@ SSPLK_None
Did not trigger a stack protector.
void setObjectSSPLayout(int ObjectIdx, SSPLayoutKind Kind)
bool getUseLocalStackAllocationBlock() const
Get whether the local allocation blob should be allocated together or let PEI allocate the locals in ...
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:94
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
static unsigned virtReg2Index(Register Reg)
Convert a virtual register number to a 0-based index.
Definition: Register.h:77
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:95
void push_back(const T &Elt)
Definition: SmallVector.h:427
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:711
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createAArch64StackTaggingPreRAPass()
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void initializeAArch64StackTaggingPreRAPass(PassRegistry &)
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition: Hashing.h:593
static bool isEqual(const SlotWithTag &A, const SlotWithTag &B)
static unsigned getHashValue(const SlotWithTag &V)
An information struct used to provide DenseMap with the various necessary components for a given valu...
Definition: DenseMapInfo.h:52