LLVM 20.0.0git
AArch64StackTaggingPreRA.cpp
Go to the documentation of this file.
1//===-- AArch64StackTaggingPreRA.cpp --- Stack Tagging for AArch64 -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AArch64.h"
10#include "AArch64InstrInfo.h"
12#include "llvm/ADT/SetVector.h"
13#include "llvm/ADT/Statistic.h"
20#include "llvm/CodeGen/Passes.h"
25#include "llvm/Support/Debug.h"
27
28using namespace llvm;
29
30#define DEBUG_TYPE "aarch64-stack-tagging-pre-ra"
31
33
35 "stack-tagging-unchecked-ld-st", cl::Hidden,
38 "Unconditionally apply unchecked-ld-st optimization (even for large "
39 "stack frames, or in the presence of variable sized allocas)."),
41 clEnumValN(UncheckedNever, "never", "never apply unchecked-ld-st"),
43 UncheckedSafe, "safe",
44 "apply unchecked-ld-st when the target is definitely within range"),
45 clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st")));
46
47static cl::opt<bool>
48 ClFirstSlot("stack-tagging-first-slot-opt", cl::Hidden, cl::init(true),
49 cl::desc("Apply first slot optimization for stack tagging "
50 "(eliminate ADDG Rt, Rn, 0, 0)."));
51
52namespace {
53
54class AArch64StackTaggingPreRA : public MachineFunctionPass {
60 const AArch64InstrInfo *TII;
61
63
64public:
65 static char ID;
66 AArch64StackTaggingPreRA() : MachineFunctionPass(ID) {
68 }
69
70 bool mayUseUncheckedLoadStore();
71 void uncheckUsesOf(unsigned TaggedReg, int FI);
72 void uncheckLoadsAndStores();
73 std::optional<int> findFirstSlotCandidate();
74
75 bool runOnMachineFunction(MachineFunction &Func) override;
76 StringRef getPassName() const override {
77 return "AArch64 Stack Tagging PreRA";
78 }
79
80 void getAnalysisUsage(AnalysisUsage &AU) const override {
81 AU.setPreservesCFG();
83 }
84};
85} // end anonymous namespace
86
87char AArch64StackTaggingPreRA::ID = 0;
88
89INITIALIZE_PASS_BEGIN(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
90 "AArch64 Stack Tagging PreRA Pass", false, false)
91INITIALIZE_PASS_END(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
92 "AArch64 Stack Tagging PreRA Pass", false, false)
93
95 return new AArch64StackTaggingPreRA();
96}
97
98static bool isUncheckedLoadOrStoreOpcode(unsigned Opcode) {
99 switch (Opcode) {
100 case AArch64::LDRBBui:
101 case AArch64::LDRHHui:
102 case AArch64::LDRWui:
103 case AArch64::LDRXui:
104
105 case AArch64::LDRBui:
106 case AArch64::LDRHui:
107 case AArch64::LDRSui:
108 case AArch64::LDRDui:
109 case AArch64::LDRQui:
110
111 case AArch64::LDRSHWui:
112 case AArch64::LDRSHXui:
113
114 case AArch64::LDRSBWui:
115 case AArch64::LDRSBXui:
116
117 case AArch64::LDRSWui:
118
119 case AArch64::STRBBui:
120 case AArch64::STRHHui:
121 case AArch64::STRWui:
122 case AArch64::STRXui:
123
124 case AArch64::STRBui:
125 case AArch64::STRHui:
126 case AArch64::STRSui:
127 case AArch64::STRDui:
128 case AArch64::STRQui:
129
130 case AArch64::LDPWi:
131 case AArch64::LDPXi:
132 case AArch64::LDPSi:
133 case AArch64::LDPDi:
134 case AArch64::LDPQi:
135
136 case AArch64::LDPSWi:
137
138 case AArch64::STPWi:
139 case AArch64::STPXi:
140 case AArch64::STPSi:
141 case AArch64::STPDi:
142 case AArch64::STPQi:
143 return true;
144 default:
145 return false;
146 }
147}
148
149bool AArch64StackTaggingPreRA::mayUseUncheckedLoadStore() {
151 return false;
153 return true;
154
155 // This estimate can be improved if we had harder guarantees about stack frame
156 // layout. With LocalStackAllocation we can estimate SP offset to any
157 // preallocated slot. AArch64FrameLowering::orderFrameObjects could put tagged
158 // objects ahead of non-tagged ones, but that's not always desirable.
159 //
160 // Underestimating SP offset here may require the use of LDG to materialize
161 // the tagged address of the stack slot, along with a scratch register
162 // allocation (post-regalloc!).
163 //
164 // For now we do the safe thing here and require that the entire stack frame
165 // is within range of the shortest of the unchecked instructions.
166 unsigned FrameSize = 0;
167 for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i)
168 FrameSize += MFI->getObjectSize(i);
169 bool EntireFrameReachableFromSP = FrameSize < 0xf00;
170 return !MFI->hasVarSizedObjects() && EntireFrameReachableFromSP;
171}
172
173void AArch64StackTaggingPreRA::uncheckUsesOf(unsigned TaggedReg, int FI) {
174 for (MachineInstr &UseI :
175 llvm::make_early_inc_range(MRI->use_instructions(TaggedReg))) {
176 if (isUncheckedLoadOrStoreOpcode(UseI.getOpcode())) {
177 // FI operand is always the one before the immediate offset.
178 unsigned OpIdx = TII->getLoadStoreImmIdx(UseI.getOpcode()) - 1;
179 if (UseI.getOperand(OpIdx).isReg() &&
180 UseI.getOperand(OpIdx).getReg() == TaggedReg) {
181 UseI.getOperand(OpIdx).ChangeToFrameIndex(FI);
182 UseI.getOperand(OpIdx).setTargetFlags(AArch64II::MO_TAGGED);
183 }
184 } else if (UseI.isCopy() && UseI.getOperand(0).getReg().isVirtual()) {
185 uncheckUsesOf(UseI.getOperand(0).getReg(), FI);
186 }
187 }
188}
189
190void AArch64StackTaggingPreRA::uncheckLoadsAndStores() {
191 for (auto *I : ReTags) {
192 Register TaggedReg = I->getOperand(0).getReg();
193 int FI = I->getOperand(1).getIndex();
194 uncheckUsesOf(TaggedReg, FI);
195 }
196}
197
198namespace {
199struct SlotWithTag {
200 int FI;
201 int Tag;
202 SlotWithTag(int FI, int Tag) : FI(FI), Tag(Tag) {}
203 explicit SlotWithTag(const MachineInstr &MI)
204 : FI(MI.getOperand(1).getIndex()), Tag(MI.getOperand(4).getImm()) {}
205 bool operator==(const SlotWithTag &Other) const {
206 return FI == Other.FI && Tag == Other.Tag;
207 }
208};
209} // namespace
210
211namespace llvm {
212template <> struct DenseMapInfo<SlotWithTag> {
213 static inline SlotWithTag getEmptyKey() { return {-2, -2}; }
214 static inline SlotWithTag getTombstoneKey() { return {-3, -3}; }
215 static unsigned getHashValue(const SlotWithTag &V) {
218 }
219 static bool isEqual(const SlotWithTag &A, const SlotWithTag &B) {
220 return A == B;
221 }
222};
223} // namespace llvm
224
225static bool isSlotPreAllocated(MachineFrameInfo *MFI, int FI) {
226 return MFI->getUseLocalStackAllocationBlock() &&
227 MFI->isObjectPreAllocated(FI);
228}
229
230// Pin one of the tagged slots to offset 0 from the tagged base pointer.
231// This would make its address available in a virtual register (IRG's def), as
232// opposed to requiring an ADDG instruction to materialize. This effectively
233// eliminates a vreg (by replacing it with direct uses of IRG, which is usually
234// live almost everywhere anyway), and therefore needs to happen before
235// regalloc.
236std::optional<int> AArch64StackTaggingPreRA::findFirstSlotCandidate() {
237 // Find the best (FI, Tag) pair to pin to offset 0.
238 // Looking at the possible uses of a tagged address, the advantage of pinning
239 // is:
240 // - COPY to physical register.
241 // Does not matter, this would trade a MOV instruction for an ADDG.
242 // - ST*G matter, but those mostly appear near the function prologue where all
243 // the tagged addresses need to be materialized anyway; also, counting ST*G
244 // uses would overweight large allocas that require more than one ST*G
245 // instruction.
246 // - Load/Store instructions in the address operand do not require a tagged
247 // pointer, so they also do not benefit. These operands have already been
248 // eliminated (see uncheckLoadsAndStores) so all remaining load/store
249 // instructions count.
250 // - Any other instruction may benefit from being pinned to offset 0.
251 LLVM_DEBUG(dbgs() << "AArch64StackTaggingPreRA::findFirstSlotCandidate\n");
252 if (!ClFirstSlot)
253 return std::nullopt;
254
256 SlotWithTag MaxScoreST{-1, -1};
257 int MaxScore = -1;
258 for (auto *I : ReTags) {
259 SlotWithTag ST{*I};
260 if (isSlotPreAllocated(MFI, ST.FI))
261 continue;
262
263 Register RetagReg = I->getOperand(0).getReg();
264 if (!RetagReg.isVirtual())
265 continue;
266
267 int Score = 0;
269 WorkList.push_back(RetagReg);
270
271 while (!WorkList.empty()) {
272 Register UseReg = WorkList.pop_back_val();
273 for (auto &UseI : MRI->use_instructions(UseReg)) {
274 unsigned Opcode = UseI.getOpcode();
275 if (Opcode == AArch64::STGi || Opcode == AArch64::ST2Gi ||
276 Opcode == AArch64::STZGi || Opcode == AArch64::STZ2Gi ||
277 Opcode == AArch64::STGPi || Opcode == AArch64::STGloop ||
278 Opcode == AArch64::STZGloop || Opcode == AArch64::STGloop_wback ||
279 Opcode == AArch64::STZGloop_wback)
280 continue;
281 if (UseI.isCopy()) {
282 Register DstReg = UseI.getOperand(0).getReg();
283 if (DstReg.isVirtual())
284 WorkList.push_back(DstReg);
285 continue;
286 }
287 LLVM_DEBUG(dbgs() << "[" << ST.FI << ":" << ST.Tag << "] use of %"
288 << Register::virtReg2Index(UseReg) << " in " << UseI
289 << "\n");
290 Score++;
291 }
292 }
293
294 int TotalScore = RetagScore[ST] += Score;
295 if (TotalScore > MaxScore ||
296 (TotalScore == MaxScore && ST.FI > MaxScoreST.FI)) {
297 MaxScore = TotalScore;
298 MaxScoreST = ST;
299 }
300 }
301
302 if (MaxScoreST.FI < 0)
303 return std::nullopt;
304
305 // If FI's tag is already 0, we are done.
306 if (MaxScoreST.Tag == 0)
307 return MaxScoreST.FI;
308
309 // Otherwise, find a random victim pair (FI, Tag) where Tag == 0.
310 SlotWithTag SwapST{-1, -1};
311 for (auto *I : ReTags) {
312 SlotWithTag ST{*I};
313 if (ST.Tag == 0) {
314 SwapST = ST;
315 break;
316 }
317 }
318
319 // Swap tags between the victim and the highest scoring pair.
320 // If SwapWith is still (-1, -1), that's fine, too - we'll simply take tag for
321 // the highest score slot without changing anything else.
322 for (auto *&I : ReTags) {
323 SlotWithTag ST{*I};
324 MachineOperand &TagOp = I->getOperand(4);
325 if (ST == MaxScoreST) {
326 TagOp.setImm(0);
327 } else if (ST == SwapST) {
328 TagOp.setImm(MaxScoreST.Tag);
329 }
330 }
331 return MaxScoreST.FI;
332}
333
334bool AArch64StackTaggingPreRA::runOnMachineFunction(MachineFunction &Func) {
335 MF = &Func;
336 MRI = &MF->getRegInfo();
337 AFI = MF->getInfo<AArch64FunctionInfo>();
338 TII = static_cast<const AArch64InstrInfo *>(MF->getSubtarget().getInstrInfo());
339 TRI = static_cast<const AArch64RegisterInfo *>(
341 MFI = &MF->getFrameInfo();
342 ReTags.clear();
343
344 assert(MRI->isSSA());
345
346 LLVM_DEBUG(dbgs() << "********** AArch64 Stack Tagging PreRA **********\n"
347 << "********** Function: " << MF->getName() << '\n');
348
349 SmallSetVector<int, 8> TaggedSlots;
350 for (auto &BB : *MF) {
351 for (auto &I : BB) {
352 if (I.getOpcode() == AArch64::TAGPstack) {
353 ReTags.push_back(&I);
354 int FI = I.getOperand(1).getIndex();
355 TaggedSlots.insert(FI);
356 // There should be no offsets in TAGP yet.
357 assert(I.getOperand(2).getImm() == 0);
358 }
359 }
360 }
361
362 // Take over from SSP. It does nothing for tagged slots, and should not really
363 // have been enabled in the first place.
364 for (int FI : TaggedSlots)
366
367 if (ReTags.empty())
368 return false;
369
370 if (mayUseUncheckedLoadStore())
371 uncheckLoadsAndStores();
372
373 // Find a slot that is used with zero tag offset, like ADDG #fi, 0.
374 // If the base tagged pointer is set up to the address of this slot,
375 // the ADDG instruction can be eliminated.
376 std::optional<int> BaseSlot = findFirstSlotCandidate();
377 if (BaseSlot)
378 AFI->setTaggedBasePointerIndex(*BaseSlot);
379
380 for (auto *I : ReTags) {
381 int FI = I->getOperand(1).getIndex();
382 int Tag = I->getOperand(4).getImm();
383 Register Base = I->getOperand(3).getReg();
384 if (Tag == 0 && FI == BaseSlot) {
385 BuildMI(*I->getParent(), I, {}, TII->get(AArch64::COPY),
386 I->getOperand(0).getReg())
387 .addReg(Base);
388 I->eraseFromParent();
389 }
390 }
391
392 return true;
393}
unsigned const MachineRegisterInfo * MRI
aarch64 stack tagging pre ra
static bool isSlotPreAllocated(MachineFrameInfo *MFI, int FI)
cl::opt< UncheckedLdStMode > ClUncheckedLdSt("stack-tagging-unchecked-ld-st", cl::Hidden, cl::init(UncheckedSafe), cl::desc("Unconditionally apply unchecked-ld-st optimization (even for large " "stack frames, or in the presence of variable sized allocas)."), cl::values(clEnumValN(UncheckedNever, "never", "never apply unchecked-ld-st"), clEnumValN(UncheckedSafe, "safe", "apply unchecked-ld-st when the target is definitely within range"), clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st")))
static cl::opt< bool > ClFirstSlot("stack-tagging-first-slot-opt", cl::Hidden, cl::init(true), cl::desc("Apply first slot optimization for stack tagging " "(eliminate ADDG Rt, Rn, 0, 0)."))
static bool isUncheckedLoadOrStoreOpcode(unsigned Opcode)
AArch64 Stack Tagging
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:686
#define LLVM_DEBUG(...)
Definition: Debug.h:106
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1315
static Register UseReg(const MachineOperand &MO)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
Represent the analysis usage information of a pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
bool isObjectPreAllocated(int ObjectIdx) const
Return true if the object was pre-allocated into the local block.
@ SSPLK_None
Did not trigger a stack protector.
void setObjectSSPLayout(int ObjectIdx, SSPLayoutKind Kind)
bool getUseLocalStackAllocationBlock() const
Get whether the local allocation blob should be allocated together or let PEI allocate the locals in ...
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:94
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
static unsigned virtReg2Index(Register Reg)
Convert a virtual register number to a 0-based index.
Definition: Register.h:77
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:81
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:711
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createAArch64StackTaggingPreRAPass()
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:657
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void initializeAArch64StackTaggingPreRAPass(PassRegistry &)
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition: Hashing.h:590
static bool isEqual(const SlotWithTag &A, const SlotWithTag &B)
static unsigned getHashValue(const SlotWithTag &V)
An information struct used to provide DenseMap with the various necessary components for a given valu...
Definition: DenseMapInfo.h:52