LLVM 18.0.0git
AArch64StackTaggingPreRA.cpp
Go to the documentation of this file.
1//===-- AArch64StackTaggingPreRA.cpp --- Stack Tagging for AArch64 -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9
#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
31
32using namespace llvm;
33
34#define DEBUG_TYPE "aarch64-stack-tagging-pre-ra"
35
37
39 "stack-tagging-unchecked-ld-st", cl::Hidden,
42 "Unconditionally apply unchecked-ld-st optimization (even for large "
43 "stack frames, or in the presence of variable sized allocas)."),
45 clEnumValN(UncheckedNever, "never", "never apply unchecked-ld-st"),
47 UncheckedSafe, "safe",
48 "apply unchecked-ld-st when the target is definitely within range"),
49 clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st")));
50
51static cl::opt<bool>
52 ClFirstSlot("stack-tagging-first-slot-opt", cl::Hidden, cl::init(true),
53 cl::desc("Apply first slot optimization for stack tagging "
54 "(eliminate ADDG Rt, Rn, 0, 0)."));
55
56namespace {
57
58class AArch64StackTaggingPreRA : public MachineFunctionPass {
64 const AArch64InstrInfo *TII;
65
67
68public:
69 static char ID;
70 AArch64StackTaggingPreRA() : MachineFunctionPass(ID) {
72 }
73
74 bool mayUseUncheckedLoadStore();
75 void uncheckUsesOf(unsigned TaggedReg, int FI);
76 void uncheckLoadsAndStores();
77 std::optional<int> findFirstSlotCandidate();
78
79 bool runOnMachineFunction(MachineFunction &Func) override;
80 StringRef getPassName() const override {
81 return "AArch64 Stack Tagging PreRA";
82 }
83
84 void getAnalysisUsage(AnalysisUsage &AU) const override {
85 AU.setPreservesCFG();
87 }
88};
89} // end anonymous namespace
90
91char AArch64StackTaggingPreRA::ID = 0;
92
93INITIALIZE_PASS_BEGIN(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
94 "AArch64 Stack Tagging PreRA Pass", false, false)
95INITIALIZE_PASS_END(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
96 "AArch64 Stack Tagging PreRA Pass", false, false)
97
99 return new AArch64StackTaggingPreRA();
100}
101
102static bool isUncheckedLoadOrStoreOpcode(unsigned Opcode) {
103 switch (Opcode) {
104 case AArch64::LDRBBui:
105 case AArch64::LDRHHui:
106 case AArch64::LDRWui:
107 case AArch64::LDRXui:
108
109 case AArch64::LDRBui:
110 case AArch64::LDRHui:
111 case AArch64::LDRSui:
112 case AArch64::LDRDui:
113 case AArch64::LDRQui:
114
115 case AArch64::LDRSHWui:
116 case AArch64::LDRSHXui:
117
118 case AArch64::LDRSBWui:
119 case AArch64::LDRSBXui:
120
121 case AArch64::LDRSWui:
122
123 case AArch64::STRBBui:
124 case AArch64::STRHHui:
125 case AArch64::STRWui:
126 case AArch64::STRXui:
127
128 case AArch64::STRBui:
129 case AArch64::STRHui:
130 case AArch64::STRSui:
131 case AArch64::STRDui:
132 case AArch64::STRQui:
133
134 case AArch64::LDPWi:
135 case AArch64::LDPXi:
136 case AArch64::LDPSi:
137 case AArch64::LDPDi:
138 case AArch64::LDPQi:
139
140 case AArch64::LDPSWi:
141
142 case AArch64::STPWi:
143 case AArch64::STPXi:
144 case AArch64::STPSi:
145 case AArch64::STPDi:
146 case AArch64::STPQi:
147 return true;
148 default:
149 return false;
150 }
151}
152
153bool AArch64StackTaggingPreRA::mayUseUncheckedLoadStore() {
155 return false;
157 return true;
158
159 // This estimate can be improved if we had harder guarantees about stack frame
160 // layout. With LocalStackAllocation we can estimate SP offset to any
161 // preallocated slot. AArch64FrameLowering::orderFrameObjects could put tagged
162 // objects ahead of non-tagged ones, but that's not always desirable.
163 //
164 // Underestimating SP offset here may require the use of LDG to materialize
165 // the tagged address of the stack slot, along with a scratch register
166 // allocation (post-regalloc!).
167 //
168 // For now we do the safe thing here and require that the entire stack frame
169 // is within range of the shortest of the unchecked instructions.
170 unsigned FrameSize = 0;
171 for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i)
172 FrameSize += MFI->getObjectSize(i);
173 bool EntireFrameReachableFromSP = FrameSize < 0xf00;
174 return !MFI->hasVarSizedObjects() && EntireFrameReachableFromSP;
175}
176
177void AArch64StackTaggingPreRA::uncheckUsesOf(unsigned TaggedReg, int FI) {
178 for (MachineInstr &UseI :
179 llvm::make_early_inc_range(MRI->use_instructions(TaggedReg))) {
180 if (isUncheckedLoadOrStoreOpcode(UseI.getOpcode())) {
181 // FI operand is always the one before the immediate offset.
182 unsigned OpIdx = TII->getLoadStoreImmIdx(UseI.getOpcode()) - 1;
183 if (UseI.getOperand(OpIdx).isReg() &&
184 UseI.getOperand(OpIdx).getReg() == TaggedReg) {
185 UseI.getOperand(OpIdx).ChangeToFrameIndex(FI);
186 UseI.getOperand(OpIdx).setTargetFlags(AArch64II::MO_TAGGED);
187 }
188 } else if (UseI.isCopy() && UseI.getOperand(0).getReg().isVirtual()) {
189 uncheckUsesOf(UseI.getOperand(0).getReg(), FI);
190 }
191 }
192}
193
194void AArch64StackTaggingPreRA::uncheckLoadsAndStores() {
195 for (auto *I : ReTags) {
196 Register TaggedReg = I->getOperand(0).getReg();
197 int FI = I->getOperand(1).getIndex();
198 uncheckUsesOf(TaggedReg, FI);
199 }
200}
201
202namespace {
203struct SlotWithTag {
204 int FI;
205 int Tag;
206 SlotWithTag(int FI, int Tag) : FI(FI), Tag(Tag) {}
207 explicit SlotWithTag(const MachineInstr &MI)
208 : FI(MI.getOperand(1).getIndex()), Tag(MI.getOperand(4).getImm()) {}
209 bool operator==(const SlotWithTag &Other) const {
210 return FI == Other.FI && Tag == Other.Tag;
211 }
212};
213} // namespace
214
215namespace llvm {
216template <> struct DenseMapInfo<SlotWithTag> {
217 static inline SlotWithTag getEmptyKey() { return {-2, -2}; }
218 static inline SlotWithTag getTombstoneKey() { return {-3, -3}; }
219 static unsigned getHashValue(const SlotWithTag &V) {
222 }
223 static bool isEqual(const SlotWithTag &A, const SlotWithTag &B) {
224 return A == B;
225 }
226};
227} // namespace llvm
228
229static bool isSlotPreAllocated(MachineFrameInfo *MFI, int FI) {
230 return MFI->getUseLocalStackAllocationBlock() &&
231 MFI->isObjectPreAllocated(FI);
232}
233
234// Pin one of the tagged slots to offset 0 from the tagged base pointer.
235// This would make its address available in a virtual register (IRG's def), as
236// opposed to requiring an ADDG instruction to materialize. This effectively
237// eliminates a vreg (by replacing it with direct uses of IRG, which is usually
238// live almost everywhere anyway), and therefore needs to happen before
239// regalloc.
240std::optional<int> AArch64StackTaggingPreRA::findFirstSlotCandidate() {
241 // Find the best (FI, Tag) pair to pin to offset 0.
242 // Looking at the possible uses of a tagged address, the advantage of pinning
243 // is:
244 // - COPY to physical register.
245 // Does not matter, this would trade a MOV instruction for an ADDG.
246 // - ST*G matter, but those mostly appear near the function prologue where all
247 // the tagged addresses need to be materialized anyway; also, counting ST*G
248 // uses would overweight large allocas that require more than one ST*G
249 // instruction.
250 // - Load/Store instructions in the address operand do not require a tagged
251 // pointer, so they also do not benefit. These operands have already been
252 // eliminated (see uncheckLoadsAndStores) so all remaining load/store
253 // instructions count.
254 // - Any other instruction may benefit from being pinned to offset 0.
255 LLVM_DEBUG(dbgs() << "AArch64StackTaggingPreRA::findFirstSlotCandidate\n");
256 if (!ClFirstSlot)
257 return std::nullopt;
258
260 SlotWithTag MaxScoreST{-1, -1};
261 int MaxScore = -1;
262 for (auto *I : ReTags) {
263 SlotWithTag ST{*I};
264 if (isSlotPreAllocated(MFI, ST.FI))
265 continue;
266
267 Register RetagReg = I->getOperand(0).getReg();
268 if (!RetagReg.isVirtual())
269 continue;
270
271 int Score = 0;
273 WorkList.push_back(RetagReg);
274
275 while (!WorkList.empty()) {
276 Register UseReg = WorkList.pop_back_val();
277 for (auto &UseI : MRI->use_instructions(UseReg)) {
278 unsigned Opcode = UseI.getOpcode();
279 if (Opcode == AArch64::STGi || Opcode == AArch64::ST2Gi ||
280 Opcode == AArch64::STZGi || Opcode == AArch64::STZ2Gi ||
281 Opcode == AArch64::STGPi || Opcode == AArch64::STGloop ||
282 Opcode == AArch64::STZGloop || Opcode == AArch64::STGloop_wback ||
283 Opcode == AArch64::STZGloop_wback)
284 continue;
285 if (UseI.isCopy()) {
286 Register DstReg = UseI.getOperand(0).getReg();
287 if (DstReg.isVirtual())
288 WorkList.push_back(DstReg);
289 continue;
290 }
291 LLVM_DEBUG(dbgs() << "[" << ST.FI << ":" << ST.Tag << "] use of %"
292 << Register::virtReg2Index(UseReg) << " in " << UseI
293 << "\n");
294 Score++;
295 }
296 }
297
298 int TotalScore = RetagScore[ST] += Score;
299 if (TotalScore > MaxScore ||
300 (TotalScore == MaxScore && ST.FI > MaxScoreST.FI)) {
301 MaxScore = TotalScore;
302 MaxScoreST = ST;
303 }
304 }
305
306 if (MaxScoreST.FI < 0)
307 return std::nullopt;
308
309 // If FI's tag is already 0, we are done.
310 if (MaxScoreST.Tag == 0)
311 return MaxScoreST.FI;
312
313 // Otherwise, find a random victim pair (FI, Tag) where Tag == 0.
314 SlotWithTag SwapST{-1, -1};
315 for (auto *I : ReTags) {
316 SlotWithTag ST{*I};
317 if (ST.Tag == 0) {
318 SwapST = ST;
319 break;
320 }
321 }
322
323 // Swap tags between the victim and the highest scoring pair.
324 // If SwapWith is still (-1, -1), that's fine, too - we'll simply take tag for
325 // the highest score slot without changing anything else.
326 for (auto *&I : ReTags) {
327 SlotWithTag ST{*I};
328 MachineOperand &TagOp = I->getOperand(4);
329 if (ST == MaxScoreST) {
330 TagOp.setImm(0);
331 } else if (ST == SwapST) {
332 TagOp.setImm(MaxScoreST.Tag);
333 }
334 }
335 return MaxScoreST.FI;
336}
337
338bool AArch64StackTaggingPreRA::runOnMachineFunction(MachineFunction &Func) {
339 MF = &Func;
340 MRI = &MF->getRegInfo();
341 AFI = MF->getInfo<AArch64FunctionInfo>();
342 TII = static_cast<const AArch64InstrInfo *>(MF->getSubtarget().getInstrInfo());
343 TRI = static_cast<const AArch64RegisterInfo *>(
345 MFI = &MF->getFrameInfo();
346 ReTags.clear();
347
348 assert(MRI->isSSA());
349
350 LLVM_DEBUG(dbgs() << "********** AArch64 Stack Tagging PreRA **********\n"
351 << "********** Function: " << MF->getName() << '\n');
352
353 SmallSetVector<int, 8> TaggedSlots;
354 for (auto &BB : *MF) {
355 for (auto &I : BB) {
356 if (I.getOpcode() == AArch64::TAGPstack) {
357 ReTags.push_back(&I);
358 int FI = I.getOperand(1).getIndex();
359 TaggedSlots.insert(FI);
360 // There should be no offsets in TAGP yet.
361 assert(I.getOperand(2).getImm() == 0);
362 }
363 }
364 }
365
366 // Take over from SSP. It does nothing for tagged slots, and should not really
367 // have been enabled in the first place.
368 for (int FI : TaggedSlots)
370
371 if (ReTags.empty())
372 return false;
373
374 if (mayUseUncheckedLoadStore())
375 uncheckLoadsAndStores();
376
377 // Find a slot that is used with zero tag offset, like ADDG #fi, 0.
378 // If the base tagged pointer is set up to the address of this slot,
379 // the ADDG instruction can be eliminated.
380 std::optional<int> BaseSlot = findFirstSlotCandidate();
381 if (BaseSlot)
382 AFI->setTaggedBasePointerIndex(*BaseSlot);
383
384 for (auto *I : ReTags) {
385 int FI = I->getOperand(1).getIndex();
386 int Tag = I->getOperand(4).getImm();
387 Register Base = I->getOperand(3).getReg();
388 if (Tag == 0 && FI == BaseSlot) {
389 BuildMI(*I->getParent(), I, {}, TII->get(AArch64::COPY),
390 I->getOperand(0).getReg())
391 .addReg(Base);
392 I->eraseFromParent();
393 }
394 }
395
396 return true;
397}
unsigned const MachineRegisterInfo * MRI
aarch64 globals tagging
aarch64 stack tagging pre ra
static bool isSlotPreAllocated(MachineFrameInfo *MFI, int FI)
cl::opt< UncheckedLdStMode > ClUncheckedLdSt("stack-tagging-unchecked-ld-st", cl::Hidden, cl::init(UncheckedSafe), cl::desc("Unconditionally apply unchecked-ld-st optimization (even for large " "stack frames, or in the presence of variable sized allocas)."), cl::values(clEnumValN(UncheckedNever, "never", "never apply unchecked-ld-st"), clEnumValN(UncheckedSafe, "safe", "apply unchecked-ld-st when the target is definitely within range"), clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st")))
static cl::opt< bool > ClFirstSlot("stack-tagging-first-slot-opt", cl::Hidden, cl::init(true), cl::desc("Apply first slot optimization for stack tagging " "(eliminate ADDG Rt, Rn, 0, 0)."))
static bool isUncheckedLoadOrStoreOpcode(unsigned Opcode)
AArch64 Stack Tagging
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:680
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1272
static Register UseReg(const MachineOperand &MO)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
Represent the analysis usage information of a pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
bool isObjectPreAllocated(int ObjectIdx) const
Return true if the object was pre-allocated into the local block.
@ SSPLK_None
Did not trigger a stack protector.
void setObjectSSPLayout(int ObjectIdx, SSPLayoutKind Kind)
bool getUseLocalStackAllocationBlock() const
Get whether the local allocation blob should be allocated together or let PEI allocate the locals in ...
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
Definition: MachineInstr.h:68
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:94
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
static unsigned virtReg2Index(Register Reg)
Convert a virtual register number to a 0-based index.
Definition: Register.h:77
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:94
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:705
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createAArch64StackTaggingPreRAPass()
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:666
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void initializeAArch64StackTaggingPreRAPass(PassRegistry &)
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition: Hashing.h:613
static bool isEqual(const SlotWithTag &A, const SlotWithTag &B)
static unsigned getHashValue(const SlotWithTag &V)
An information struct used to provide DenseMap with the various necessary components for a given valu...
Definition: DenseMapInfo.h:50