LLVM 20.0.0git
AArch64GlobalsTagging.cpp
Go to the documentation of this file.
1//===- AArch64GlobalsTagging.cpp - Global tagging in IR -------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//===----------------------------------------------------------------------===//
10
11#include "AArch64.h"
13#include "llvm/IR/Attributes.h"
14#include "llvm/IR/Constants.h"
15#include "llvm/IR/GlobalValue.h"
17#include "llvm/IR/IRBuilder.h"
18#include "llvm/IR/Module.h"
19#include "llvm/Pass.h"
21
22#include <algorithm>
23#include <set>
24
25using namespace llvm;
26
27static const Align kTagGranuleSize = Align(16);
28
30 if (!G.isTagged())
31 return false;
32
33 assert(G.hasSanitizerMetadata() &&
34 "Missing sanitizer metadata, but symbol is apparently tagged.");
35 GlobalValue::SanitizerMetadata Meta = G.getSanitizerMetadata();
36
37 // For now, don't instrument constant data, as it'll be in .rodata anyway. It
38 // may be worth instrumenting these in future to stop them from being used as
39 // gadgets.
40 if (G.getName().starts_with("llvm.") || G.isThreadLocal() || G.isConstant()) {
41 Meta.Memtag = false;
42 G.setSanitizerMetadata(Meta);
43 return false;
44 }
45
46 // Globals can be placed implicitly or explicitly in sections. There's two
47 // different types of globals that meet this criteria that cause problems:
48 // 1. Function pointers that are going into various init arrays (either
49 // explicitly through `__attribute__((section(<foo>)))` or implicitly
50 // through `__attribute__((constructor)))`, such as ".(pre)init(_array)",
51 // ".fini(_array)", ".ctors", and ".dtors". These function pointers end up
52 // overaligned and overpadded, making iterating over them problematic, and
53 // each function pointer is individually tagged (so the iteration over
54 // them causes SIGSEGV/MTE[AS]ERR).
55 // 2. Global variables put into an explicit section, where the section's name
56 // is a valid C-style identifier. The linker emits a `__start_<name>` and
57 // `__stop_<na,e>` symbol for the section, so that you can iterate over
58 // globals within this section. Unfortunately, again, these globals would
59 // be tagged and so iteration causes SIGSEGV/MTE[AS]ERR.
60 //
61 // To mitigate both these cases, and because specifying a section is rare
62 // outside of these two cases, disable MTE protection for globals in any
63 // section.
64 if (G.hasSection()) {
65 Meta.Memtag = false;
66 G.setSanitizerMetadata(Meta);
67 return false;
68 }
69
70 return true;
71}
72
73// Technically, due to ELF symbol interposition semantics, we can't change the
74// alignment or size of symbols. If we increase the alignment or size of a
75// symbol, the compiler may make optimisations based on this new alignment or
76// size. If the symbol is interposed, this optimisation could lead to
77// alignment-related or OOB read/write crashes.
78//
79// This is handled in the linker. When the linker sees multiple declarations of
80// a global variable, and some are tagged, and some are untagged, it resolves it
81// to be an untagged definition - but preserves the tag-granule-rounded size and
82// tag-granule-alignment. This should prevent these kinds of crashes intra-DSO.
83// For cross-DSO, it's been a reasonable contract that if you're interposing a
84// sanitizer-instrumented global, then the interposer also needs to be
85// sanitizer-instrumented.
86//
87// FIXME: In theory, this can be fixed by splitting the size/alignment of
88// globals into two uses: an "output alignment" that's emitted to the ELF file,
89// and an "optimisation alignment" that's used for optimisation. Thus, we could
90// adjust the output alignment only, and still optimise based on the pessimistic
91// pre-tagging size/alignment.
93 Constant *Initializer = G->getInitializer();
94 uint64_t SizeInBytes =
95 M.getDataLayout().getTypeAllocSize(Initializer->getType());
96
97 uint64_t NewSize = alignTo(SizeInBytes, kTagGranuleSize);
98 if (SizeInBytes != NewSize) {
99 // Pad the initializer out to the next multiple of 16 bytes.
100 llvm::SmallVector<uint8_t> Init(NewSize - SizeInBytes, 0);
101 Constant *Padding = ConstantDataArray::get(M.getContext(), Init);
102 Initializer = ConstantStruct::getAnon({Initializer, Padding});
103 auto *NewGV = new GlobalVariable(
104 M, Initializer->getType(), G->isConstant(), G->getLinkage(),
105 Initializer, "", G, G->getThreadLocalMode(), G->getAddressSpace());
106 NewGV->copyAttributesFrom(G);
107 NewGV->setComdat(G->getComdat());
108 NewGV->copyMetadata(G, 0);
109
110 NewGV->takeName(G);
111 G->replaceAllUsesWith(NewGV);
112 G->eraseFromParent();
113 G = NewGV;
114 }
115
116 G->setAlignment(std::max(G->getAlign().valueOrOne(), kTagGranuleSize));
117
118 // Ensure that tagged globals don't get merged by ICF - as they should have
119 // different tags at runtime.
120 G->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
121}
122
123namespace {
124class AArch64GlobalsTagging : public ModulePass {
125public:
126 static char ID;
127
128 explicit AArch64GlobalsTagging() : ModulePass(ID) {
130 }
131
132 bool runOnModule(Module &M) override;
133
134 StringRef getPassName() const override { return "AArch64 Globals Tagging"; }
135
136private:
137 std::set<GlobalVariable *> GlobalsToTag;
138};
139} // anonymous namespace
140
141char AArch64GlobalsTagging::ID = 0;
142
143bool AArch64GlobalsTagging::runOnModule(Module &M) {
144 // No mutating the globals in-place, or iterator invalidation occurs.
145 std::vector<GlobalVariable *> GlobalsToTag;
146 for (GlobalVariable &G : M.globals()) {
147 if (G.isDeclaration() || !shouldTagGlobal(G))
148 continue;
149 GlobalsToTag.push_back(&G);
150 }
151
152 for (GlobalVariable *G : GlobalsToTag) {
154 }
155
156 return true;
157}
158
159INITIALIZE_PASS_BEGIN(AArch64GlobalsTagging, "aarch64-globals-tagging",
160 "AArch64 Globals Tagging Pass", false, false)
161INITIALIZE_PASS_END(AArch64GlobalsTagging, "aarch64-globals-tagging",
162 "AArch64 Globals Tagging Pass", false, false)
163
165 return new AArch64GlobalsTagging();
166}
static bool shouldTagGlobal(GlobalVariable &G)
static void tagGlobalDefinition(Module &M, GlobalVariable *G)
static const Align kTagGranuleSize
aarch64 globals tagging
AArch64 Stack Tagging
This file contains the simple types necessary to represent the attributes associated with functions a...
This file contains the declarations for the subclasses of Constant, which represent the different fla...
dxil globals
#define G(x, y, z)
Definition: MD5.cpp:56
Module.h This file contains the declarations for the Module class.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:706
static Constant * getAnon(ArrayRef< Constant * > V, bool Packed=false)
Return an anonymous struct that has the specified elements.
Definition: Constants.h:477
This is an important base class in LLVM.
Definition: Constant.h:42
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:251
virtual bool runOnModule(Module &M)=0
runOnModule - Virtual method overriden by subclasses to process the module being operated on.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:94
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void initializeAArch64GlobalsTaggingPass(PassRegistry &)
ModulePass * createAArch64GlobalsTaggingPass()
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39