|
static const unsigned | kOriginSize = 4 |
|
static const Align | kMinOriginAlignment = Align(4) |
|
static const Align | kShadowTLSAlignment = Align(8) |
|
static const unsigned | kParamTLSSize = 800 |
|
static const unsigned | kRetvalTLSSize = 800 |
|
static const size_t | kNumberOfAccessSizes = 4 |
|
static cl::opt< int > | ClTrackOrigins ("msan-track-origins", cl::desc("Track origins (allocation sites) of poisoned memory"), cl::Hidden, cl::init(0)) |
| Track origins of uninitialized values.
|
|
static cl::opt< bool > | ClKeepGoing ("msan-keep-going", cl::desc("keep going after reporting a UMR"), cl::Hidden, cl::init(false)) |
|
static cl::opt< bool > | ClPoisonStack ("msan-poison-stack", cl::desc("poison uninitialized stack variables"), cl::Hidden, cl::init(true)) |
|
static cl::opt< bool > | ClPoisonStackWithCall ("msan-poison-stack-with-call", cl::desc("poison uninitialized stack variables with a call"), cl::Hidden, cl::init(false)) |
|
static cl::opt< int > | ClPoisonStackPattern ("msan-poison-stack-pattern", cl::desc("poison uninitialized stack variables with the given pattern"), cl::Hidden, cl::init(0xff)) |
|
static cl::opt< bool > | ClPrintStackNames ("msan-print-stack-names", cl::desc("Print name of local stack variable"), cl::Hidden, cl::init(true)) |
|
static cl::opt< bool > | ClPoisonUndef ("msan-poison-undef", cl::desc("poison undef temps"), cl::Hidden, cl::init(true)) |
|
static cl::opt< bool > | ClHandleICmp ("msan-handle-icmp", cl::desc("propagate shadow through ICmpEQ and ICmpNE"), cl::Hidden, cl::init(true)) |
|
static cl::opt< bool > | ClHandleICmpExact ("msan-handle-icmp-exact", cl::desc("exact handling of relational integer ICmp"), cl::Hidden, cl::init(true)) |
|
static cl::opt< bool > | ClHandleLifetimeIntrinsics ("msan-handle-lifetime-intrinsics", cl::desc("when possible, poison scoped variables at the beginning of the scope " "(slower, but more precise)"), cl::Hidden, cl::init(true)) |
|
static cl::opt< bool > | ClHandleAsmConservative ("msan-handle-asm-conservative", cl::desc("conservative handling of inline assembly"), cl::Hidden, cl::init(true)) |
|
static cl::opt< bool > | ClCheckAccessAddress ("msan-check-access-address", cl::desc("report accesses through a pointer which has poisoned shadow"), cl::Hidden, cl::init(true)) |
|
static cl::opt< bool > | ClEagerChecks ("msan-eager-checks", cl::desc("check arguments and return values at function call boundaries"), cl::Hidden, cl::init(false)) |
|
static cl::opt< bool > | ClDumpStrictInstructions ("msan-dump-strict-instructions", cl::desc("print out instructions with default strict semantics"), cl::Hidden, cl::init(false)) |
|
static cl::opt< int > | ClInstrumentationWithCallThreshold ("msan-instrumentation-with-call-threshold", cl::desc("If the function being instrumented requires more than " "this number of checks and origin stores, use callbacks instead of " "inline checks (-1 means never use callbacks)."), cl::Hidden, cl::init(3500)) |
|
static cl::opt< bool > | ClEnableKmsan ("msan-kernel", cl::desc("Enable KernelMemorySanitizer instrumentation"), cl::Hidden, cl::init(false)) |
|
static cl::opt< bool > | ClDisableChecks ("msan-disable-checks", cl::desc("Apply no_sanitize to the whole file"), cl::Hidden, cl::init(false)) |
|
static cl::opt< bool > | ClCheckConstantShadow ("msan-check-constant-shadow", cl::desc("Insert checks for constant shadow values"), cl::Hidden, cl::init(true)) |
|
static cl::opt< bool > | ClWithComdat ("msan-with-comdat", cl::desc("Place MSan constructors in comdat sections"), cl::Hidden, cl::init(false)) |
|
static cl::opt< uint64_t > | ClAndMask ("msan-and-mask", cl::desc("Define custom MSan AndMask"), cl::Hidden, cl::init(0)) |
|
static cl::opt< uint64_t > | ClXorMask ("msan-xor-mask", cl::desc("Define custom MSan XorMask"), cl::Hidden, cl::init(0)) |
|
static cl::opt< uint64_t > | ClShadowBase ("msan-shadow-base", cl::desc("Define custom MSan ShadowBase"), cl::Hidden, cl::init(0)) |
|
static cl::opt< uint64_t > | ClOriginBase ("msan-origin-base", cl::desc("Define custom MSan OriginBase"), cl::Hidden, cl::init(0)) |
|
static cl::opt< int > | ClDisambiguateWarning ("msan-disambiguate-warning-threshold", cl::desc("Define threshold for number of checks per " "debug location to force origin update."), cl::Hidden, cl::init(3)) |
|
const char | kMsanModuleCtorName [] = "msan.module_ctor" |
|
const char | kMsanInitName [] = "__msan_init" |
|
static const MemoryMapParams | Linux_I386_MemoryMapParams |
|
static const MemoryMapParams | Linux_X86_64_MemoryMapParams |
|
static const MemoryMapParams | Linux_MIPS64_MemoryMapParams |
|
static const MemoryMapParams | Linux_PowerPC64_MemoryMapParams |
|
static const MemoryMapParams | Linux_S390X_MemoryMapParams |
|
static const MemoryMapParams | Linux_AArch64_MemoryMapParams |
|
static const MemoryMapParams | Linux_LoongArch64_MemoryMapParams |
|
static const MemoryMapParams | FreeBSD_AArch64_MemoryMapParams |
|
static const MemoryMapParams | FreeBSD_I386_MemoryMapParams |
|
static const MemoryMapParams | FreeBSD_X86_64_MemoryMapParams |
|
static const MemoryMapParams | NetBSD_X86_64_MemoryMapParams |
|
static const PlatformMemoryMapParams | Linux_X86_MemoryMapParams |
|
static const PlatformMemoryMapParams | Linux_MIPS_MemoryMapParams |
|
static const PlatformMemoryMapParams | Linux_PowerPC_MemoryMapParams |
|
static const PlatformMemoryMapParams | Linux_S390_MemoryMapParams |
|
static const PlatformMemoryMapParams | Linux_ARM_MemoryMapParams |
|
static const PlatformMemoryMapParams | Linux_LoongArch_MemoryMapParams |
|
static const PlatformMemoryMapParams | FreeBSD_ARM_MemoryMapParams |
|
static const PlatformMemoryMapParams | FreeBSD_X86_MemoryMapParams |
|
static const PlatformMemoryMapParams | NetBSD_X86_MemoryMapParams |
|
This file is a part of MemorySanitizer, a detector of uninitialized reads.
The algorithm of the tool is similar to Memcheck (https://static.usenix.org/event/usenix05/tech/general/full_papers/seward/seward_html/usenix2005.html). We associate a few shadow bits with every byte of the application memory, poison the shadow of the malloc-ed or alloca-ed memory, load the shadow bits on every memory read, propagate the shadow bits through some of the arithmetic instructions (including MOV), store the shadow bits on every memory write, and report a bug on some other instructions (e.g. JMP) if the associated shadow is poisoned.
But there are differences too. The first and the major one: compiler instrumentation instead of binary instrumentation. This gives us much better register allocation, possible compiler optimizations and a fast start-up. But this brings the major issue as well: msan needs to see all program events, including system calls and reads/writes in system libraries, so we either need to compile everything with msan or use a binary translation component (e.g. DynamoRIO) to instrument pre-built libraries. Another difference from Memcheck is that we use 8 shadow bits per byte of application memory and use a direct shadow mapping. This greatly simplifies the instrumentation code and avoids races on shadow updates (Memcheck is single-threaded so races are not a concern there. Memcheck uses 2 shadow bits per byte with a slow path storage that uses 8 bits per byte).
The default value of shadow is 0, which means "clean" (not poisoned).
Every module initializer should call __msan_init to ensure that the shadow memory is ready. On error, __msan_warning is called. Since parameters and return values may be passed via registers, we have a specialized thread-local shadow for return values (__msan_retval_tls) and parameters (__msan_param_tls).
Origin tracking.
MemorySanitizer can track origins (allocation points) of all uninitialized values. This behavior is controlled with a flag (msan-track-origins) and is disabled by default.
Origins are 4-byte values created and interpreted by the runtime library. They are stored in a second shadow mapping, one 4-byte value for 4 bytes of application memory. Propagation of origins is basically a bunch of "select" instructions that pick the origin of a dirty argument, if an instruction has one.
Every 4 aligned, consecutive bytes of application memory have one origin value associated with them. If these bytes contain uninitialized data coming from 2 different allocations, the last store wins. Because of this, MemorySanitizer reports can show unrelated origins, but this is unlikely in practice.
Origins are meaningless for fully initialized values, so MemorySanitizer avoids storing origin to memory when a fully initialized value is stored. This way it avoids needlessly overwriting the origin of the 4-byte region on a short (i.e. 1 byte) clean store, and it is also good for performance.
Atomic handling.
Ideally, every atomic store of an application value should update the corresponding shadow location in an atomic way. Unfortunately, an atomic store to two disjoint locations cannot be done without severe slowdown.
Therefore, we implement an approximation that may err on the safe side. In this implementation, every atomically accessed location in the program may only change from (partially) uninitialized to fully initialized, but not the other way around. We load the shadow after the application load, and we store the shadow before the app store. Also, we always store clean shadow (if the application store is atomic). This way, if the store-load pair constitutes a happens-before arc, shadow store and load are correctly ordered such that the load will get either the value that was stored, or some later value (which is always clean).
This does not work very well with Compare-And-Swap (CAS) and Read-Modify-Write (RMW) operations. To follow the above logic, CAS and RMW must store the new shadow before the app operation, and load the shadow after the app operation. Computers don't work this way. Current implementation ignores the load aspect of CAS/RMW, always returning a clean value. It implements the store part as a simple atomic store by storing a clean shadow.
Instrumenting inline assembly.
For inline assembly code LLVM has little idea about which memory locations become initialized depending on the arguments. It can be possible to figure out which arguments are meant to point to inputs and outputs, but the actual semantics may only be visible at runtime. In the Linux kernel it's also possible that the arguments only indicate the offset for a base taken from a segment register, so it's dangerous to treat any asm() arguments as pointers. We take a conservative approach, generating calls to __msan_instrument_asm_store(ptr, size), which defer the memory unpoisoning to the runtime library. The latter can perform more complex address checks to figure out whether it's safe to touch the shadow memory. Like with atomic operations, we call __msan_instrument_asm_store() before the assembly call, so that changes to the shadow memory will be seen by other threads together with main memory initialization.
KernelMemorySanitizer (KMSAN) implementation.
The major differences between KMSAN and MSan instrumentation are:
- KMSAN always tracks the origins and implies msan-keep-going=true;
- KMSAN allocates shadow and origin memory for each page separately, so there are no explicit accesses to shadow and origin in the instrumentation. Shadow and origin values for a particular X-byte memory location (X=1,2,4,8) are accessed through pointers obtained via the __msan_metadata_ptr_for_load_X(ptr) and __msan_metadata_ptr_for_store_X(ptr) functions. The corresponding functions check that the X-byte accesses are possible and return the pointers to shadow and origin memory. Arbitrary sized accesses are handled with __msan_metadata_ptr_for_load_n(ptr, size) and __msan_metadata_ptr_for_store_n(ptr, size). Note that the sanitizer code has to deal with how shadow/origin pairs returned by these functions are represented in different ABIs. In the X86_64 ABI they are returned in RDX:RAX, in PowerPC64 they are returned in r3 and r4, and in the SystemZ ABI they are written to memory pointed to by a hidden parameter.
- TLS variables are stored in a single per-task struct. A call to a function __msan_get_context_state() returning a pointer to that struct is inserted into every instrumented function before the entry block;
- __msan_warning() takes a 32-bit origin parameter;
- local variables are poisoned with __msan_poison_alloca() upon function entry and unpoisoned with __msan_unpoison_alloca() before leaving the function;
- the pass doesn't declare any global variables or add global constructors to the translation unit.
Also, KMSAN currently ignores uninitialized memory passed into inline asm calls, making sure we're on the safe side wrt. possible false positives.
KernelMemorySanitizer only supports X86_64, SystemZ and PowerPC64 at the moment.
Definition in file MemorySanitizer.cpp.