LLVM 22.0.0git
rpmalloc.c
Go to the documentation of this file.
1//===---------------------- rpmalloc.c ------------------*- C -*-=============//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This library provides a cross-platform lock free thread caching malloc
10// implementation in C11.
11//
12//===----------------------------------------------------------------------===//
13
14#include "rpmalloc.h"
15
16////////////
17///
18/// Build time configurable limits
19///
20//////
21
22#if defined(__clang__)
23#pragma clang diagnostic ignored "-Wunused-macros"
24#pragma clang diagnostic ignored "-Wunused-function"
25#if __has_warning("-Wreserved-identifier")
26#pragma clang diagnostic ignored "-Wreserved-identifier"
27#endif
28#if __has_warning("-Wstatic-in-inline")
29#pragma clang diagnostic ignored "-Wstatic-in-inline"
30#endif
31#elif defined(__GNUC__)
32#pragma GCC diagnostic ignored "-Wunused-macros"
33#pragma GCC diagnostic ignored "-Wunused-function"
34#endif
35
36#if !defined(__has_builtin)
37#define __has_builtin(b) 0
38#endif
39
40#if defined(__GNUC__) || defined(__clang__)
41
42#if __has_builtin(__builtin_memcpy_inline)
43#define _rpmalloc_memcpy_const(x, y, s) __builtin_memcpy_inline(x, y, s)
44#else
45#define _rpmalloc_memcpy_const(x, y, s) \
46 do { \
47 _Static_assert(__builtin_choose_expr(__builtin_constant_p(s), 1, 0), \
48 "len must be a constant integer"); \
49 memcpy(x, y, s); \
50 } while (0)
51#endif
52
53#if __has_builtin(__builtin_memset_inline)
54#define _rpmalloc_memset_const(x, y, s) __builtin_memset_inline(x, y, s)
55#else
56#define _rpmalloc_memset_const(x, y, s) \
57 do { \
58 _Static_assert(__builtin_choose_expr(__builtin_constant_p(s), 1, 0), \
59 "len must be a constant integer"); \
60 memset(x, y, s); \
61 } while (0)
62#endif
63#else
64#define _rpmalloc_memcpy_const(x, y, s) memcpy(x, y, s)
65#define _rpmalloc_memset_const(x, y, s) memset(x, y, s)
66#endif
67
68#if __has_builtin(__builtin_assume)
69#define rpmalloc_assume(cond) __builtin_assume(cond)
70#elif defined(__GNUC__)
71#define rpmalloc_assume(cond) \
72 do { \
73 if (!__builtin_expect(cond, 0)) \
74 __builtin_unreachable(); \
75 } while (0)
76#elif defined(_MSC_VER)
77#define rpmalloc_assume(cond) __assume(cond)
78#else
79#define rpmalloc_assume(cond) 0
80#endif
81
82#ifndef HEAP_ARRAY_SIZE
83//! Size of heap hashmap
84#define HEAP_ARRAY_SIZE 47
85#endif
86#ifndef ENABLE_THREAD_CACHE
87//! Enable per-thread cache
88#define ENABLE_THREAD_CACHE 1
89#endif
90#ifndef ENABLE_GLOBAL_CACHE
91//! Enable global cache shared between all threads, requires thread cache
92#define ENABLE_GLOBAL_CACHE 1
93#endif
94#ifndef ENABLE_VALIDATE_ARGS
95//! Enable validation of args to public entry points
96#define ENABLE_VALIDATE_ARGS 0
97#endif
98#ifndef ENABLE_STATISTICS
99//! Enable statistics collection
100#define ENABLE_STATISTICS 0
101#endif
102#ifndef ENABLE_ASSERTS
103//! Enable asserts
104#define ENABLE_ASSERTS 0
105#endif
106#ifndef ENABLE_OVERRIDE
107//! Override standard library malloc/free and new/delete entry points
108#define ENABLE_OVERRIDE 0
109#endif
110#ifndef ENABLE_PRELOAD
111//! Support preloading
112#define ENABLE_PRELOAD 0
113#endif
114#ifndef DISABLE_UNMAP
115//! Disable unmapping memory pages (also enables unlimited cache)
116#define DISABLE_UNMAP 0
117#endif
118#ifndef ENABLE_UNLIMITED_CACHE
119//! Enable unlimited global cache (no unmapping until finalization)
120#define ENABLE_UNLIMITED_CACHE 0
121#endif
122#ifndef ENABLE_ADAPTIVE_THREAD_CACHE
123//! Enable adaptive thread cache size based on use heuristics
124#define ENABLE_ADAPTIVE_THREAD_CACHE 0
125#endif
126#ifndef DEFAULT_SPAN_MAP_COUNT
127//! Default number of spans to map in call to map more virtual memory (default
128//! values yield 4MiB here)
129#define DEFAULT_SPAN_MAP_COUNT 64
130#endif
131#ifndef GLOBAL_CACHE_MULTIPLIER
132//! Multiplier for global cache
133#define GLOBAL_CACHE_MULTIPLIER 8
134#endif
135
136#if DISABLE_UNMAP && !ENABLE_GLOBAL_CACHE
137#error Must use global cache if unmap is disabled
138#endif
139
140#if DISABLE_UNMAP
141#undef ENABLE_UNLIMITED_CACHE
142#define ENABLE_UNLIMITED_CACHE 1
143#endif
144
145#if !ENABLE_GLOBAL_CACHE
146#undef ENABLE_UNLIMITED_CACHE
147#define ENABLE_UNLIMITED_CACHE 0
148#endif
149
150#if !ENABLE_THREAD_CACHE
151#undef ENABLE_ADAPTIVE_THREAD_CACHE
152#define ENABLE_ADAPTIVE_THREAD_CACHE 0
153#endif
154
155#if defined(_WIN32) || defined(__WIN32__) || defined(_WIN64)
156#define PLATFORM_WINDOWS 1
157#define PLATFORM_POSIX 0
158#else
159#define PLATFORM_WINDOWS 0
160#define PLATFORM_POSIX 1
161#endif
162
163/// Platform and arch specifics
164#if defined(_MSC_VER) && !defined(__clang__)
165#pragma warning(disable : 5105)
166#ifndef FORCEINLINE
167#define FORCEINLINE inline __forceinline
168#endif
169#define _Static_assert static_assert
170#else
171#ifndef FORCEINLINE
172#define FORCEINLINE inline __attribute__((__always_inline__))
173#endif
174#endif
175#if PLATFORM_WINDOWS
176#ifndef WIN32_LEAN_AND_MEAN
177#define WIN32_LEAN_AND_MEAN
178#endif
179#include <windows.h>
180#if ENABLE_VALIDATE_ARGS
181#include <intsafe.h>
182#endif
183#else
184#include <stdio.h>
185#include <stdlib.h>
186#include <time.h>
187#include <unistd.h>
188#if defined(__linux__) || defined(__ANDROID__)
189#include <sys/prctl.h>
190#if !defined(PR_SET_VMA)
191#define PR_SET_VMA 0x53564d41
192#define PR_SET_VMA_ANON_NAME 0
193#endif
194#endif
195#if defined(__APPLE__)
196#include <TargetConditionals.h>
197#if !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR
198#include <mach/mach_vm.h>
199#include <mach/vm_statistics.h>
200#endif
201#include <pthread.h>
202#endif
203#if defined(__HAIKU__) || defined(__TINYC__)
204#include <pthread.h>
205#endif
206#endif
207
208#include <errno.h>
209#include <stdint.h>
210#include <string.h>
211
212#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
213#include <fibersapi.h>
214static DWORD fls_key;
215#endif
216
217#if PLATFORM_POSIX
218#include <sched.h>
219#include <sys/mman.h>
220#ifdef __FreeBSD__
221#include <sys/sysctl.h>
222#define MAP_HUGETLB MAP_ALIGNED_SUPER
223#ifndef PROT_MAX
224#define PROT_MAX(f) 0
225#endif
226#else
227#define PROT_MAX(f) 0
228#endif
229#ifdef __sun
230extern int madvise(caddr_t, size_t, int);
231#endif
232#ifndef MAP_UNINITIALIZED
233#define MAP_UNINITIALIZED 0
234#endif
235#endif
236#include <errno.h>
237
238#if ENABLE_ASSERTS
239#undef NDEBUG
240#if defined(_MSC_VER) && !defined(_DEBUG)
241#define _DEBUG
242#endif
243#include <assert.h>
244#define RPMALLOC_TOSTRING_M(x) #x
245#define RPMALLOC_TOSTRING(x) RPMALLOC_TOSTRING_M(x)
246#define rpmalloc_assert(truth, message) \
247 do { \
248 if (!(truth)) { \
249 if (_memory_config.error_callback) { \
250 _memory_config.error_callback(message " (" RPMALLOC_TOSTRING( \
251 truth) ") at " __FILE__ ":" RPMALLOC_TOSTRING(__LINE__)); \
252 } else { \
253 assert((truth) && message); \
254 } \
255 } \
256 } while (0)
257#else
258#define rpmalloc_assert(truth, message) \
259 do { \
260 } while (0)
261#endif
262#if ENABLE_STATISTICS
263#include <stdio.h>
264#endif
265
266//////
267///
268/// Atomic access abstraction (since MSVC does not do C11 yet)
269///
270//////
271
272#if defined(_MSC_VER) && !defined(__clang__)
273
274typedef volatile long atomic32_t;
275typedef volatile long long atomic64_t;
276typedef volatile void *atomicptr_t;
277
278static FORCEINLINE int32_t atomic_load32(atomic32_t *src) {
279 return (int32_t)InterlockedOr(src, 0);
280}
281static FORCEINLINE void atomic_store32(atomic32_t *dst, int32_t val) {
282 InterlockedExchange(dst, val);
283}
284static FORCEINLINE int32_t atomic_incr32(atomic32_t *val) {
285 return (int32_t)InterlockedIncrement(val);
286}
287static FORCEINLINE int32_t atomic_decr32(atomic32_t *val) {
288 return (int32_t)InterlockedDecrement(val);
289}
290static FORCEINLINE int32_t atomic_add32(atomic32_t *val, int32_t add) {
291 return (int32_t)InterlockedExchangeAdd(val, add) + add;
292}
293static FORCEINLINE int atomic_cas32_acquire(atomic32_t *dst, int32_t val,
294 int32_t ref) {
295 return (InterlockedCompareExchange(dst, val, ref) == ref) ? 1 : 0;
296}
297static FORCEINLINE void atomic_store32_release(atomic32_t *dst, int32_t val) {
298 InterlockedExchange(dst, val);
299}
300static FORCEINLINE int64_t atomic_load64(atomic64_t *src) {
301 return (int64_t)InterlockedOr64(src, 0);
302}
303static FORCEINLINE int64_t atomic_add64(atomic64_t *val, int64_t add) {
304 return (int64_t)InterlockedExchangeAdd64(val, add) + add;
305}
306static FORCEINLINE void *atomic_load_ptr(atomicptr_t *src) {
307 return InterlockedCompareExchangePointer(src, 0, 0);
308}
309static FORCEINLINE void atomic_store_ptr(atomicptr_t *dst, void *val) {
310 InterlockedExchangePointer(dst, val);
311}
312static FORCEINLINE void atomic_store_ptr_release(atomicptr_t *dst, void *val) {
313 InterlockedExchangePointer(dst, val);
314}
315static FORCEINLINE void *atomic_exchange_ptr_acquire(atomicptr_t *dst,
316 void *val) {
317 return (void *)InterlockedExchangePointer((void *volatile *)dst, val);
318}
319static FORCEINLINE int atomic_cas_ptr(atomicptr_t *dst, void *val, void *ref) {
320 return (InterlockedCompareExchangePointer((void *volatile *)dst, val, ref) ==
321 ref)
322 ? 1
323 : 0;
324}
325
326#define EXPECTED(x) (x)
327#define UNEXPECTED(x) (x)
328
329#else
330
331#include <stdatomic.h>
332
333typedef volatile _Atomic(int32_t) atomic32_t;
334typedef volatile _Atomic(int64_t) atomic64_t;
335typedef volatile _Atomic(void *) atomicptr_t;
336
337static FORCEINLINE int32_t atomic_load32(atomic32_t *src) {
338 return atomic_load_explicit(src, memory_order_relaxed);
339}
340static FORCEINLINE void atomic_store32(atomic32_t *dst, int32_t val) {
341 atomic_store_explicit(dst, val, memory_order_relaxed);
342}
343static FORCEINLINE int32_t atomic_incr32(atomic32_t *val) {
344 return atomic_fetch_add_explicit(val, 1, memory_order_relaxed) + 1;
345}
346static FORCEINLINE int32_t atomic_decr32(atomic32_t *val) {
347 return atomic_fetch_add_explicit(val, -1, memory_order_relaxed) - 1;
348}
349static FORCEINLINE int32_t atomic_add32(atomic32_t *val, int32_t add) {
350 return atomic_fetch_add_explicit(val, add, memory_order_relaxed) + add;
351}
352static FORCEINLINE int atomic_cas32_acquire(atomic32_t *dst, int32_t val,
353 int32_t ref) {
354 return atomic_compare_exchange_weak_explicit(
355 dst, &ref, val, memory_order_acquire, memory_order_relaxed);
356}
357static FORCEINLINE void atomic_store32_release(atomic32_t *dst, int32_t val) {
358 atomic_store_explicit(dst, val, memory_order_release);
359}
360static FORCEINLINE int64_t atomic_load64(atomic64_t *val) {
361 return atomic_load_explicit(val, memory_order_relaxed);
362}
363static FORCEINLINE int64_t atomic_add64(atomic64_t *val, int64_t add) {
364 return atomic_fetch_add_explicit(val, add, memory_order_relaxed) + add;
365}
366static FORCEINLINE void *atomic_load_ptr(atomicptr_t *src) {
367 return atomic_load_explicit(src, memory_order_relaxed);
368}
369static FORCEINLINE void atomic_store_ptr(atomicptr_t *dst, void *val) {
370 atomic_store_explicit(dst, val, memory_order_relaxed);
371}
372static FORCEINLINE void atomic_store_ptr_release(atomicptr_t *dst, void *val) {
373 atomic_store_explicit(dst, val, memory_order_release);
374}
375static FORCEINLINE void *atomic_exchange_ptr_acquire(atomicptr_t *dst,
376 void *val) {
377 return atomic_exchange_explicit(dst, val, memory_order_acquire);
378}
379static FORCEINLINE int atomic_cas_ptr(atomicptr_t *dst, void *val, void *ref) {
380 return atomic_compare_exchange_weak_explicit(
381 dst, &ref, val, memory_order_relaxed, memory_order_relaxed);
382}
383
384#define EXPECTED(x) __builtin_expect((x), 1)
385#define UNEXPECTED(x) __builtin_expect((x), 0)
386
387#endif
388
389////////////
390///
391/// Statistics related functions (evaluate to nothing when statistics not
392/// enabled)
393///
394//////
395
396#if ENABLE_STATISTICS
397#define _rpmalloc_stat_inc(counter) atomic_incr32(counter)
398#define _rpmalloc_stat_dec(counter) atomic_decr32(counter)
399#define _rpmalloc_stat_add(counter, value) \
400 atomic_add32(counter, (int32_t)(value))
401#define _rpmalloc_stat_add64(counter, value) \
402 atomic_add64(counter, (int64_t)(value))
403#define _rpmalloc_stat_add_peak(counter, value, peak) \
404 do { \
405 int32_t _cur_count = atomic_add32(counter, (int32_t)(value)); \
406 if (_cur_count > (peak)) \
407 peak = _cur_count; \
408 } while (0)
409#define _rpmalloc_stat_sub(counter, value) \
410 atomic_add32(counter, -(int32_t)(value))
411#define _rpmalloc_stat_inc_alloc(heap, class_idx) \
412 do { \
413 int32_t alloc_current = \
414 atomic_incr32(&heap->size_class_use[class_idx].alloc_current); \
415 if (alloc_current > heap->size_class_use[class_idx].alloc_peak) \
416 heap->size_class_use[class_idx].alloc_peak = alloc_current; \
417 atomic_incr32(&heap->size_class_use[class_idx].alloc_total); \
418 } while (0)
419#define _rpmalloc_stat_inc_free(heap, class_idx) \
420 do { \
421 atomic_decr32(&heap->size_class_use[class_idx].alloc_current); \
422 atomic_incr32(&heap->size_class_use[class_idx].free_total); \
423 } while (0)
424#else
425#define _rpmalloc_stat_inc(counter) \
426 do { \
427 } while (0)
428#define _rpmalloc_stat_dec(counter) \
429 do { \
430 } while (0)
431#define _rpmalloc_stat_add(counter, value) \
432 do { \
433 } while (0)
434#define _rpmalloc_stat_add64(counter, value) \
435 do { \
436 } while (0)
437#define _rpmalloc_stat_add_peak(counter, value, peak) \
438 do { \
439 } while (0)
440#define _rpmalloc_stat_sub(counter, value) \
441 do { \
442 } while (0)
443#define _rpmalloc_stat_inc_alloc(heap, class_idx) \
444 do { \
445 } while (0)
446#define _rpmalloc_stat_inc_free(heap, class_idx) \
447 do { \
448 } while (0)
449#endif
450
451///
452/// Preconfigured limits and sizes
453///
454
455//! Granularity of a small allocation block (must be power of two)
456#define SMALL_GRANULARITY 16
457//! Small granularity shift count
458#define SMALL_GRANULARITY_SHIFT 4
459//! Number of small block size classes
460#define SMALL_CLASS_COUNT 65
461//! Maximum size of a small block
462#define SMALL_SIZE_LIMIT (SMALL_GRANULARITY * (SMALL_CLASS_COUNT - 1))
463//! Granularity of a medium allocation block
464#define MEDIUM_GRANULARITY 512
465//! Medium granularity shift count
466#define MEDIUM_GRANULARITY_SHIFT 9
467//! Number of medium block size classes
468#define MEDIUM_CLASS_COUNT 61
469//! Total number of small + medium size classes
470#define SIZE_CLASS_COUNT (SMALL_CLASS_COUNT + MEDIUM_CLASS_COUNT)
471//! Number of large block size classes
472#define LARGE_CLASS_COUNT 63
473//! Maximum size of a medium block
474#define MEDIUM_SIZE_LIMIT \
475 (SMALL_SIZE_LIMIT + (MEDIUM_GRANULARITY * MEDIUM_CLASS_COUNT))
476//! Maximum size of a large block
477#define LARGE_SIZE_LIMIT \
478 ((LARGE_CLASS_COUNT * _memory_span_size) - SPAN_HEADER_SIZE)
479//! Size of a span header (must be a multiple of SMALL_GRANULARITY and a power
480//! of two)
481#define SPAN_HEADER_SIZE 128
482//! Number of spans in thread cache
483#define MAX_THREAD_SPAN_CACHE 400
484//! Number of spans to transfer between thread and global cache
485#define THREAD_SPAN_CACHE_TRANSFER 64
486//! Number of spans in thread cache for large spans (must be greater than
487//! LARGE_CLASS_COUNT / 2)
488#define MAX_THREAD_SPAN_LARGE_CACHE 100
489//! Number of spans to transfer between thread and global cache for large spans
490#define THREAD_SPAN_LARGE_CACHE_TRANSFER 6
491
493 "Small granularity must be power of two");
495 "Span header size must be power of two");
496
497#if ENABLE_VALIDATE_ARGS
498//! Maximum allocation size to avoid integer overflow
499#undef MAX_ALLOC_SIZE
500#define MAX_ALLOC_SIZE (((size_t) - 1) - _memory_span_size)
501#endif
502
503#define pointer_offset(ptr, ofs) (void *)((char *)(ptr) + (ptrdiff_t)(ofs))
504#define pointer_diff(first, second) \
505 (ptrdiff_t)((const char *)(first) - (const char *)(second))
506
507#define INVALID_POINTER ((void *)((uintptr_t) - 1))
508
509#define SIZE_CLASS_LARGE SIZE_CLASS_COUNT
510#define SIZE_CLASS_HUGE ((uint32_t) - 1)
511
512////////////
513///
514/// Data types
515///
516//////
517
518//! A memory heap, per thread
519typedef struct heap_t heap_t;
520//! Span of memory pages
521typedef struct span_t span_t;
522//! Span list
524//! Span active data
526//! Size class definition
528//! Global cache
530
531//! Flag indicating span is the first (master) span of a split superspan
532#define SPAN_FLAG_MASTER 1U
533//! Flag indicating span is a secondary (sub) span of a split superspan
534#define SPAN_FLAG_SUBSPAN 2U
535//! Flag indicating span has blocks with increased alignment
536#define SPAN_FLAG_ALIGNED_BLOCKS 4U
537//! Flag indicating an unmapped master span
538#define SPAN_FLAG_UNMAPPED_MASTER 8U
539
540#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
541struct span_use_t {
542 //! Current number of spans used (actually used, not in cache)
543 atomic32_t current;
544 //! High water mark of spans used
545 atomic32_t high;
546#if ENABLE_STATISTICS
547 //! Number of spans in deferred list
548 atomic32_t spans_deferred;
549 //! Number of spans transitioned to global cache
550 atomic32_t spans_to_global;
551 //! Number of spans transitioned from global cache
552 atomic32_t spans_from_global;
553 //! Number of spans transitioned to thread cache
554 atomic32_t spans_to_cache;
555 //! Number of spans transitioned from thread cache
556 atomic32_t spans_from_cache;
557 //! Number of spans transitioned to reserved state
558 atomic32_t spans_to_reserved;
559 //! Number of spans transitioned from reserved state
560 atomic32_t spans_from_reserved;
561 //! Number of raw memory map calls
562 atomic32_t spans_map_calls;
563#endif
564};
565typedef struct span_use_t span_use_t;
566#endif
567
568#if ENABLE_STATISTICS
569struct size_class_use_t {
570 //! Current number of allocations
571 atomic32_t alloc_current;
572 //! Peak number of allocations
573 int32_t alloc_peak;
574 //! Total number of allocations
575 atomic32_t alloc_total;
576 //! Total number of frees
577 atomic32_t free_total;
578 //! Number of spans in use
579 atomic32_t spans_current;
580 //! Number of spans transitioned to cache
581 int32_t spans_peak;
582 //! Number of spans transitioned to cache
583 atomic32_t spans_to_cache;
584 //! Number of spans transitioned from cache
585 atomic32_t spans_from_cache;
586 //! Number of spans transitioned from reserved state
587 atomic32_t spans_from_reserved;
588 //! Number of spans mapped
589 atomic32_t spans_map_calls;
590 int32_t unused;
591};
592typedef struct size_class_use_t size_class_use_t;
593#endif
594
595// A span can either represent a single span of memory pages with size declared
596// by span_map_count configuration variable, or a set of spans in a continuous
597// region, a super span. Any reference to the term "span" usually refers to both
598// a single span or a super span. A super span can further be divided into
599// multiple spans (or this, super spans), where the first (super)span is the
600// master and subsequent (super)spans are subspans. The master span keeps track
601// of how many subspans that are still alive and mapped in virtual memory, and
602// once all subspans and master have been unmapped the entire superspan region
603// is released and unmapped (on Windows for example, the entire superspan range
604// has to be released in the same call to release the virtual memory range, but
605// individual subranges can be decommitted individually to reduce physical
606// memory use).
607struct span_t {
608 //! Free list
610 //! Total block count of size class
612 //! Size class
614 //! Index of last block initialized in free list
616 //! Number of used blocks remaining when in partial state
618 //! Deferred free list
620 //! Size of deferred free list, or list of spans when part of a cache list
622 //! Size of a block
624 //! Flags and counters
626 //! Number of spans
628 //! Total span counter for master spans
630 //! Offset from master span for subspans
632 //! Remaining span counter, for master spans
633 atomic32_t remaining_spans;
634 //! Alignment offset
636 //! Owning heap
638 //! Next span
640 //! Previous span
642};
643_Static_assert(sizeof(span_t) <= SPAN_HEADER_SIZE, "span size mismatch");
644
646 size_t count;
648};
650
652 size_t count;
654};
656
658 //! Free list of active span
660 //! Double linked list of partially used spans with free blocks.
661 // Previous span pointer in head points to tail span of list.
663 //! Early level cache of fully free spans
665};
667
668// Control structure for a heap, either a thread heap or a first class heap if
669// enabled
670struct heap_t {
671 //! Owning thread ID
672 uintptr_t owner_thread;
673 //! Free lists for each size class
675#if ENABLE_THREAD_CACHE
676 //! Arrays of fully freed spans, single span
677 span_cache_t span_cache;
678#endif
679 //! List of deferred free spans (single linked list)
681 //! Number of full spans
683 //! Mapped but unused spans
685 //! Master span for mapped but unused spans
687 //! Number of mapped but unused spans
689 //! Child count
690 atomic32_t child_count;
691 //! Next heap in id list
693 //! Next heap in orphan list
695 //! Heap ID
696 int32_t id;
697 //! Finalization state flag
699 //! Master heap owning the memory pages
701#if ENABLE_THREAD_CACHE
702 //! Arrays of fully freed spans, large spans with > 1 span count
703 span_large_cache_t span_large_cache[LARGE_CLASS_COUNT - 1];
704#endif
705#if RPMALLOC_FIRST_CLASS_HEAPS
706 //! Double linked list of fully utilized spans with free blocks for each size
707 //! class.
708 // Previous span pointer in head points to tail span of list.
709 span_t *full_span[SIZE_CLASS_COUNT];
710 //! Double linked list of large and huge spans allocated by this heap
711 span_t *large_huge_span;
712#endif
713#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
714 //! Current and high water mark of spans used per span count
715 span_use_t span_use[LARGE_CLASS_COUNT];
716#endif
717#if ENABLE_STATISTICS
718 //! Allocation stats per size class
719 size_class_use_t size_class_use[SIZE_CLASS_COUNT + 1];
720 //! Number of bytes transitioned thread -> global
721 atomic64_t thread_to_global;
722 //! Number of bytes transitioned global -> thread
723 atomic64_t global_to_thread;
724#endif
725};
726
727// Size class for defining a block size bucket
729 //! Size of blocks in this class
731 //! Number of blocks in each chunk
733 //! Class index this class is merged with
735};
736_Static_assert(sizeof(size_class_t) == 8, "Size class size mismatch");
737
739 //! Cache lock
740 atomic32_t lock;
741 //! Cache count
743#if ENABLE_STATISTICS
744 //! Insert count
745 size_t insert_count;
746 //! Extract count
747 size_t extract_count;
748#endif
749 //! Cached spans
751 //! Unlimited cache overflow
753};
754
755////////////
756///
757/// Global data
758///
759//////
760
761//! Default span size (64KiB)
762#define _memory_default_span_size (64 * 1024)
763#define _memory_default_span_size_shift 16
764#define _memory_default_span_mask (~((uintptr_t)(_memory_span_size - 1)))
765
766//! Initialized flag
768//! Main thread ID
770//! Configuration
772//! Memory page size
773static size_t _memory_page_size;
774//! Shift to divide by page size
776//! Granularity at which memory pages are mapped by OS
778#if RPMALLOC_CONFIGURABLE
779//! Size of a span of memory pages
780static size_t _memory_span_size;
781//! Shift to divide by span size
782static size_t _memory_span_size_shift;
783//! Mask to get to start of a memory span
784static uintptr_t _memory_span_mask;
785#else
786//! Hardwired span size
787#define _memory_span_size _memory_default_span_size
788#define _memory_span_size_shift _memory_default_span_size_shift
789#define _memory_span_mask _memory_default_span_mask
790#endif
791//! Number of spans to map in each map call
793//! Number of spans to keep reserved in each heap
795//! Global size classes
797//! Run-time size limit of medium blocks
799//! Heap ID counter
800static atomic32_t _memory_heap_id;
801//! Huge page support
803#if ENABLE_GLOBAL_CACHE
804//! Global span cache
805static global_cache_t _memory_span_cache[LARGE_CLASS_COUNT];
806#endif
807//! Global reserved spans
809//! Global reserved count
811//! Global reserved master
813//! All heaps
815//! Used to restrict access to mapping memory for huge pages
816static atomic32_t _memory_global_lock;
817//! Orphaned heaps
819#if RPMALLOC_FIRST_CLASS_HEAPS
820//! Orphaned heaps (first class heaps)
821static heap_t *_memory_first_class_orphan_heaps;
822#endif
823#if ENABLE_STATISTICS
824//! Allocations counter
825static atomic64_t _allocation_counter;
826//! Deallocations counter
827static atomic64_t _deallocation_counter;
828//! Active heap count
829static atomic32_t _memory_active_heaps;
830//! Number of currently mapped memory pages
831static atomic32_t _mapped_pages;
832//! Peak number of concurrently mapped memory pages
833static int32_t _mapped_pages_peak;
834//! Number of mapped master spans
835static atomic32_t _master_spans;
836//! Number of unmapped dangling master spans
837static atomic32_t _unmapped_master_spans;
838//! Running counter of total number of mapped memory pages since start
839static atomic32_t _mapped_total;
840//! Running counter of total number of unmapped memory pages since start
841static atomic32_t _unmapped_total;
842//! Number of currently mapped memory pages in OS calls
843static atomic32_t _mapped_pages_os;
844//! Number of currently allocated pages in huge allocations
845static atomic32_t _huge_pages_current;
846//! Peak number of currently allocated pages in huge allocations
847static int32_t _huge_pages_peak;
848#endif
849
850////////////
851///
852/// Thread local heap and ID
853///
854//////
855
856//! Current thread heap
857#if ((defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD) || \
858 defined(__TINYC__)
859static pthread_key_t _memory_thread_heap;
860#else
861#ifdef _MSC_VER
862#define _Thread_local __declspec(thread)
863#define TLS_MODEL
864#else
865#ifndef __HAIKU__
866#define TLS_MODEL __attribute__((tls_model("initial-exec")))
867#else
868#define TLS_MODEL
869#endif
870#if !defined(__clang__) && defined(__GNUC__)
871#define _Thread_local __thread
872#endif
873#endif
874static _Thread_local heap_t *_memory_thread_heap TLS_MODEL;
875#endif
876
877static inline heap_t *get_thread_heap_raw(void) {
878#if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
879 return pthread_getspecific(_memory_thread_heap);
880#else
881 return _memory_thread_heap;
882#endif
883}
884
885//! Get the current thread heap
886static inline heap_t *get_thread_heap(void) {
887 heap_t *heap = get_thread_heap_raw();
888#if ENABLE_PRELOAD
889 if (EXPECTED(heap != 0))
890 return heap;
892 return get_thread_heap_raw();
893#else
894 return heap;
895#endif
896}
897
898//! Fast thread ID
899static inline uintptr_t get_thread_id(void) {
900#if defined(_WIN32)
901 return (uintptr_t)((void *)NtCurrentTeb());
902#elif (defined(__GNUC__) || defined(__clang__)) && !defined(__CYGWIN__)
903 uintptr_t tid;
904#if defined(__i386__)
905 __asm__("movl %%gs:0, %0" : "=r"(tid) : :);
906#elif defined(__x86_64__)
907#if defined(__MACH__)
908 __asm__("movq %%gs:0, %0" : "=r"(tid) : :);
909#else
910 __asm__("movq %%fs:0, %0" : "=r"(tid) : :);
911#endif
912#elif defined(__arm__)
913 __asm__ volatile("mrc p15, 0, %0, c13, c0, 3" : "=r"(tid));
914#elif defined(__aarch64__)
915#if defined(__MACH__)
916 // tpidr_el0 likely unused, always return 0 on iOS
917 __asm__ volatile("mrs %0, tpidrro_el0" : "=r"(tid));
918#else
919 __asm__ volatile("mrs %0, tpidr_el0" : "=r"(tid));
920#endif
921#else
922#error This platform needs implementation of get_thread_id()
923#endif
924 return tid;
925#else
926#error This platform needs implementation of get_thread_id()
927#endif
928}
929
930//! Set the current thread heap
931static void set_thread_heap(heap_t *heap) {
932#if ((defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD) || \
933 defined(__TINYC__)
934 pthread_setspecific(_memory_thread_heap, heap);
935#else
936 _memory_thread_heap = heap;
937#endif
938 if (heap)
939 heap->owner_thread = get_thread_id();
940}
941
942//! Set main thread ID
943extern void rpmalloc_set_main_thread(void);
944
947}
948
949static void _rpmalloc_spin(void) {
950#if defined(_MSC_VER)
951#if defined(_M_ARM64)
952 __yield();
953#else
954 _mm_pause();
955#endif
956#elif defined(__x86_64__) || defined(__i386__)
957 __asm__ volatile("pause" ::: "memory");
958#elif defined(__aarch64__) || (defined(__arm__) && __ARM_ARCH >= 7)
959 __asm__ volatile("yield" ::: "memory");
960#elif defined(__powerpc__) || defined(__powerpc64__)
961 // No idea if ever been compiled in such archs but ... as precaution
962 __asm__ volatile("or 27,27,27");
963#elif defined(__sparc__)
964 __asm__ volatile("rd %ccr, %g0 \n\trd %ccr, %g0 \n\trd %ccr, %g0");
965#else
966 struct timespec ts = {0};
967 nanosleep(&ts, 0);
968#endif
969}
970
971#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
972static void NTAPI _rpmalloc_thread_destructor(void *value) {
973#if ENABLE_OVERRIDE
974 // If this is called on main thread it means rpmalloc_finalize
975 // has not been called and shutdown is forced (through _exit) or unclean
977 return;
978#endif
979 if (value)
981}
982#endif
983
984////////////
985///
986/// Low level memory map/unmap
987///
988//////
989
990static void _rpmalloc_set_name(void *address, size_t size) {
991#if defined(__linux__) || defined(__ANDROID__)
994 if (address == MAP_FAILED || !name)
995 return;
996 // If the kernel does not support CONFIG_ANON_VMA_NAME or if the call fails
997 // (e.g. invalid name) it is a no-op basically.
998 (void)prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, (uintptr_t)address, size,
999 (uintptr_t)name);
1000#else
1001 (void)sizeof(size);
1002 (void)sizeof(address);
1003#endif
1004}
1005
1006//! Map more virtual memory
1007// size is number of bytes to map
1008// offset receives the offset in bytes from start of mapped region
1009// returns address to start of mapped region to use
1010static void *_rpmalloc_mmap(size_t size, size_t *offset) {
1011 rpmalloc_assert(!(size % _memory_page_size), "Invalid mmap size");
1012 rpmalloc_assert(size >= _memory_page_size, "Invalid mmap size");
1013 void *address = _memory_config.memory_map(size, offset);
1014 if (EXPECTED(address != 0)) {
1015 _rpmalloc_stat_add_peak(&_mapped_pages, (size >> _memory_page_size_shift),
1016 _mapped_pages_peak);
1017 _rpmalloc_stat_add(&_mapped_total, (size >> _memory_page_size_shift));
1018 }
1019 return address;
1020}
1021
1022//! Unmap virtual memory
1023// address is the memory address to unmap, as returned from _memory_map
1024// size is the number of bytes to unmap, which might be less than full region
1025// for a partial unmap offset is the offset in bytes to the actual mapped
1026// region, as set by _memory_map release is set to 0 for partial unmap, or size
1027// of entire range for a full unmap
1028static void _rpmalloc_unmap(void *address, size_t size, size_t offset,
1029 size_t release) {
1030 rpmalloc_assert(!release || (release >= size), "Invalid unmap size");
1031 rpmalloc_assert(!release || (release >= _memory_page_size),
1032 "Invalid unmap size");
1033 if (release) {
1034 rpmalloc_assert(!(release % _memory_page_size), "Invalid unmap size");
1035 _rpmalloc_stat_sub(&_mapped_pages, (release >> _memory_page_size_shift));
1036 _rpmalloc_stat_add(&_unmapped_total, (release >> _memory_page_size_shift));
1037 }
1038 _memory_config.memory_unmap(address, size, offset, release);
1039}
1040
1041//! Default implementation to map new pages to virtual memory
1042static void *_rpmalloc_mmap_os(size_t size, size_t *offset) {
1043 // Either size is a heap (a single page) or a (multiple) span - we only need
1044 // to align spans, and only if larger than map granularity
1045 size_t padding = ((size >= _memory_span_size) &&
1048 : 0;
1049 rpmalloc_assert(size >= _memory_page_size, "Invalid mmap size");
1050#if PLATFORM_WINDOWS
1051 // Ok to MEM_COMMIT - according to MSDN, "actual physical pages are not
1052 // allocated unless/until the virtual addresses are actually accessed"
1053 void *ptr = VirtualAlloc(0, size + padding,
1054 (_memory_huge_pages ? MEM_LARGE_PAGES : 0) |
1055 MEM_RESERVE | MEM_COMMIT,
1056 PAGE_READWRITE);
1057 if (!ptr) {
1059 if (_memory_config.map_fail_callback(size + padding))
1060 return _rpmalloc_mmap_os(size, offset);
1061 } else {
1062 rpmalloc_assert(ptr, "Failed to map virtual memory block");
1063 }
1064 return 0;
1065 }
1066#else
1067 int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_UNINITIALIZED;
1068#if defined(__APPLE__) && !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR
1069 int fd = (int)VM_MAKE_TAG(240U);
1071 fd |= VM_FLAGS_SUPERPAGE_SIZE_2MB;
1072 void *ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, fd, 0);
1073#elif defined(MAP_HUGETLB)
1074 void *ptr = mmap(0, size + padding,
1075 PROT_READ | PROT_WRITE | PROT_MAX(PROT_READ | PROT_WRITE),
1076 (_memory_huge_pages ? MAP_HUGETLB : 0) | flags, -1, 0);
1077#if defined(MADV_HUGEPAGE)
1078 // In some configurations, huge pages allocations might fail thus
1079 // we fallback to normal allocations and promote the region as transparent
1080 // huge page
1081 if ((ptr == MAP_FAILED || !ptr) && _memory_huge_pages) {
1082 ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, -1, 0);
1083 if (ptr && ptr != MAP_FAILED) {
1084 int prm = madvise(ptr, size + padding, MADV_HUGEPAGE);
1085 (void)prm;
1086 rpmalloc_assert((prm == 0), "Failed to promote the page to THP");
1087 }
1088 }
1089#endif
1090 _rpmalloc_set_name(ptr, size + padding);
1091#elif defined(MAP_ALIGNED)
1092 const size_t align =
1093 (sizeof(size_t) * 8) - (size_t)(__builtin_clzl(size - 1));
1094 void *ptr =
1095 mmap(0, size + padding, PROT_READ | PROT_WRITE,
1096 (_memory_huge_pages ? MAP_ALIGNED(align) : 0) | flags, -1, 0);
1097#elif defined(MAP_ALIGN)
1098 caddr_t base = (_memory_huge_pages ? (caddr_t)(4 << 20) : 0);
1099 void *ptr = mmap(base, size + padding, PROT_READ | PROT_WRITE,
1100 (_memory_huge_pages ? MAP_ALIGN : 0) | flags, -1, 0);
1101#else
1102 void *ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, -1, 0);
1103#endif
1104 if ((ptr == MAP_FAILED) || !ptr) {
1106 if (_memory_config.map_fail_callback(size + padding))
1107 return _rpmalloc_mmap_os(size, offset);
1108 } else if (errno != ENOMEM) {
1109 rpmalloc_assert((ptr != MAP_FAILED) && ptr,
1110 "Failed to map virtual memory block");
1111 }
1112 return 0;
1113 }
1114#endif
1115 _rpmalloc_stat_add(&_mapped_pages_os,
1116 (int32_t)((size + padding) >> _memory_page_size_shift));
1117 if (padding) {
1118 size_t final_padding = padding - ((uintptr_t)ptr & ~_memory_span_mask);
1119 rpmalloc_assert(final_padding <= _memory_span_size,
1120 "Internal failure in padding");
1121 rpmalloc_assert(final_padding <= padding, "Internal failure in padding");
1122 rpmalloc_assert(!(final_padding % 8), "Internal failure in padding");
1123 ptr = pointer_offset(ptr, final_padding);
1124 *offset = final_padding >> 3;
1125 }
1127 !((uintptr_t)ptr & ~_memory_span_mask),
1128 "Internal failure in padding");
1129 return ptr;
1130}
1131
1132//! Default implementation to unmap pages from virtual memory
1133static void _rpmalloc_unmap_os(void *address, size_t size, size_t offset,
1134 size_t release) {
1135 rpmalloc_assert(release || (offset == 0), "Invalid unmap size");
1136 rpmalloc_assert(!release || (release >= _memory_page_size),
1137 "Invalid unmap size");
1138 rpmalloc_assert(size >= _memory_page_size, "Invalid unmap size");
1139 if (release && offset) {
1140 offset <<= 3;
1141 address = pointer_offset(address, -(int32_t)offset);
1142 if ((release >= _memory_span_size) &&
1144 // Padding is always one span size
1145 release += _memory_span_size;
1146 }
1147 }
1148#if !DISABLE_UNMAP
1149#if PLATFORM_WINDOWS
1150 if (!VirtualFree(address, release ? 0 : size,
1151 release ? MEM_RELEASE : MEM_DECOMMIT)) {
1152 rpmalloc_assert(0, "Failed to unmap virtual memory block");
1153 }
1154#else
1155 if (release) {
1156 if (munmap(address, release)) {
1157 rpmalloc_assert(0, "Failed to unmap virtual memory block");
1158 }
1159 } else {
1160#if defined(MADV_FREE_REUSABLE)
1161 int ret;
1162 while ((ret = madvise(address, size, MADV_FREE_REUSABLE)) == -1 &&
1163 (errno == EAGAIN))
1164 errno = 0;
1165 if ((ret == -1) && (errno != 0)) {
1166#elif defined(MADV_DONTNEED)
1167 if (madvise(address, size, MADV_DONTNEED)) {
1168#elif defined(MADV_PAGEOUT)
1169 if (madvise(address, size, MADV_PAGEOUT)) {
1170#elif defined(MADV_FREE)
1171 if (madvise(address, size, MADV_FREE)) {
1172#else
1173 if (posix_madvise(address, size, POSIX_MADV_DONTNEED)) {
1174#endif
1175 rpmalloc_assert(0, "Failed to madvise virtual memory block as free");
1176 }
1177 }
1178#endif
1179#endif
1180 if (release)
1181 _rpmalloc_stat_sub(&_mapped_pages_os, release >> _memory_page_size_shift);
1182}
1183
1185 span_t *subspan,
1186 size_t span_count);
1187
1188//! Use global reserved spans to fulfill a memory map request (reserve size must
1189//! be checked by caller)
1193 span, span_count);
1194 _memory_global_reserve_count -= span_count;
1197 (span_t *)pointer_offset(span, span_count << _memory_span_size_shift);
1198 else
1200 return span;
1201}
1202
1203//! Store the given spans as global reserve (must only be called from within new
1204//! heap allocation, not thread safe)
1206 size_t reserve_span_count) {
1208 _memory_global_reserve_count = reserve_span_count;
1209 _memory_global_reserve = reserve;
1210}
1211
1212////////////
1213///
1214/// Span linked list management
1215///
1216//////
1217
1218//! Add a span to double linked list at the head
1220 if (*head)
1221 (*head)->prev = span;
1222 span->next = *head;
1223 *head = span;
1224}
1225
1226//! Pop head span from double linked list
1228 span_t *span) {
1229 rpmalloc_assert(*head == span, "Linked list corrupted");
1230 span = *head;
1231 *head = span->next;
1232}
1233
1234//! Remove a span from double linked list
1236 span_t *span) {
1237 rpmalloc_assert(*head, "Linked list corrupted");
1238 if (*head == span) {
1239 *head = span->next;
1240 } else {
1241 span_t *next_span = span->next;
1242 span_t *prev_span = span->prev;
1243 prev_span->next = next_span;
1244 if (EXPECTED(next_span != 0))
1245 next_span->prev = prev_span;
1246 }
1247}
1248
1249////////////
1250///
1251/// Span control
1252///
1253//////
1254
1255static void _rpmalloc_heap_cache_insert(heap_t *heap, span_t *span);
1256
1257static void _rpmalloc_heap_finalize(heap_t *heap);
1258
1259static void _rpmalloc_heap_set_reserved_spans(heap_t *heap, span_t *master,
1260 span_t *reserve,
1261 size_t reserve_span_count);
1262
1263//! Declare the span to be a subspan and store distance from master span and
1264//! span count
1266 span_t *subspan,
1267 size_t span_count) {
1268 rpmalloc_assert((subspan != master) || (subspan->flags & SPAN_FLAG_MASTER),
1269 "Span master pointer and/or flag mismatch");
1270 if (subspan != master) {
1271 subspan->flags = SPAN_FLAG_SUBSPAN;
1272 subspan->offset_from_master =
1273 (uint32_t)((uintptr_t)pointer_diff(subspan, master) >>
1275 subspan->align_offset = 0;
1276 }
1277 subspan->span_count = (uint32_t)span_count;
1278}
1279
1280//! Use reserved spans to fulfill a memory map request (reserve size must be
1281//! checked by caller)
1283 size_t span_count) {
1284 // Update the heap span reserve
1285 span_t *span = heap->span_reserve;
1286 heap->span_reserve =
1287 (span_t *)pointer_offset(span, span_count * _memory_span_size);
1288 heap->spans_reserved -= (uint32_t)span_count;
1289
1291 span_count);
1292 if (span_count <= LARGE_CLASS_COUNT)
1293 _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_from_reserved);
1294
1295 return span;
1296}
1297
1298//! Get the aligned number of spans to map in based on wanted count, configured
1299//! mapping granularity and the page size
1300static size_t _rpmalloc_span_align_count(size_t span_count) {
1301 size_t request_count = (span_count > _memory_span_map_count)
1302 ? span_count
1305 ((request_count * _memory_span_size) % _memory_page_size))
1306 request_count +=
1308 return request_count;
1309}
1310
1311//! Setup a newly mapped span
1312static void _rpmalloc_span_initialize(span_t *span, size_t total_span_count,
1313 size_t span_count, size_t align_offset) {
1314 span->total_spans = (uint32_t)total_span_count;
1315 span->span_count = (uint32_t)span_count;
1316 span->align_offset = (uint32_t)align_offset;
1317 span->flags = SPAN_FLAG_MASTER;
1318 atomic_store32(&span->remaining_spans, (int32_t)total_span_count);
1319}
1320
1321static void _rpmalloc_span_unmap(span_t *span);
1322
1323//! Map an aligned set of spans, taking configured mapping granularity and the
1324//! page size into account
1326 size_t span_count) {
1327 // If we already have some, but not enough, reserved spans, release those to
1328 // heap cache and map a new full set of spans. Otherwise we would waste memory
1329 // if page size > span size (huge pages)
1330 size_t aligned_span_count = _rpmalloc_span_align_count(span_count);
1331 size_t align_offset = 0;
1332 span_t *span = (span_t *)_rpmalloc_mmap(
1333 aligned_span_count * _memory_span_size, &align_offset);
1334 if (!span)
1335 return 0;
1336 _rpmalloc_span_initialize(span, aligned_span_count, span_count, align_offset);
1337 _rpmalloc_stat_inc(&_master_spans);
1338 if (span_count <= LARGE_CLASS_COUNT)
1339 _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_map_calls);
1340 if (aligned_span_count > span_count) {
1341 span_t *reserved_spans =
1342 (span_t *)pointer_offset(span, span_count * _memory_span_size);
1343 size_t reserved_count = aligned_span_count - span_count;
1344 if (heap->spans_reserved) {
1346 heap->span_reserve_master, heap->span_reserve, heap->spans_reserved);
1348 }
1349 if (reserved_count > _memory_heap_reserve_count) {
1350 // If huge pages or eager spam map count, the global reserve spin lock is
1351 // held by caller, _rpmalloc_span_map
1352 rpmalloc_assert(atomic_load32(&_memory_global_lock) == 1,
1353 "Global spin lock not held as expected");
1354 size_t remain_count = reserved_count - _memory_heap_reserve_count;
1355 reserved_count = _memory_heap_reserve_count;
1356 span_t *remain_span = (span_t *)pointer_offset(
1357 reserved_spans, reserved_count * _memory_span_size);
1363 }
1364 _rpmalloc_global_set_reserved_spans(span, remain_span, remain_count);
1365 }
1366 _rpmalloc_heap_set_reserved_spans(heap, span, reserved_spans,
1367 reserved_count);
1368 }
1369 return span;
1370}
1371
1372//! Map in memory pages for the given number of spans (or use previously
1373//! reserved pages)
1374static span_t *_rpmalloc_span_map(heap_t *heap, size_t span_count) {
1375 if (span_count <= heap->spans_reserved)
1376 return _rpmalloc_span_map_from_reserve(heap, span_count);
1377 span_t *span = 0;
1378 int use_global_reserve =
1381 if (use_global_reserve) {
1382 // If huge pages, make sure only one thread maps more memory to avoid bloat
1385 if (_memory_global_reserve_count >= span_count) {
1386 size_t reserve_count =
1387 (!heap->spans_reserved ? _memory_heap_reserve_count : span_count);
1388 if (_memory_global_reserve_count < reserve_count)
1389 reserve_count = _memory_global_reserve_count;
1390 span = _rpmalloc_global_get_reserved_spans(reserve_count);
1391 if (span) {
1392 if (reserve_count > span_count) {
1393 span_t *reserved_span = (span_t *)pointer_offset(
1394 span, span_count << _memory_span_size_shift);
1396 reserved_span,
1397 reserve_count - span_count);
1398 }
1399 // Already marked as subspan in _rpmalloc_global_get_reserved_spans
1400 span->span_count = (uint32_t)span_count;
1401 }
1402 }
1403 }
1404 if (!span)
1405 span = _rpmalloc_span_map_aligned_count(heap, span_count);
1406 if (use_global_reserve)
1408 return span;
1409}
1410
1411//! Unmap memory pages for the given number of spans (or mark as unused if no
1412//! partial unmappings)
1413static void _rpmalloc_span_unmap(span_t *span) {
1415 (span->flags & SPAN_FLAG_SUBSPAN),
1416 "Span flag corrupted");
1418 !(span->flags & SPAN_FLAG_SUBSPAN),
1419 "Span flag corrupted");
1420
1421 int is_master = !!(span->flags & SPAN_FLAG_MASTER);
1422 span_t *master =
1423 is_master ? span
1424 : ((span_t *)pointer_offset(
1425 span, -(intptr_t)((uintptr_t)span->offset_from_master *
1427 rpmalloc_assert(is_master || (span->flags & SPAN_FLAG_SUBSPAN),
1428 "Span flag corrupted");
1429 rpmalloc_assert(master->flags & SPAN_FLAG_MASTER, "Span flag corrupted");
1430
1431 size_t span_count = span->span_count;
1432 if (!is_master) {
1433 // Directly unmap subspans (unless huge pages, in which case we defer and
1434 // unmap entire page range with master)
1435 rpmalloc_assert(span->align_offset == 0, "Span align offset corrupted");
1437 _rpmalloc_unmap(span, span_count * _memory_span_size, 0, 0);
1438 } else {
1439 // Special double flag to denote an unmapped master
1440 // It must be kept in memory since span header must be used
1441 span->flags |=
1443 _rpmalloc_stat_add(&_unmapped_master_spans, 1);
1444 }
1445
1446 if (atomic_add32(&master->remaining_spans, -(int32_t)span_count) <= 0) {
1447 // Everything unmapped, unmap the master span with release flag to unmap the
1448 // entire range of the super span
1449 rpmalloc_assert(!!(master->flags & SPAN_FLAG_MASTER) &&
1450 !!(master->flags & SPAN_FLAG_SUBSPAN),
1451 "Span flag corrupted");
1452 size_t unmap_count = master->span_count;
1454 unmap_count = master->total_spans;
1455 _rpmalloc_stat_sub(&_master_spans, 1);
1456 _rpmalloc_stat_sub(&_unmapped_master_spans, 1);
1457 _rpmalloc_unmap(master, unmap_count * _memory_span_size,
1458 master->align_offset,
1459 (size_t)master->total_spans * _memory_span_size);
1460 }
1461}
1462
1463//! Move the span (used for small or medium allocations) to the heap thread
1464//! cache
1466 rpmalloc_assert(heap == span->heap, "Span heap pointer corrupted");
1468 "Invalid span size class");
1469 rpmalloc_assert(span->span_count == 1, "Invalid span count");
1470#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
1471 atomic_decr32(&heap->span_use[0].current);
1472#endif
1473 _rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current);
1474 if (!heap->finalize) {
1475 _rpmalloc_stat_inc(&heap->span_use[0].spans_to_cache);
1476 _rpmalloc_stat_inc(&heap->size_class_use[span->size_class].spans_to_cache);
1477 if (heap->size_class[span->size_class].cache)
1479 heap->size_class[span->size_class].cache);
1480 heap->size_class[span->size_class].cache = span;
1481 } else {
1483 }
1484}
1485
1486//! Initialize a (partial) free list up to next system memory page, while
1487//! reserving the first block as allocated, returning number of blocks in list
1488static uint32_t free_list_partial_init(void **list, void **first_block,
1489 void *page_start, void *block_start,
1490 uint32_t block_count,
1491 uint32_t block_size) {
1492 rpmalloc_assert(block_count, "Internal failure");
1493 *first_block = block_start;
1494 if (block_count > 1) {
1495 void *free_block = pointer_offset(block_start, block_size);
1496 void *block_end =
1497 pointer_offset(block_start, (size_t)block_size * block_count);
1498 // If block size is less than half a memory page, bound init to next memory
1499 // page boundary
1500 if (block_size < (_memory_page_size >> 1)) {
1501 void *page_end = pointer_offset(page_start, _memory_page_size);
1502 if (page_end < block_end)
1503 block_end = page_end;
1504 }
1505 *list = free_block;
1506 block_count = 2;
1507 void *next_block = pointer_offset(free_block, block_size);
1508 while (next_block < block_end) {
1509 *((void **)free_block) = next_block;
1510 free_block = next_block;
1511 ++block_count;
1512 next_block = pointer_offset(next_block, block_size);
1513 }
1514 *((void **)free_block) = 0;
1515 } else {
1516 *list = 0;
1517 }
1518 return block_count;
1519}
1520
1521//! Initialize an unused span (from cache or mapped) to be new active span,
1522//! putting the initial free list in heap class free list
1524 heap_size_class_t *heap_size_class,
1525 span_t *span, uint32_t class_idx) {
1526 rpmalloc_assert(span->span_count == 1, "Internal failure");
1527 size_class_t *size_class = _memory_size_class + class_idx;
1528 span->size_class = class_idx;
1529 span->heap = heap;
1530 span->flags &= ~SPAN_FLAG_ALIGNED_BLOCKS;
1531 span->block_size = size_class->block_size;
1532 span->block_count = size_class->block_count;
1533 span->free_list = 0;
1534 span->list_size = 0;
1536
1537 // Setup free list. Only initialize one system page worth of free blocks in
1538 // list
1539 void *block;
1540 span->free_list_limit =
1541 free_list_partial_init(&heap_size_class->free_list, &block, span,
1543 size_class->block_count, size_class->block_size);
1544 // Link span as partial if there remains blocks to be initialized as free
1545 // list, or full if fully initialized
1546 if (span->free_list_limit < span->block_count) {
1547 _rpmalloc_span_double_link_list_add(&heap_size_class->partial_span, span);
1548 span->used_count = span->free_list_limit;
1549 } else {
1550#if RPMALLOC_FIRST_CLASS_HEAPS
1551 _rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span);
1552#endif
1553 ++heap->full_span_count;
1554 span->used_count = span->block_count;
1555 }
1556 return block;
1557}
1558
1560 // We need acquire semantics on the CAS operation since we are interested in
1561 // the list size Refer to _rpmalloc_deallocate_defer_small_or_medium for
1562 // further comments on this dependency
1563 do {
1564 span->free_list =
1566 } while (span->free_list == INVALID_POINTER);
1567 span->used_count -= span->list_size;
1568 span->list_size = 0;
1570}
1571
1574 "Span free list corrupted");
1575 return !span->free_list && (span->free_list_limit >= span->block_count);
1576}
1577
1578static int _rpmalloc_span_finalize(heap_t *heap, size_t iclass, span_t *span,
1579 span_t **list_head) {
1580 void *free_list = heap->size_class[iclass].free_list;
1581 span_t *class_span = (span_t *)((uintptr_t)free_list & _memory_span_mask);
1582 if (span == class_span) {
1583 // Adopt the heap class free list back into the span free list
1584 void *block = span->free_list;
1585 void *last_block = 0;
1586 while (block) {
1587 last_block = block;
1588 block = *((void **)block);
1589 }
1590 uint32_t free_count = 0;
1591 block = free_list;
1592 while (block) {
1593 ++free_count;
1594 block = *((void **)block);
1595 }
1596 if (last_block) {
1597 *((void **)last_block) = free_list;
1598 } else {
1599 span->free_list = free_list;
1600 }
1601 heap->size_class[iclass].free_list = 0;
1602 span->used_count -= free_count;
1603 }
1604 // If this assert triggers you have memory leaks
1605 rpmalloc_assert(span->list_size == span->used_count, "Memory leak detected");
1606 if (span->list_size == span->used_count) {
1607 _rpmalloc_stat_dec(&heap->span_use[0].current);
1608 _rpmalloc_stat_dec(&heap->size_class_use[iclass].spans_current);
1609 // This function only used for spans in double linked lists
1610 if (list_head)
1613 return 1;
1614 }
1615 return 0;
1616}
1617
1618////////////
1619///
1620/// Global cache
1621///
1622//////
1623
1624#if ENABLE_GLOBAL_CACHE
1625
1626//! Finalize a global cache
1627static void _rpmalloc_global_cache_finalize(global_cache_t *cache) {
1628 while (!atomic_cas32_acquire(&cache->lock, 1, 0))
1630
1631 for (size_t ispan = 0; ispan < cache->count; ++ispan)
1632 _rpmalloc_span_unmap(cache->span[ispan]);
1633 cache->count = 0;
1634
1635 while (cache->overflow) {
1636 span_t *span = cache->overflow;
1637 cache->overflow = span->next;
1639 }
1640
1641 atomic_store32_release(&cache->lock, 0);
1642}
1643
1644static void _rpmalloc_global_cache_insert_spans(span_t **span,
1645 size_t span_count,
1646 size_t count) {
1647 const size_t cache_limit =
1650 (MAX_THREAD_SPAN_LARGE_CACHE - (span_count >> 1));
1651
1652 global_cache_t *cache = &_memory_span_cache[span_count - 1];
1653
1654 size_t insert_count = count;
1655 while (!atomic_cas32_acquire(&cache->lock, 1, 0))
1657
1658#if ENABLE_STATISTICS
1659 cache->insert_count += count;
1660#endif
1661 if ((cache->count + insert_count) > cache_limit)
1662 insert_count = cache_limit - cache->count;
1663
1664 memcpy(cache->span + cache->count, span, sizeof(span_t *) * insert_count);
1665 cache->count += (uint32_t)insert_count;
1666
1667#if ENABLE_UNLIMITED_CACHE
1668 while (insert_count < count) {
1669#else
1670 // Enable unlimited cache if huge pages, or we will leak since it is unlikely
1671 // that an entire huge page will be unmapped, and we're unable to partially
1672 // decommit a huge page
1673 while ((_memory_page_size > _memory_span_size) && (insert_count < count)) {
1674#endif
1675 span_t *current_span = span[insert_count++];
1676 current_span->next = cache->overflow;
1677 cache->overflow = current_span;
1678 }
1679 atomic_store32_release(&cache->lock, 0);
1680
1681 span_t *keep = 0;
1682 for (size_t ispan = insert_count; ispan < count; ++ispan) {
1683 span_t *current_span = span[ispan];
1684 // Keep master spans that has remaining subspans to avoid dangling them
1685 if ((current_span->flags & SPAN_FLAG_MASTER) &&
1686 (atomic_load32(&current_span->remaining_spans) >
1687 (int32_t)current_span->span_count)) {
1688 current_span->next = keep;
1689 keep = current_span;
1690 } else {
1691 _rpmalloc_span_unmap(current_span);
1692 }
1693 }
1694
1695 if (keep) {
1696 while (!atomic_cas32_acquire(&cache->lock, 1, 0))
1698
1699 size_t islot = 0;
1700 while (keep) {
1701 for (; islot < cache->count; ++islot) {
1702 span_t *current_span = cache->span[islot];
1703 if (!(current_span->flags & SPAN_FLAG_MASTER) ||
1704 ((current_span->flags & SPAN_FLAG_MASTER) &&
1705 (atomic_load32(&current_span->remaining_spans) <=
1706 (int32_t)current_span->span_count))) {
1707 _rpmalloc_span_unmap(current_span);
1708 cache->span[islot] = keep;
1709 break;
1710 }
1711 }
1712 if (islot == cache->count)
1713 break;
1714 keep = keep->next;
1715 }
1716
1717 if (keep) {
1718 span_t *tail = keep;
1719 while (tail->next)
1720 tail = tail->next;
1721 tail->next = cache->overflow;
1722 cache->overflow = keep;
1723 }
1724
1725 atomic_store32_release(&cache->lock, 0);
1726 }
1727}
1728
1729static size_t _rpmalloc_global_cache_extract_spans(span_t **span,
1730 size_t span_count,
1731 size_t count) {
1732 global_cache_t *cache = &_memory_span_cache[span_count - 1];
1733
1734 size_t extract_count = 0;
1735 while (!atomic_cas32_acquire(&cache->lock, 1, 0))
1737
1738#if ENABLE_STATISTICS
1739 cache->extract_count += count;
1740#endif
1741 size_t want = count - extract_count;
1742 if (want > cache->count)
1743 want = cache->count;
1744
1745 memcpy(span + extract_count, cache->span + (cache->count - want),
1746 sizeof(span_t *) * want);
1747 cache->count -= (uint32_t)want;
1748 extract_count += want;
1749
1750 while ((extract_count < count) && cache->overflow) {
1751 span_t *current_span = cache->overflow;
1752 span[extract_count++] = current_span;
1753 cache->overflow = current_span->next;
1754 }
1755
1756#if ENABLE_ASSERTS
1757 for (size_t ispan = 0; ispan < extract_count; ++ispan) {
1758 rpmalloc_assert(span[ispan]->span_count == span_count,
1759 "Global cache span count mismatch");
1760 }
1761#endif
1762
1763 atomic_store32_release(&cache->lock, 0);
1764
1765 return extract_count;
1766}
1767
1768#endif
1769
1770////////////
1771///
1772/// Heap control
1773///
1774//////
1775
1776static void _rpmalloc_deallocate_huge(span_t *);
1777
1778//! Store the given spans as reserve in the given heap
1780 span_t *reserve,
1781 size_t reserve_span_count) {
1782 heap->span_reserve_master = master;
1783 heap->span_reserve = reserve;
1784 heap->spans_reserved = (uint32_t)reserve_span_count;
1785}
1786
1787//! Adopt the deferred span cache list, optionally extracting the first single
1788//! span for immediate re-use
1790 span_t **single_span) {
1791 span_t *span = (span_t *)((void *)atomic_exchange_ptr_acquire(
1792 &heap->span_free_deferred, 0));
1793 while (span) {
1794 span_t *next_span = (span_t *)span->free_list;
1795 rpmalloc_assert(span->heap == heap, "Span heap pointer corrupted");
1796 if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) {
1797 rpmalloc_assert(heap->full_span_count, "Heap span counter corrupted");
1798 --heap->full_span_count;
1799 _rpmalloc_stat_dec(&heap->span_use[0].spans_deferred);
1800#if RPMALLOC_FIRST_CLASS_HEAPS
1801 _rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class],
1802 span);
1803#endif
1804 _rpmalloc_stat_dec(&heap->span_use[0].current);
1805 _rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current);
1806 if (single_span && !*single_span)
1807 *single_span = span;
1808 else
1809 _rpmalloc_heap_cache_insert(heap, span);
1810 } else {
1811 if (span->size_class == SIZE_CLASS_HUGE) {
1813 } else {
1815 "Span size class invalid");
1816 rpmalloc_assert(heap->full_span_count, "Heap span counter corrupted");
1817 --heap->full_span_count;
1818#if RPMALLOC_FIRST_CLASS_HEAPS
1819 _rpmalloc_span_double_link_list_remove(&heap->large_huge_span, span);
1820#endif
1821 uint32_t idx = span->span_count - 1;
1822 _rpmalloc_stat_dec(&heap->span_use[idx].spans_deferred);
1823 _rpmalloc_stat_dec(&heap->span_use[idx].current);
1824 if (!idx && single_span && !*single_span)
1825 *single_span = span;
1826 else
1827 _rpmalloc_heap_cache_insert(heap, span);
1828 }
1829 }
1830 span = next_span;
1831 }
1832}
1833
1834static void _rpmalloc_heap_unmap(heap_t *heap) {
1835 if (!heap->master_heap) {
1836 if ((heap->finalize > 1) && !atomic_load32(&heap->child_count)) {
1837 span_t *span = (span_t *)((uintptr_t)heap & _memory_span_mask);
1839 }
1840 } else {
1841 if (atomic_decr32(&heap->master_heap->child_count) == 0) {
1843 }
1844 }
1845}
1846
1848 if (heap->finalize++ > 1) {
1849 --heap->finalize;
1850 return;
1851 }
1852
1854
1855#if ENABLE_THREAD_CACHE
1856 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
1857 span_cache_t *span_cache;
1858 if (!iclass)
1859 span_cache = &heap->span_cache;
1860 else
1861 span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1));
1862 for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
1863 _rpmalloc_span_unmap(span_cache->span[ispan]);
1864 span_cache->count = 0;
1865 }
1866#endif
1867
1868 if (heap->full_span_count) {
1869 --heap->finalize;
1870 return;
1871 }
1872
1873 for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
1874 if (heap->size_class[iclass].free_list ||
1875 heap->size_class[iclass].partial_span) {
1876 --heap->finalize;
1877 return;
1878 }
1879 }
1880 // Heap is now completely free, unmap and remove from heap list
1881 size_t list_idx = (size_t)heap->id % HEAP_ARRAY_SIZE;
1882 heap_t *list_heap = _memory_heaps[list_idx];
1883 if (list_heap == heap) {
1884 _memory_heaps[list_idx] = heap->next_heap;
1885 } else {
1886 while (list_heap->next_heap != heap)
1887 list_heap = list_heap->next_heap;
1888 list_heap->next_heap = heap->next_heap;
1889 }
1890
1892}
1893
1894//! Insert a single span into thread heap cache, releasing to global cache if
1895//! overflow
1896static void _rpmalloc_heap_cache_insert(heap_t *heap, span_t *span) {
1897 if (UNEXPECTED(heap->finalize != 0)) {
1900 return;
1901 }
1902#if ENABLE_THREAD_CACHE
1903 size_t span_count = span->span_count;
1904 _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_to_cache);
1905 if (span_count == 1) {
1906 span_cache_t *span_cache = &heap->span_cache;
1907 span_cache->span[span_cache->count++] = span;
1908 if (span_cache->count == MAX_THREAD_SPAN_CACHE) {
1909 const size_t remain_count =
1911#if ENABLE_GLOBAL_CACHE
1912 _rpmalloc_stat_add64(&heap->thread_to_global,
1914 _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_to_global,
1916 _rpmalloc_global_cache_insert_spans(span_cache->span + remain_count,
1917 span_count,
1919#else
1920 for (size_t ispan = 0; ispan < THREAD_SPAN_CACHE_TRANSFER; ++ispan)
1921 _rpmalloc_span_unmap(span_cache->span[remain_count + ispan]);
1922#endif
1923 span_cache->count = remain_count;
1924 }
1925 } else {
1926 size_t cache_idx = span_count - 2;
1927 span_large_cache_t *span_cache = heap->span_large_cache + cache_idx;
1928 span_cache->span[span_cache->count++] = span;
1929 const size_t cache_limit =
1930 (MAX_THREAD_SPAN_LARGE_CACHE - (span_count >> 1));
1931 if (span_cache->count == cache_limit) {
1932 const size_t transfer_limit = 2 + (cache_limit >> 2);
1933 const size_t transfer_count =
1934 (THREAD_SPAN_LARGE_CACHE_TRANSFER <= transfer_limit
1936 : transfer_limit);
1937 const size_t remain_count = cache_limit - transfer_count;
1938#if ENABLE_GLOBAL_CACHE
1939 _rpmalloc_stat_add64(&heap->thread_to_global,
1940 transfer_count * span_count * _memory_span_size);
1941 _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_to_global,
1942 transfer_count);
1943 _rpmalloc_global_cache_insert_spans(span_cache->span + remain_count,
1944 span_count, transfer_count);
1945#else
1946 for (size_t ispan = 0; ispan < transfer_count; ++ispan)
1947 _rpmalloc_span_unmap(span_cache->span[remain_count + ispan]);
1948#endif
1949 span_cache->count = remain_count;
1950 }
1951 }
1952#else
1953 (void)sizeof(heap);
1955#endif
1956}
1957
1958//! Extract the given number of spans from the different cache levels
1960 size_t span_count) {
1961 span_t *span = 0;
1962#if ENABLE_THREAD_CACHE
1963 span_cache_t *span_cache;
1964 if (span_count == 1)
1965 span_cache = &heap->span_cache;
1966 else
1967 span_cache = (span_cache_t *)(heap->span_large_cache + (span_count - 2));
1968 if (span_cache->count) {
1969 _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_from_cache);
1970 return span_cache->span[--span_cache->count];
1971 }
1972#endif
1973 return span;
1974}
1975
1977 size_t span_count) {
1978 span_t *span = 0;
1979 if (span_count == 1) {
1981 } else {
1983 span = _rpmalloc_heap_thread_cache_extract(heap, span_count);
1984 }
1985 return span;
1986}
1987
1989 size_t span_count) {
1990 if (heap->spans_reserved >= span_count)
1991 return _rpmalloc_span_map(heap, span_count);
1992 return 0;
1993}
1994
1995//! Extract a span from the global cache
1997 size_t span_count) {
1998#if ENABLE_GLOBAL_CACHE
1999#if ENABLE_THREAD_CACHE
2000 span_cache_t *span_cache;
2001 size_t wanted_count;
2002 if (span_count == 1) {
2003 span_cache = &heap->span_cache;
2004 wanted_count = THREAD_SPAN_CACHE_TRANSFER;
2005 } else {
2006 span_cache = (span_cache_t *)(heap->span_large_cache + (span_count - 2));
2007 wanted_count = THREAD_SPAN_LARGE_CACHE_TRANSFER;
2008 }
2009 span_cache->count = _rpmalloc_global_cache_extract_spans(
2010 span_cache->span, span_count, wanted_count);
2011 if (span_cache->count) {
2012 _rpmalloc_stat_add64(&heap->global_to_thread,
2013 span_count * span_cache->count * _memory_span_size);
2014 _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_from_global,
2015 span_cache->count);
2016 return span_cache->span[--span_cache->count];
2017 }
2018#else
2019 span_t *span = 0;
2020 size_t count = _rpmalloc_global_cache_extract_spans(&span, span_count, 1);
2021 if (count) {
2022 _rpmalloc_stat_add64(&heap->global_to_thread,
2023 span_count * count * _memory_span_size);
2024 _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_from_global,
2025 count);
2026 return span;
2027 }
2028#endif
2029#endif
2030 (void)sizeof(heap);
2031 (void)sizeof(span_count);
2032 return 0;
2033}
2034
2035static void _rpmalloc_inc_span_statistics(heap_t *heap, size_t span_count,
2036 uint32_t class_idx) {
2037 (void)sizeof(heap);
2038 (void)sizeof(span_count);
2039 (void)sizeof(class_idx);
2040#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
2041 uint32_t idx = (uint32_t)span_count - 1;
2042 uint32_t current_count =
2043 (uint32_t)atomic_incr32(&heap->span_use[idx].current);
2044 if (current_count > (uint32_t)atomic_load32(&heap->span_use[idx].high))
2045 atomic_store32(&heap->span_use[idx].high, (int32_t)current_count);
2046 _rpmalloc_stat_add_peak(&heap->size_class_use[class_idx].spans_current, 1,
2047 heap->size_class_use[class_idx].spans_peak);
2048#endif
2049}
2050
2051//! Get a span from one of the cache levels (thread cache, reserved, global
2052//! cache) or fallback to mapping more memory
2053static span_t *
2055 heap_size_class_t *heap_size_class,
2056 size_t span_count, uint32_t class_idx) {
2057 span_t *span;
2058#if ENABLE_THREAD_CACHE
2059 if (heap_size_class && heap_size_class->cache) {
2060 span = heap_size_class->cache;
2061 heap_size_class->cache =
2062 (heap->span_cache.count
2063 ? heap->span_cache.span[--heap->span_cache.count]
2064 : 0);
2065 _rpmalloc_inc_span_statistics(heap, span_count, class_idx);
2066 return span;
2067 }
2068#endif
2069 (void)sizeof(class_idx);
2070 // Allow 50% overhead to increase cache hits
2071 size_t base_span_count = span_count;
2072 size_t limit_span_count =
2073 (span_count > 2) ? (span_count + (span_count >> 1)) : span_count;
2074 if (limit_span_count > LARGE_CLASS_COUNT)
2075 limit_span_count = LARGE_CLASS_COUNT;
2076 do {
2077 span = _rpmalloc_heap_thread_cache_extract(heap, span_count);
2078 if (EXPECTED(span != 0)) {
2079 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache);
2080 _rpmalloc_inc_span_statistics(heap, span_count, class_idx);
2081 return span;
2082 }
2083 span = _rpmalloc_heap_thread_cache_deferred_extract(heap, span_count);
2084 if (EXPECTED(span != 0)) {
2085 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache);
2086 _rpmalloc_inc_span_statistics(heap, span_count, class_idx);
2087 return span;
2088 }
2089 span = _rpmalloc_heap_global_cache_extract(heap, span_count);
2090 if (EXPECTED(span != 0)) {
2091 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache);
2092 _rpmalloc_inc_span_statistics(heap, span_count, class_idx);
2093 return span;
2094 }
2095 span = _rpmalloc_heap_reserved_extract(heap, span_count);
2096 if (EXPECTED(span != 0)) {
2097 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_reserved);
2098 _rpmalloc_inc_span_statistics(heap, span_count, class_idx);
2099 return span;
2100 }
2101 ++span_count;
2102 } while (span_count <= limit_span_count);
2103 // Final fallback, map in more virtual memory
2104 span = _rpmalloc_span_map(heap, base_span_count);
2105 _rpmalloc_inc_span_statistics(heap, base_span_count, class_idx);
2106 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_map_calls);
2107 return span;
2108}
2109
2111 _rpmalloc_memset_const(heap, 0, sizeof(heap_t));
2112 // Get a new heap ID
2113 heap->id = 1 + atomic_incr32(&_memory_heap_id);
2114
2115 // Link in heap in heap ID map
2116 size_t list_idx = (size_t)heap->id % HEAP_ARRAY_SIZE;
2117 heap->next_heap = _memory_heaps[list_idx];
2118 _memory_heaps[list_idx] = heap;
2119}
2120
2121static void _rpmalloc_heap_orphan(heap_t *heap, int first_class) {
2122 heap->owner_thread = (uintptr_t)-1;
2123#if RPMALLOC_FIRST_CLASS_HEAPS
2124 heap_t **heap_list =
2125 (first_class ? &_memory_first_class_orphan_heaps : &_memory_orphan_heaps);
2126#else
2127 (void)sizeof(first_class);
2128 heap_t **heap_list = &_memory_orphan_heaps;
2129#endif
2130 heap->next_orphan = *heap_list;
2131 *heap_list = heap;
2132}
2133
2134//! Allocate a new heap from newly mapped memory pages
2136 // Map in pages for a 16 heaps. If page size is greater than required size for
2137 // this, map a page and use first part for heaps and remaining part for spans
2138 // for allocations. Adds a lot of complexity, but saves a lot of memory on
2139 // systems where page size > 64 spans (4MiB)
2140 size_t heap_size = sizeof(heap_t);
2141 size_t aligned_heap_size = 16 * ((heap_size + 15) / 16);
2142 size_t request_heap_count = 16;
2143 size_t heap_span_count = ((aligned_heap_size * request_heap_count) +
2144 sizeof(span_t) + _memory_span_size - 1) /
2146 size_t block_size = _memory_span_size * heap_span_count;
2147 size_t span_count = heap_span_count;
2148 span_t *span = 0;
2149 // If there are global reserved spans, use these first
2150 if (_memory_global_reserve_count >= heap_span_count) {
2151 span = _rpmalloc_global_get_reserved_spans(heap_span_count);
2152 }
2153 if (!span) {
2154 if (_memory_page_size > block_size) {
2155 span_count = _memory_page_size / _memory_span_size;
2156 block_size = _memory_page_size;
2157 // If using huge pages, make sure to grab enough heaps to avoid
2158 // reallocating a huge page just to serve new heaps
2159 size_t possible_heap_count =
2160 (block_size - sizeof(span_t)) / aligned_heap_size;
2161 if (possible_heap_count >= (request_heap_count * 16))
2162 request_heap_count *= 16;
2163 else if (possible_heap_count < request_heap_count)
2164 request_heap_count = possible_heap_count;
2165 heap_span_count = ((aligned_heap_size * request_heap_count) +
2166 sizeof(span_t) + _memory_span_size - 1) /
2168 }
2169
2170 size_t align_offset = 0;
2171 span = (span_t *)_rpmalloc_mmap(block_size, &align_offset);
2172 if (!span)
2173 return 0;
2174
2175 // Master span will contain the heaps
2176 _rpmalloc_stat_inc(&_master_spans);
2177 _rpmalloc_span_initialize(span, span_count, heap_span_count, align_offset);
2178 }
2179
2180 size_t remain_size = _memory_span_size - sizeof(span_t);
2181 heap_t *heap = (heap_t *)pointer_offset(span, sizeof(span_t));
2183
2184 // Put extra heaps as orphans
2185 size_t num_heaps = remain_size / aligned_heap_size;
2186 if (num_heaps < request_heap_count)
2187 num_heaps = request_heap_count;
2188 atomic_store32(&heap->child_count, (int32_t)num_heaps - 1);
2189 heap_t *extra_heap = (heap_t *)pointer_offset(heap, aligned_heap_size);
2190 while (num_heaps > 1) {
2191 _rpmalloc_heap_initialize(extra_heap);
2192 extra_heap->master_heap = heap;
2193 _rpmalloc_heap_orphan(extra_heap, 1);
2194 extra_heap = (heap_t *)pointer_offset(extra_heap, aligned_heap_size);
2195 --num_heaps;
2196 }
2197
2198 if (span_count > heap_span_count) {
2199 // Cap reserved spans
2200 size_t remain_count = span_count - heap_span_count;
2201 size_t reserve_count =
2203 : remain_count);
2204 span_t *remain_span =
2205 (span_t *)pointer_offset(span, heap_span_count * _memory_span_size);
2206 _rpmalloc_heap_set_reserved_spans(heap, span, remain_span, reserve_count);
2207
2208 if (remain_count > reserve_count) {
2209 // Set to global reserved spans
2210 remain_span = (span_t *)pointer_offset(remain_span,
2211 reserve_count * _memory_span_size);
2212 reserve_count = remain_count - reserve_count;
2213 _rpmalloc_global_set_reserved_spans(span, remain_span, reserve_count);
2214 }
2215 }
2216
2217 return heap;
2218}
2219
2221 heap_t *heap = *heap_list;
2222 *heap_list = (heap ? heap->next_orphan : 0);
2223 return heap;
2224}
2225
2226//! Allocate a new heap, potentially reusing a previously orphaned heap
2227static heap_t *_rpmalloc_heap_allocate(int first_class) {
2228 heap_t *heap = 0;
2231 if (first_class == 0)
2233#if RPMALLOC_FIRST_CLASS_HEAPS
2234 if (!heap)
2235 heap = _rpmalloc_heap_extract_orphan(&_memory_first_class_orphan_heaps);
2236#endif
2237 if (!heap)
2240 if (heap)
2242 return heap;
2243}
2244
2245static void _rpmalloc_heap_release(void *heapptr, int first_class,
2246 int release_cache) {
2247 heap_t *heap = (heap_t *)heapptr;
2248 if (!heap)
2249 return;
2250 // Release thread cache spans back to global cache
2252 if (release_cache || heap->finalize) {
2253#if ENABLE_THREAD_CACHE
2254 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
2255 span_cache_t *span_cache;
2256 if (!iclass)
2257 span_cache = &heap->span_cache;
2258 else
2259 span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1));
2260 if (!span_cache->count)
2261 continue;
2262#if ENABLE_GLOBAL_CACHE
2263 if (heap->finalize) {
2264 for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
2265 _rpmalloc_span_unmap(span_cache->span[ispan]);
2266 } else {
2267 _rpmalloc_stat_add64(&heap->thread_to_global, span_cache->count *
2268 (iclass + 1) *
2270 _rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global,
2271 span_cache->count);
2272 _rpmalloc_global_cache_insert_spans(span_cache->span, iclass + 1,
2273 span_cache->count);
2274 }
2275#else
2276 for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
2277 _rpmalloc_span_unmap(span_cache->span[ispan]);
2278#endif
2279 span_cache->count = 0;
2280 }
2281#endif
2282 }
2283
2284 if (get_thread_heap_raw() == heap)
2285 set_thread_heap(0);
2286
2287#if ENABLE_STATISTICS
2288 atomic_decr32(&_memory_active_heaps);
2289 rpmalloc_assert(atomic_load32(&_memory_active_heaps) >= 0,
2290 "Still active heaps during finalization");
2291#endif
2292
2293 // If we are forcibly terminating with _exit the state of the
2294 // lock atomic is unknown and it's best to just go ahead and exit
2298 }
2299 _rpmalloc_heap_orphan(heap, first_class);
2301}
2302
2303static void _rpmalloc_heap_release_raw(void *heapptr, int release_cache) {
2304 _rpmalloc_heap_release(heapptr, 0, release_cache);
2305}
2306
2307static void _rpmalloc_heap_release_raw_fc(void *heapptr) {
2308 _rpmalloc_heap_release_raw(heapptr, 1);
2309}
2310
2312 if (heap->spans_reserved) {
2313 span_t *span = _rpmalloc_span_map(heap, heap->spans_reserved);
2315 heap->spans_reserved = 0;
2316 }
2317
2319
2320 for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
2321 if (heap->size_class[iclass].cache)
2322 _rpmalloc_span_unmap(heap->size_class[iclass].cache);
2323 heap->size_class[iclass].cache = 0;
2324 span_t *span = heap->size_class[iclass].partial_span;
2325 while (span) {
2326 span_t *next = span->next;
2327 _rpmalloc_span_finalize(heap, iclass, span,
2328 &heap->size_class[iclass].partial_span);
2329 span = next;
2330 }
2331 // If class still has a free list it must be a full span
2332 if (heap->size_class[iclass].free_list) {
2333 span_t *class_span =
2334 (span_t *)((uintptr_t)heap->size_class[iclass].free_list &
2336 span_t **list = 0;
2337#if RPMALLOC_FIRST_CLASS_HEAPS
2338 list = &heap->full_span[iclass];
2339#endif
2340 --heap->full_span_count;
2341 if (!_rpmalloc_span_finalize(heap, iclass, class_span, list)) {
2342 if (list)
2345 &heap->size_class[iclass].partial_span, class_span);
2346 }
2347 }
2348 }
2349
2350#if ENABLE_THREAD_CACHE
2351 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
2352 span_cache_t *span_cache;
2353 if (!iclass)
2354 span_cache = &heap->span_cache;
2355 else
2356 span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1));
2357 for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
2358 _rpmalloc_span_unmap(span_cache->span[ispan]);
2359 span_cache->count = 0;
2360 }
2361#endif
2363 "Heaps still active during finalization");
2364}
2365
2366////////////
2367///
2368/// Allocation entry points
2369///
2370//////
2371
2372//! Pop first block from a free list
2373static void *free_list_pop(void **list) {
2374 void *block = *list;
2375 *list = *((void **)block);
2376 return block;
2377}
2378
2379//! Allocate a small/medium sized memory block from the given heap
2381 heap_t *heap, heap_size_class_t *heap_size_class, uint32_t class_idx) {
2382 span_t *span = heap_size_class->partial_span;
2383 rpmalloc_assume(heap != 0);
2384 if (EXPECTED(span != 0)) {
2387 "Span block count corrupted");
2389 "Internal failure");
2390 void *block;
2391 if (span->free_list) {
2392 // Span local free list is not empty, swap to size class free list
2393 block = free_list_pop(&span->free_list);
2394 heap_size_class->free_list = span->free_list;
2395 span->free_list = 0;
2396 } else {
2397 // If the span did not fully initialize free list, link up another page
2398 // worth of blocks
2399 void *block_start = pointer_offset(
2400 span, SPAN_HEADER_SIZE +
2401 ((size_t)span->free_list_limit * span->block_size));
2403 &heap_size_class->free_list, &block,
2404 (void *)((uintptr_t)block_start & ~(_memory_page_size - 1)),
2405 block_start, span->block_count - span->free_list_limit,
2406 span->block_size);
2407 }
2409 "Span block count corrupted");
2410 span->used_count = span->free_list_limit;
2411
2412 // Swap in deferred free list if present
2415
2416 // If span is still not fully utilized keep it in partial list and early
2417 // return block
2419 return block;
2420
2421 // The span is fully utilized, unlink from partial list and add to fully
2422 // utilized list
2424 span);
2425#if RPMALLOC_FIRST_CLASS_HEAPS
2426 _rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span);
2427#endif
2428 ++heap->full_span_count;
2429 return block;
2430 }
2431
2432 // Find a span in one of the cache levels
2433 span = _rpmalloc_heap_extract_new_span(heap, heap_size_class, 1, class_idx);
2434 if (EXPECTED(span != 0)) {
2435 // Mark span as owned by this heap and set base data, return first block
2436 return _rpmalloc_span_initialize_new(heap, heap_size_class, span,
2437 class_idx);
2438 }
2439
2440 return 0;
2441}
2442
2443//! Allocate a small sized memory block from the given heap
2444static void *_rpmalloc_allocate_small(heap_t *heap, size_t size) {
2445 rpmalloc_assert(heap, "No thread heap");
2446 // Small sizes have unique size classes
2447 const uint32_t class_idx =
2449 heap_size_class_t *heap_size_class = heap->size_class + class_idx;
2450 _rpmalloc_stat_inc_alloc(heap, class_idx);
2451 if (EXPECTED(heap_size_class->free_list != 0))
2452 return free_list_pop(&heap_size_class->free_list);
2453 return _rpmalloc_allocate_from_heap_fallback(heap, heap_size_class,
2454 class_idx);
2455}
2456
2457//! Allocate a medium sized memory block from the given heap
2458static void *_rpmalloc_allocate_medium(heap_t *heap, size_t size) {
2459 rpmalloc_assert(heap, "No thread heap");
2460 // Calculate the size class index and do a dependent lookup of the final class
2461 // index (in case of merged classes)
2462 const uint32_t base_idx =
2464 ((size - (SMALL_SIZE_LIMIT + 1)) >> MEDIUM_GRANULARITY_SHIFT));
2465 const uint32_t class_idx = _memory_size_class[base_idx].class_idx;
2466 heap_size_class_t *heap_size_class = heap->size_class + class_idx;
2467 _rpmalloc_stat_inc_alloc(heap, class_idx);
2468 if (EXPECTED(heap_size_class->free_list != 0))
2469 return free_list_pop(&heap_size_class->free_list);
2470 return _rpmalloc_allocate_from_heap_fallback(heap, heap_size_class,
2471 class_idx);
2472}
2473
2474//! Allocate a large sized memory block from the given heap
2475static void *_rpmalloc_allocate_large(heap_t *heap, size_t size) {
2476 rpmalloc_assert(heap, "No thread heap");
2477 // Calculate number of needed max sized spans (including header)
2478 // Since this function is never called if size > LARGE_SIZE_LIMIT
2479 // the span_count is guaranteed to be <= LARGE_CLASS_COUNT
2480 size += SPAN_HEADER_SIZE;
2481 size_t span_count = size >> _memory_span_size_shift;
2482 if (size & (_memory_span_size - 1))
2483 ++span_count;
2484
2485 // Find a span in one of the cache levels
2486 span_t *span =
2488 if (!span)
2489 return span;
2490
2491 // Mark span as owned by this heap and set base data
2492 rpmalloc_assert(span->span_count >= span_count, "Internal failure");
2494 span->heap = heap;
2495
2496#if RPMALLOC_FIRST_CLASS_HEAPS
2497 _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span);
2498#endif
2499 ++heap->full_span_count;
2500
2501 return pointer_offset(span, SPAN_HEADER_SIZE);
2502}
2503
2504//! Allocate a huge block by mapping memory pages directly
2505static void *_rpmalloc_allocate_huge(heap_t *heap, size_t size) {
2506 rpmalloc_assert(heap, "No thread heap");
2508 size += SPAN_HEADER_SIZE;
2509 size_t num_pages = size >> _memory_page_size_shift;
2510 if (size & (_memory_page_size - 1))
2511 ++num_pages;
2512 size_t align_offset = 0;
2513 span_t *span =
2514 (span_t *)_rpmalloc_mmap(num_pages * _memory_page_size, &align_offset);
2515 if (!span)
2516 return span;
2517
2518 // Store page count in span_count
2520 span->span_count = (uint32_t)num_pages;
2521 span->align_offset = (uint32_t)align_offset;
2522 span->heap = heap;
2523 _rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak);
2524
2525#if RPMALLOC_FIRST_CLASS_HEAPS
2526 _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span);
2527#endif
2528 ++heap->full_span_count;
2529
2530 return pointer_offset(span, SPAN_HEADER_SIZE);
2531}
2532
2533//! Allocate a block of the given size
2534static void *_rpmalloc_allocate(heap_t *heap, size_t size) {
2535 _rpmalloc_stat_add64(&_allocation_counter, 1);
2536 if (EXPECTED(size <= SMALL_SIZE_LIMIT))
2537 return _rpmalloc_allocate_small(heap, size);
2538 else if (size <= _memory_medium_size_limit)
2539 return _rpmalloc_allocate_medium(heap, size);
2540 else if (size <= LARGE_SIZE_LIMIT)
2541 return _rpmalloc_allocate_large(heap, size);
2542 return _rpmalloc_allocate_huge(heap, size);
2543}
2544
2545static void *_rpmalloc_aligned_allocate(heap_t *heap, size_t alignment,
2546 size_t size) {
2547 if (alignment <= SMALL_GRANULARITY)
2548 return _rpmalloc_allocate(heap, size);
2549
2550#if ENABLE_VALIDATE_ARGS
2551 if ((size + alignment) < size) {
2552 errno = EINVAL;
2553 return 0;
2554 }
2555 if (alignment & (alignment - 1)) {
2556 errno = EINVAL;
2557 return 0;
2558 }
2559#endif
2560
2561 if ((alignment <= SPAN_HEADER_SIZE) &&
2563 // If alignment is less or equal to span header size (which is power of
2564 // two), and size aligned to span header size multiples is less than size +
2565 // alignment, then use natural alignment of blocks to provide alignment
2566 size_t multiple_size = size ? (size + (SPAN_HEADER_SIZE - 1)) &
2567 ~(uintptr_t)(SPAN_HEADER_SIZE - 1)
2569 rpmalloc_assert(!(multiple_size % SPAN_HEADER_SIZE),
2570 "Failed alignment calculation");
2571 if (multiple_size <= (size + alignment))
2572 return _rpmalloc_allocate(heap, multiple_size);
2573 }
2574
2575 void *ptr = 0;
2576 size_t align_mask = alignment - 1;
2577 if (alignment <= _memory_page_size) {
2578 ptr = _rpmalloc_allocate(heap, size + alignment);
2579 if ((uintptr_t)ptr & align_mask) {
2580 ptr = (void *)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment);
2581 // Mark as having aligned blocks
2582 span_t *span = (span_t *)((uintptr_t)ptr & _memory_span_mask);
2584 }
2585 return ptr;
2586 }
2587
2588 // Fallback to mapping new pages for this request. Since pointers passed
2589 // to rpfree must be able to reach the start of the span by bitmasking of
2590 // the address with the span size, the returned aligned pointer from this
2591 // function must be with a span size of the start of the mapped area.
2592 // In worst case this requires us to loop and map pages until we get a
2593 // suitable memory address. It also means we can never align to span size
2594 // or greater, since the span header will push alignment more than one
2595 // span size away from span start (thus causing pointer mask to give us
2596 // an invalid span start on free)
2597 if (alignment & align_mask) {
2598 errno = EINVAL;
2599 return 0;
2600 }
2601 if (alignment >= _memory_span_size) {
2602 errno = EINVAL;
2603 return 0;
2604 }
2605
2606 size_t extra_pages = alignment / _memory_page_size;
2607
2608 // Since each span has a header, we will at least need one extra memory page
2609 size_t num_pages = 1 + (size / _memory_page_size);
2610 if (size & (_memory_page_size - 1))
2611 ++num_pages;
2612
2613 if (extra_pages > num_pages)
2614 num_pages = 1 + extra_pages;
2615
2616 size_t original_pages = num_pages;
2617 size_t limit_pages = (_memory_span_size / _memory_page_size) * 2;
2618 if (limit_pages < (original_pages * 2))
2619 limit_pages = original_pages * 2;
2620
2621 size_t mapped_size, align_offset;
2622 span_t *span;
2623
2624retry:
2625 align_offset = 0;
2626 mapped_size = num_pages * _memory_page_size;
2627
2628 span = (span_t *)_rpmalloc_mmap(mapped_size, &align_offset);
2629 if (!span) {
2630 errno = ENOMEM;
2631 return 0;
2632 }
2633 ptr = pointer_offset(span, SPAN_HEADER_SIZE);
2634
2635 if ((uintptr_t)ptr & align_mask)
2636 ptr = (void *)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment);
2637
2638 if (((size_t)pointer_diff(ptr, span) >= _memory_span_size) ||
2639 (pointer_offset(ptr, size) > pointer_offset(span, mapped_size)) ||
2640 (((uintptr_t)ptr & _memory_span_mask) != (uintptr_t)span)) {
2641 _rpmalloc_unmap(span, mapped_size, align_offset, mapped_size);
2642 ++num_pages;
2643 if (num_pages > limit_pages) {
2644 errno = EINVAL;
2645 return 0;
2646 }
2647 goto retry;
2648 }
2649
2650 // Store page count in span_count
2652 span->span_count = (uint32_t)num_pages;
2653 span->align_offset = (uint32_t)align_offset;
2654 span->heap = heap;
2655 _rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak);
2656
2657#if RPMALLOC_FIRST_CLASS_HEAPS
2658 _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span);
2659#endif
2660 ++heap->full_span_count;
2661
2662 _rpmalloc_stat_add64(&_allocation_counter, 1);
2663
2664 return ptr;
2665}
2666
2667////////////
2668///
2669/// Deallocation entry points
2670///
2671//////
2672
2673//! Deallocate the given small/medium memory block in the current thread local
2674//! heap
2676 void *block) {
2677 heap_t *heap = span->heap;
2679 !heap->owner_thread || heap->finalize,
2680 "Internal failure");
2681 // Add block to free list
2683 span->used_count = span->block_count;
2684#if RPMALLOC_FIRST_CLASS_HEAPS
2685 _rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class],
2686 span);
2687#endif
2689 &heap->size_class[span->size_class].partial_span, span);
2690 --heap->full_span_count;
2691 }
2692 *((void **)block) = span->free_list;
2693 --span->used_count;
2694 span->free_list = block;
2695 if (UNEXPECTED(span->used_count == span->list_size)) {
2696 // If there are no used blocks it is guaranteed that no other external
2697 // thread is accessing the span
2698 if (span->used_count) {
2699 // Make sure we have synchronized the deferred list and list size by using
2700 // acquire semantics and guarantee that no external thread is accessing
2701 // span concurrently
2702 void *free_list;
2703 do {
2706 } while (free_list == INVALID_POINTER);
2708 }
2710 &heap->size_class[span->size_class].partial_span, span);
2712 }
2713}
2714
2716 if (span->size_class != SIZE_CLASS_HUGE)
2717 _rpmalloc_stat_inc(&heap->span_use[span->span_count - 1].spans_deferred);
2718 // This list does not need ABA protection, no mutable side state
2719 do {
2720 span->free_list = (void *)atomic_load_ptr(&heap->span_free_deferred);
2721 } while (!atomic_cas_ptr(&heap->span_free_deferred, span, span->free_list));
2722}
2723
2724//! Put the block in the deferred free list of the owning span
2726 void *block) {
2727 // The memory ordering here is a bit tricky, to avoid having to ABA protect
2728 // the deferred free list to avoid desynchronization of list and list size
2729 // we need to have acquire semantics on successful CAS of the pointer to
2730 // guarantee the list_size variable validity + release semantics on pointer
2731 // store
2732 void *free_list;
2733 do {
2734 free_list =
2736 } while (free_list == INVALID_POINTER);
2737 *((void **)block) = free_list;
2738 uint32_t free_count = ++span->list_size;
2739 int all_deferred_free = (free_count == span->block_count);
2741 if (all_deferred_free) {
2742 // Span was completely freed by this block. Due to the INVALID_POINTER spin
2743 // lock no other thread can reach this state simultaneously on this span.
2744 // Safe to move to owner heap deferred cache
2746 }
2747}
2748
2751 if (span->flags & SPAN_FLAG_ALIGNED_BLOCKS) {
2752 // Realign pointer to block start
2753 void *blocks_start = pointer_offset(span, SPAN_HEADER_SIZE);
2754 uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start);
2755 p = pointer_offset(p, -(int32_t)(block_offset % span->block_size));
2756 }
2757 // Check if block belongs to this heap or if deallocation should be deferred
2758#if RPMALLOC_FIRST_CLASS_HEAPS
2759 int defer =
2760 (span->heap->owner_thread &&
2761 (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
2762#else
2763 int defer =
2764 ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
2765#endif
2766 if (!defer)
2768 else
2770}
2771
2772//! Deallocate the given large memory block to the current heap
2774 rpmalloc_assert(span->size_class == SIZE_CLASS_LARGE, "Bad span size class");
2776 !(span->flags & SPAN_FLAG_SUBSPAN),
2777 "Span flag corrupted");
2779 (span->flags & SPAN_FLAG_SUBSPAN),
2780 "Span flag corrupted");
2781 // We must always defer (unless finalizing) if from another heap since we
2782 // cannot touch the list or counters of another heap
2783#if RPMALLOC_FIRST_CLASS_HEAPS
2784 int defer =
2785 (span->heap->owner_thread &&
2786 (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
2787#else
2788 int defer =
2789 ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
2790#endif
2791 if (defer) {
2793 return;
2794 }
2795 rpmalloc_assert(span->heap->full_span_count, "Heap span counter corrupted");
2796 --span->heap->full_span_count;
2797#if RPMALLOC_FIRST_CLASS_HEAPS
2798 _rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span);
2799#endif
2800#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
2801 // Decrease counter
2802 size_t idx = span->span_count - 1;
2803 atomic_decr32(&span->heap->span_use[idx].current);
2804#endif
2805 heap_t *heap = span->heap;
2806 rpmalloc_assert(heap, "No thread heap");
2807#if ENABLE_THREAD_CACHE
2808 const int set_as_reserved =
2809 ((span->span_count > 1) && (heap->span_cache.count == 0) &&
2810 !heap->finalize && !heap->spans_reserved);
2811#else
2812 const int set_as_reserved =
2813 ((span->span_count > 1) && !heap->finalize && !heap->spans_reserved);
2814#endif
2815 if (set_as_reserved) {
2816 heap->span_reserve = span;
2817 heap->spans_reserved = span->span_count;
2818 if (span->flags & SPAN_FLAG_MASTER) {
2819 heap->span_reserve_master = span;
2820 } else { // SPAN_FLAG_SUBSPAN
2821 span_t *master = (span_t *)pointer_offset(
2822 span,
2823 -(intptr_t)((size_t)span->offset_from_master * _memory_span_size));
2824 heap->span_reserve_master = master;
2825 rpmalloc_assert(master->flags & SPAN_FLAG_MASTER, "Span flag corrupted");
2826 rpmalloc_assert(atomic_load32(&master->remaining_spans) >=
2827 (int32_t)span->span_count,
2828 "Master span count corrupted");
2829 }
2830 _rpmalloc_stat_inc(&heap->span_use[idx].spans_to_reserved);
2831 } else {
2832 // Insert into cache list
2833 _rpmalloc_heap_cache_insert(heap, span);
2834 }
2835}
2836
2837//! Deallocate the given huge span
2839 rpmalloc_assert(span->heap, "No span heap");
2840#if RPMALLOC_FIRST_CLASS_HEAPS
2841 int defer =
2842 (span->heap->owner_thread &&
2843 (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
2844#else
2845 int defer =
2846 ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
2847#endif
2848 if (defer) {
2850 return;
2851 }
2852 rpmalloc_assert(span->heap->full_span_count, "Heap span counter corrupted");
2853 --span->heap->full_span_count;
2854#if RPMALLOC_FIRST_CLASS_HEAPS
2855 _rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span);
2856#endif
2857
2858 // Oversized allocation, page count is stored in span_count
2859 size_t num_pages = span->span_count;
2860 _rpmalloc_unmap(span, num_pages * _memory_page_size, span->align_offset,
2861 num_pages * _memory_page_size);
2862 _rpmalloc_stat_sub(&_huge_pages_current, num_pages);
2863}
2864
2865//! Deallocate the given block
2866static void _rpmalloc_deallocate(void *p) {
2867 _rpmalloc_stat_add64(&_deallocation_counter, 1);
2868 // Grab the span (always at start of span, using span alignment)
2869 span_t *span = (span_t *)((uintptr_t)p & _memory_span_mask);
2870 if (UNEXPECTED(!span))
2871 return;
2874 else if (span->size_class == SIZE_CLASS_LARGE)
2876 else
2878}
2879
2880////////////
2881///
2882/// Reallocation entry points
2883///
2884//////
2885
2886static size_t _rpmalloc_usable_size(void *p);
2887
2888//! Reallocate the given block to the given size
2889static void *_rpmalloc_reallocate(heap_t *heap, void *p, size_t size,
2890 size_t oldsize, unsigned int flags) {
2891 if (p) {
2892 // Grab the span using guaranteed span alignment
2893 span_t *span = (span_t *)((uintptr_t)p & _memory_span_mask);
2894 if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) {
2895 // Small/medium sized block
2896 rpmalloc_assert(span->span_count == 1, "Span counter corrupted");
2897 void *blocks_start = pointer_offset(span, SPAN_HEADER_SIZE);
2898 uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start);
2899 uint32_t block_idx = block_offset / span->block_size;
2900 void *block =
2901 pointer_offset(blocks_start, (size_t)block_idx * span->block_size);
2902 if (!oldsize)
2903 oldsize =
2904 (size_t)((ptrdiff_t)span->block_size - pointer_diff(p, block));
2905 if ((size_t)span->block_size >= size) {
2906 // Still fits in block, never mind trying to save memory, but preserve
2907 // data if alignment changed
2908 if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE))
2909 memmove(block, p, oldsize);
2910 return block;
2911 }
2912 } else if (span->size_class == SIZE_CLASS_LARGE) {
2913 // Large block
2914 size_t total_size = size + SPAN_HEADER_SIZE;
2915 size_t num_spans = total_size >> _memory_span_size_shift;
2916 if (total_size & (_memory_span_mask - 1))
2917 ++num_spans;
2918 size_t current_spans = span->span_count;
2919 void *block = pointer_offset(span, SPAN_HEADER_SIZE);
2920 if (!oldsize)
2921 oldsize = (current_spans * _memory_span_size) -
2922 (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE;
2923 if ((current_spans >= num_spans) && (total_size >= (oldsize / 2))) {
2924 // Still fits in block, never mind trying to save memory, but preserve
2925 // data if alignment changed
2926 if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE))
2927 memmove(block, p, oldsize);
2928 return block;
2929 }
2930 } else {
2931 // Oversized block
2932 size_t total_size = size + SPAN_HEADER_SIZE;
2933 size_t num_pages = total_size >> _memory_page_size_shift;
2934 if (total_size & (_memory_page_size - 1))
2935 ++num_pages;
2936 // Page count is stored in span_count
2937 size_t current_pages = span->span_count;
2938 void *block = pointer_offset(span, SPAN_HEADER_SIZE);
2939 if (!oldsize)
2940 oldsize = (current_pages * _memory_page_size) -
2941 (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE;
2942 if ((current_pages >= num_pages) && (num_pages >= (current_pages / 2))) {
2943 // Still fits in block, never mind trying to save memory, but preserve
2944 // data if alignment changed
2945 if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE))
2946 memmove(block, p, oldsize);
2947 return block;
2948 }
2949 }
2950 } else {
2951 oldsize = 0;
2952 }
2953
2954 if (!!(flags & RPMALLOC_GROW_OR_FAIL))
2955 return 0;
2956
2957 // Size is greater than block size, need to allocate a new block and
2958 // deallocate the old Avoid hysteresis by overallocating if increase is small
2959 // (below 37%)
2960 size_t lower_bound = oldsize + (oldsize >> 2) + (oldsize >> 3);
2961 size_t new_size =
2962 (size > lower_bound) ? size : ((size > oldsize) ? lower_bound : size);
2963 void *block = _rpmalloc_allocate(heap, new_size);
2964 if (p && block) {
2965 if (!(flags & RPMALLOC_NO_PRESERVE))
2966 memcpy(block, p, oldsize < new_size ? oldsize : new_size);
2968 }
2969
2970 return block;
2971}
2972
2973static void *_rpmalloc_aligned_reallocate(heap_t *heap, void *ptr,
2974 size_t alignment, size_t size,
2975 size_t oldsize, unsigned int flags) {
2976 if (alignment <= SMALL_GRANULARITY)
2977 return _rpmalloc_reallocate(heap, ptr, size, oldsize, flags);
2978
2979 int no_alloc = !!(flags & RPMALLOC_GROW_OR_FAIL);
2980 size_t usablesize = (ptr ? _rpmalloc_usable_size(ptr) : 0);
2981 if ((usablesize >= size) && !((uintptr_t)ptr & (alignment - 1))) {
2982 if (no_alloc || (size >= (usablesize / 2)))
2983 return ptr;
2984 }
2985 // Aligned alloc marks span as having aligned blocks
2986 void *block =
2987 (!no_alloc ? _rpmalloc_aligned_allocate(heap, alignment, size) : 0);
2988 if (EXPECTED(block != 0)) {
2989 if (!(flags & RPMALLOC_NO_PRESERVE) && ptr) {
2990 if (!oldsize)
2991 oldsize = usablesize;
2992 memcpy(block, ptr, oldsize < size ? oldsize : size);
2993 }
2995 }
2996 return block;
2997}
2998
2999////////////
3000///
3001/// Initialization, finalization and utility
3002///
3003//////
3004
3005//! Get the usable size of the given block
3006static size_t _rpmalloc_usable_size(void *p) {
3007 // Grab the span using guaranteed span alignment
3008 span_t *span = (span_t *)((uintptr_t)p & _memory_span_mask);
3009 if (span->size_class < SIZE_CLASS_COUNT) {
3010 // Small/medium block
3011 void *blocks_start = pointer_offset(span, SPAN_HEADER_SIZE);
3012 return span->block_size -
3013 ((size_t)pointer_diff(p, blocks_start) % span->block_size);
3014 }
3015 if (span->size_class == SIZE_CLASS_LARGE) {
3016 // Large block
3017 size_t current_spans = span->span_count;
3018 return (current_spans * _memory_span_size) - (size_t)pointer_diff(p, span);
3019 }
3020 // Oversized block, page count is stored in span_count
3021 size_t current_pages = span->span_count;
3022 return (current_pages * _memory_page_size) - (size_t)pointer_diff(p, span);
3023}
3024
3025//! Adjust and optimize the size class properties for the given class
3026static void _rpmalloc_adjust_size_class(size_t iclass) {
3027 size_t block_size = _memory_size_class[iclass].block_size;
3028 size_t block_count = (_memory_span_size - SPAN_HEADER_SIZE) / block_size;
3029
3030 _memory_size_class[iclass].block_count = (uint16_t)block_count;
3031 _memory_size_class[iclass].class_idx = (uint16_t)iclass;
3032
3033 // Check if previous size classes can be merged
3034 if (iclass >= SMALL_CLASS_COUNT) {
3035 size_t prevclass = iclass;
3036 while (prevclass > 0) {
3037 --prevclass;
3038 // A class can be merged if number of pages and number of blocks are equal
3039 if (_memory_size_class[prevclass].block_count ==
3040 _memory_size_class[iclass].block_count)
3042 _memory_size_class + iclass,
3043 sizeof(_memory_size_class[iclass]));
3044 else
3045 break;
3046 }
3047 }
3048}
3049
3050//! Initialize the allocator and setup global data
3051extern inline int rpmalloc_initialize(void) {
3054 return 0;
3055 }
3057}
3058
3062 return 0;
3063 }
3065
3066 if (config)
3067 memcpy(&_memory_config, config, sizeof(rpmalloc_config_t));
3068 else
3070
3074 }
3075
3076#if PLATFORM_WINDOWS
3077 SYSTEM_INFO system_info;
3078 memset(&system_info, 0, sizeof(system_info));
3079 GetSystemInfo(&system_info);
3080 _memory_map_granularity = system_info.dwAllocationGranularity;
3081#else
3082 _memory_map_granularity = (size_t)sysconf(_SC_PAGESIZE);
3083#endif
3084
3085#if RPMALLOC_CONFIGURABLE
3087#else
3089#endif
3091 if (!_memory_page_size) {
3092#if PLATFORM_WINDOWS
3093 _memory_page_size = system_info.dwPageSize;
3094#else
3097#if defined(__linux__)
3098 size_t huge_page_size = 0;
3099 FILE *meminfo = fopen("/proc/meminfo", "r");
3100 if (meminfo) {
3101 char line[128];
3102 while (!huge_page_size && fgets(line, sizeof(line) - 1, meminfo)) {
3103 line[sizeof(line) - 1] = 0;
3104 if (strstr(line, "Hugepagesize:"))
3105 huge_page_size = (size_t)strtol(line + 13, 0, 10) * 1024;
3106 }
3107 fclose(meminfo);
3108 }
3109 if (huge_page_size) {
3111 _memory_page_size = huge_page_size;
3112 _memory_map_granularity = huge_page_size;
3113 }
3114#elif defined(__FreeBSD__)
3115 int rc;
3116 size_t sz = sizeof(rc);
3117
3118 if (sysctlbyname("vm.pmap.pg_ps_enabled", &rc, &sz, NULL, 0) == 0 &&
3119 rc == 1) {
3120 static size_t defsize = 2 * 1024 * 1024;
3121 int nsize = 0;
3122 size_t sizes[4] = {0};
3124 _memory_page_size = defsize;
3125 if ((nsize = getpagesizes(sizes, 4)) >= 2) {
3126 nsize--;
3127 for (size_t csize = sizes[nsize]; nsize >= 0 && csize;
3128 --nsize, csize = sizes[nsize]) {
3129 //! Unlikely, but as a precaution..
3130 rpmalloc_assert(!(csize & (csize - 1)) && !(csize % 1024),
3131 "Invalid page size");
3132 if (defsize < csize) {
3133 _memory_page_size = csize;
3134 break;
3135 }
3136 }
3137 }
3139 }
3140#elif defined(__APPLE__) || defined(__NetBSD__)
3142 _memory_page_size = 2 * 1024 * 1024;
3144#endif
3145 }
3146#endif
3147 } else {
3150 }
3151
3152#if PLATFORM_WINDOWS
3154 HANDLE token = 0;
3155 size_t large_page_minimum = GetLargePageMinimum();
3156 if (large_page_minimum)
3157 OpenProcessToken(GetCurrentProcess(),
3158 TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token);
3159 if (token) {
3160 LUID luid;
3161 if (LookupPrivilegeValue(0, SE_LOCK_MEMORY_NAME, &luid)) {
3162 TOKEN_PRIVILEGES token_privileges;
3163 memset(&token_privileges, 0, sizeof(token_privileges));
3164 token_privileges.PrivilegeCount = 1;
3165 token_privileges.Privileges[0].Luid = luid;
3166 token_privileges.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
3167 if (AdjustTokenPrivileges(token, FALSE, &token_privileges, 0, 0, 0)) {
3168 if (GetLastError() == ERROR_SUCCESS)
3170 }
3171 }
3172 CloseHandle(token);
3173 }
3174 if (_memory_huge_pages) {
3175 if (large_page_minimum > _memory_page_size)
3176 _memory_page_size = large_page_minimum;
3177 if (large_page_minimum > _memory_map_granularity)
3178 _memory_map_granularity = large_page_minimum;
3179 }
3180 }
3181#endif
3182
3183 size_t min_span_size = 256;
3184 size_t max_page_size;
3185#if UINTPTR_MAX > 0xFFFFFFFF
3186 max_page_size = 4096ULL * 1024ULL * 1024ULL;
3187#else
3188 max_page_size = 4 * 1024 * 1024;
3189#endif
3190 if (_memory_page_size < min_span_size)
3191 _memory_page_size = min_span_size;
3192 if (_memory_page_size > max_page_size)
3193 _memory_page_size = max_page_size;
3195 size_t page_size_bit = _memory_page_size;
3196 while (page_size_bit != 1) {
3198 page_size_bit >>= 1;
3199 }
3201
3202#if RPMALLOC_CONFIGURABLE
3207 } else {
3208 size_t span_size = _memory_config.span_size;
3209 if (span_size > (256 * 1024))
3210 span_size = (256 * 1024);
3211 _memory_span_size = 4096;
3213 while (_memory_span_size < span_size) {
3214 _memory_span_size <<= 1;
3216 }
3217 _memory_span_mask = ~(uintptr_t)(_memory_span_size - 1);
3218 }
3219#endif
3220
3232
3237
3238#if ((defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD) || \
3239 defined(__TINYC__)
3240 if (pthread_key_create(&_memory_thread_heap, _rpmalloc_heap_release_raw_fc))
3241 return -1;
3242#endif
3243#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
3244 fls_key = FlsAlloc(&_rpmalloc_thread_destructor);
3245#endif
3246
3247 // Setup all small and medium size classes
3248 size_t iclass = 0;
3251 for (iclass = 1; iclass < SMALL_CLASS_COUNT; ++iclass) {
3252 size_t size = iclass * SMALL_GRANULARITY;
3253 _memory_size_class[iclass].block_size = (uint32_t)size;
3255 }
3256 // At least two blocks per span, then fall back to large allocations
3260 for (iclass = 0; iclass < MEDIUM_CLASS_COUNT; ++iclass) {
3261 size_t size = SMALL_SIZE_LIMIT + ((iclass + 1) * MEDIUM_GRANULARITY);
3262 if (size > _memory_medium_size_limit) {
3265 break;
3266 }
3269 }
3270
3272#if RPMALLOC_FIRST_CLASS_HEAPS
3273 _memory_first_class_orphan_heaps = 0;
3274#endif
3275#if ENABLE_STATISTICS
3276 atomic_store32(&_memory_active_heaps, 0);
3277 atomic_store32(&_mapped_pages, 0);
3278 _mapped_pages_peak = 0;
3279 atomic_store32(&_master_spans, 0);
3280 atomic_store32(&_mapped_total, 0);
3281 atomic_store32(&_unmapped_total, 0);
3282 atomic_store32(&_mapped_pages_os, 0);
3283 atomic_store32(&_huge_pages_current, 0);
3284 _huge_pages_peak = 0;
3285#endif
3286 memset(_memory_heaps, 0, sizeof(_memory_heaps));
3288
3290
3291 // Initialize this thread
3293 return 0;
3294}
3295
3296//! Finalize the allocator
3299 // rpmalloc_dump_statistics(stdout);
3300
3307 }
3309
3310 // Free all thread caches and fully free spans
3311 for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) {
3312 heap_t *heap = _memory_heaps[list_idx];
3313 while (heap) {
3314 heap_t *next_heap = heap->next_heap;
3315 heap->finalize = 1;
3317 heap = next_heap;
3318 }
3319 }
3320
3321#if ENABLE_GLOBAL_CACHE
3322 // Free global caches
3323 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass)
3324 _rpmalloc_global_cache_finalize(&_memory_span_cache[iclass]);
3325#endif
3326
3327#if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
3328 pthread_key_delete(_memory_thread_heap);
3329#endif
3330#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
3331 FlsFree(fls_key);
3332 fls_key = 0;
3333#endif
3334#if ENABLE_STATISTICS
3335 // If you hit these asserts you probably have memory leaks (perhaps global
3336 // scope data doing dynamic allocations) or double frees in your code
3337 rpmalloc_assert(atomic_load32(&_mapped_pages) == 0, "Memory leak detected");
3338 rpmalloc_assert(atomic_load32(&_mapped_pages_os) == 0,
3339 "Memory leak detected");
3340#endif
3341
3343}
3344
3345//! Initialize thread, assign heap
3346extern inline void rpmalloc_thread_initialize(void) {
3347 if (!get_thread_heap_raw()) {
3349 if (heap) {
3350 _rpmalloc_stat_inc(&_memory_active_heaps);
3351 set_thread_heap(heap);
3352#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
3353 FlsSetValue(fls_key, heap);
3354#endif
3355 }
3356 }
3357}
3358
3359//! Finalize thread, orphan heap
3360void rpmalloc_thread_finalize(int release_caches) {
3361 heap_t *heap = get_thread_heap_raw();
3362 if (heap)
3363 _rpmalloc_heap_release_raw(heap, release_caches);
3364 set_thread_heap(0);
3365#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
3366 FlsSetValue(fls_key, 0);
3367#endif
3368}
3369
3371 return (get_thread_heap_raw() != 0) ? 1 : 0;
3372}
3373
3375
3376// Extern interface
3377
3378extern inline RPMALLOC_ALLOCATOR void *rpmalloc(size_t size) {
3379#if ENABLE_VALIDATE_ARGS
3380 if (size >= MAX_ALLOC_SIZE) {
3381 errno = EINVAL;
3382 return 0;
3383 }
3384#endif
3385 heap_t *heap = get_thread_heap();
3386 return _rpmalloc_allocate(heap, size);
3387}
3388
3389extern inline void rpfree(void *ptr) { _rpmalloc_deallocate(ptr); }
3390
3391extern inline RPMALLOC_ALLOCATOR void *rpcalloc(size_t num, size_t size) {
3392 size_t total;
3393#if ENABLE_VALIDATE_ARGS
3394#if PLATFORM_WINDOWS
3395 int err = SizeTMult(num, size, &total);
3396 if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
3397 errno = EINVAL;
3398 return 0;
3399 }
3400#else
3401 int err = __builtin_umull_overflow(num, size, &total);
3402 if (err || (total >= MAX_ALLOC_SIZE)) {
3403 errno = EINVAL;
3404 return 0;
3405 }
3406#endif
3407#else
3408 total = num * size;
3409#endif
3410 heap_t *heap = get_thread_heap();
3411 void *block = _rpmalloc_allocate(heap, total);
3412 if (block)
3413 memset(block, 0, total);
3414 return block;
3415}
3416
3417extern inline RPMALLOC_ALLOCATOR void *rprealloc(void *ptr, size_t size) {
3418#if ENABLE_VALIDATE_ARGS
3419 if (size >= MAX_ALLOC_SIZE) {
3420 errno = EINVAL;
3421 return ptr;
3422 }
3423#endif
3424 heap_t *heap = get_thread_heap();
3425 return _rpmalloc_reallocate(heap, ptr, size, 0, 0);
3426}
3427
3428extern RPMALLOC_ALLOCATOR void *rpaligned_realloc(void *ptr, size_t alignment,
3429 size_t size, size_t oldsize,
3430 unsigned int flags) {
3431#if ENABLE_VALIDATE_ARGS
3432 if ((size + alignment < size) || (alignment > _memory_page_size)) {
3433 errno = EINVAL;
3434 return 0;
3435 }
3436#endif
3437 heap_t *heap = get_thread_heap();
3438 return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, oldsize,
3439 flags);
3440}
3441
3442extern RPMALLOC_ALLOCATOR void *rpaligned_alloc(size_t alignment, size_t size) {
3443 heap_t *heap = get_thread_heap();
3444 return _rpmalloc_aligned_allocate(heap, alignment, size);
3445}
3446
3447extern inline RPMALLOC_ALLOCATOR void *
3448rpaligned_calloc(size_t alignment, size_t num, size_t size) {
3449 size_t total;
3450#if ENABLE_VALIDATE_ARGS
3451#if PLATFORM_WINDOWS
3452 int err = SizeTMult(num, size, &total);
3453 if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
3454 errno = EINVAL;
3455 return 0;
3456 }
3457#else
3458 int err = __builtin_umull_overflow(num, size, &total);
3459 if (err || (total >= MAX_ALLOC_SIZE)) {
3460 errno = EINVAL;
3461 return 0;
3462 }
3463#endif
3464#else
3465 total = num * size;
3466#endif
3467 void *block = rpaligned_alloc(alignment, total);
3468 if (block)
3469 memset(block, 0, total);
3470 return block;
3471}
3472
3473extern inline RPMALLOC_ALLOCATOR void *rpmemalign(size_t alignment,
3474 size_t size) {
3475 return rpaligned_alloc(alignment, size);
3476}
3477
3478extern inline int rpposix_memalign(void **memptr, size_t alignment,
3479 size_t size) {
3480 if (memptr)
3481 *memptr = rpaligned_alloc(alignment, size);
3482 else
3483 return EINVAL;
3484 return *memptr ? 0 : ENOMEM;
3485}
3486
3487extern inline size_t rpmalloc_usable_size(void *ptr) {
3488 return (ptr ? _rpmalloc_usable_size(ptr) : 0);
3489}
3490
3491extern inline void rpmalloc_thread_collect(void) {}
3492
3494 memset(stats, 0, sizeof(rpmalloc_thread_statistics_t));
3495 heap_t *heap = get_thread_heap_raw();
3496 if (!heap)
3497 return;
3498
3499 for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
3500 size_class_t *size_class = _memory_size_class + iclass;
3501 span_t *span = heap->size_class[iclass].partial_span;
3502 while (span) {
3503 size_t free_count = span->list_size;
3504 size_t block_count = size_class->block_count;
3505 if (span->free_list_limit < block_count)
3506 block_count = span->free_list_limit;
3507 free_count += (block_count - span->used_count);
3508 stats->sizecache += free_count * size_class->block_size;
3509 span = span->next;
3510 }
3511 }
3512
3513#if ENABLE_THREAD_CACHE
3514 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
3515 span_cache_t *span_cache;
3516 if (!iclass)
3517 span_cache = &heap->span_cache;
3518 else
3519 span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1));
3520 stats->spancache += span_cache->count * (iclass + 1) * _memory_span_size;
3521 }
3522#endif
3523
3524 span_t *deferred = (span_t *)atomic_load_ptr(&heap->span_free_deferred);
3525 while (deferred) {
3526 if (deferred->size_class != SIZE_CLASS_HUGE)
3527 stats->spancache += (size_t)deferred->span_count * _memory_span_size;
3528 deferred = (span_t *)deferred->free_list;
3529 }
3530
3531#if ENABLE_STATISTICS
3532 stats->thread_to_global = (size_t)atomic_load64(&heap->thread_to_global);
3533 stats->global_to_thread = (size_t)atomic_load64(&heap->global_to_thread);
3534
3535 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
3536 stats->span_use[iclass].current =
3537 (size_t)atomic_load32(&heap->span_use[iclass].current);
3538 stats->span_use[iclass].peak =
3539 (size_t)atomic_load32(&heap->span_use[iclass].high);
3540 stats->span_use[iclass].to_global =
3541 (size_t)atomic_load32(&heap->span_use[iclass].spans_to_global);
3542 stats->span_use[iclass].from_global =
3543 (size_t)atomic_load32(&heap->span_use[iclass].spans_from_global);
3544 stats->span_use[iclass].to_cache =
3545 (size_t)atomic_load32(&heap->span_use[iclass].spans_to_cache);
3546 stats->span_use[iclass].from_cache =
3547 (size_t)atomic_load32(&heap->span_use[iclass].spans_from_cache);
3548 stats->span_use[iclass].to_reserved =
3549 (size_t)atomic_load32(&heap->span_use[iclass].spans_to_reserved);
3550 stats->span_use[iclass].from_reserved =
3551 (size_t)atomic_load32(&heap->span_use[iclass].spans_from_reserved);
3552 stats->span_use[iclass].map_calls =
3553 (size_t)atomic_load32(&heap->span_use[iclass].spans_map_calls);
3554 }
3555 for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
3556 stats->size_use[iclass].alloc_current =
3557 (size_t)atomic_load32(&heap->size_class_use[iclass].alloc_current);
3558 stats->size_use[iclass].alloc_peak =
3559 (size_t)heap->size_class_use[iclass].alloc_peak;
3560 stats->size_use[iclass].alloc_total =
3561 (size_t)atomic_load32(&heap->size_class_use[iclass].alloc_total);
3562 stats->size_use[iclass].free_total =
3563 (size_t)atomic_load32(&heap->size_class_use[iclass].free_total);
3564 stats->size_use[iclass].spans_to_cache =
3565 (size_t)atomic_load32(&heap->size_class_use[iclass].spans_to_cache);
3566 stats->size_use[iclass].spans_from_cache =
3567 (size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_cache);
3568 stats->size_use[iclass].spans_from_reserved = (size_t)atomic_load32(
3569 &heap->size_class_use[iclass].spans_from_reserved);
3570 stats->size_use[iclass].map_calls =
3571 (size_t)atomic_load32(&heap->size_class_use[iclass].spans_map_calls);
3572 }
3573#endif
3574}
3575
3577 memset(stats, 0, sizeof(rpmalloc_global_statistics_t));
3578#if ENABLE_STATISTICS
3579 stats->mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size;
3580 stats->mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size;
3581 stats->mapped_total =
3582 (size_t)atomic_load32(&_mapped_total) * _memory_page_size;
3583 stats->unmapped_total =
3584 (size_t)atomic_load32(&_unmapped_total) * _memory_page_size;
3585 stats->huge_alloc =
3586 (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size;
3587 stats->huge_alloc_peak = (size_t)_huge_pages_peak * _memory_page_size;
3588#endif
3589#if ENABLE_GLOBAL_CACHE
3590 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
3591 global_cache_t *cache = &_memory_span_cache[iclass];
3592 while (!atomic_cas32_acquire(&cache->lock, 1, 0))
3594 uint32_t count = cache->count;
3595#if ENABLE_UNLIMITED_CACHE
3596 span_t *current_span = cache->overflow;
3597 while (current_span) {
3598 ++count;
3599 current_span = current_span->next;
3600 }
3601#endif
3602 atomic_store32_release(&cache->lock, 0);
3603 stats->cached += count * (iclass + 1) * _memory_span_size;
3604 }
3605#endif
3606}
3607
3608#if ENABLE_STATISTICS
3609
3610static void _memory_heap_dump_statistics(heap_t *heap, void *file) {
3611 fprintf(file, "Heap %d stats:\n", heap->id);
3612 fprintf(file, "Class CurAlloc PeakAlloc TotAlloc TotFree BlkSize "
3613 "BlkCount SpansCur SpansPeak PeakAllocMiB ToCacheMiB "
3614 "FromCacheMiB FromReserveMiB MmapCalls\n");
3615 for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
3616 if (!atomic_load32(&heap->size_class_use[iclass].alloc_total))
3617 continue;
3618 fprintf(
3619 file,
3620 "%3u: %10u %10u %10u %10u %8u %8u %8d %9d %13zu %11zu %12zu %14zu "
3621 "%9u\n",
3622 (uint32_t)iclass,
3623 atomic_load32(&heap->size_class_use[iclass].alloc_current),
3624 heap->size_class_use[iclass].alloc_peak,
3625 atomic_load32(&heap->size_class_use[iclass].alloc_total),
3626 atomic_load32(&heap->size_class_use[iclass].free_total),
3629 atomic_load32(&heap->size_class_use[iclass].spans_current),
3630 heap->size_class_use[iclass].spans_peak,
3631 ((size_t)heap->size_class_use[iclass].alloc_peak *
3632 (size_t)_memory_size_class[iclass].block_size) /
3633 (size_t)(1024 * 1024),
3634 ((size_t)atomic_load32(&heap->size_class_use[iclass].spans_to_cache) *
3636 (size_t)(1024 * 1024),
3637 ((size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_cache) *
3639 (size_t)(1024 * 1024),
3640 ((size_t)atomic_load32(
3641 &heap->size_class_use[iclass].spans_from_reserved) *
3643 (size_t)(1024 * 1024),
3644 atomic_load32(&heap->size_class_use[iclass].spans_map_calls));
3645 }
3646 fprintf(file, "Spans Current Peak Deferred PeakMiB Cached ToCacheMiB "
3647 "FromCacheMiB ToReserveMiB FromReserveMiB ToGlobalMiB "
3648 "FromGlobalMiB MmapCalls\n");
3649 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
3650 if (!atomic_load32(&heap->span_use[iclass].high) &&
3651 !atomic_load32(&heap->span_use[iclass].spans_map_calls))
3652 continue;
3653 fprintf(
3654 file,
3655 "%4u: %8d %8u %8u %8zu %7u %11zu %12zu %12zu %14zu %11zu %13zu %10u\n",
3656 (uint32_t)(iclass + 1), atomic_load32(&heap->span_use[iclass].current),
3657 atomic_load32(&heap->span_use[iclass].high),
3658 atomic_load32(&heap->span_use[iclass].spans_deferred),
3659 ((size_t)atomic_load32(&heap->span_use[iclass].high) *
3660 (size_t)_memory_span_size * (iclass + 1)) /
3661 (size_t)(1024 * 1024),
3663 (unsigned int)(!iclass ? heap->span_cache.count
3664 : heap->span_large_cache[iclass - 1].count),
3665 ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_cache) *
3666 (iclass + 1) * _memory_span_size) /
3667 (size_t)(1024 * 1024),
3668 ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_cache) *
3669 (iclass + 1) * _memory_span_size) /
3670 (size_t)(1024 * 1024),
3671#else
3672 0, (size_t)0, (size_t)0,
3673#endif
3674 ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_reserved) *
3675 (iclass + 1) * _memory_span_size) /
3676 (size_t)(1024 * 1024),
3677 ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_reserved) *
3678 (iclass + 1) * _memory_span_size) /
3679 (size_t)(1024 * 1024),
3680 ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_global) *
3681 (size_t)_memory_span_size * (iclass + 1)) /
3682 (size_t)(1024 * 1024),
3683 ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_global) *
3684 (size_t)_memory_span_size * (iclass + 1)) /
3685 (size_t)(1024 * 1024),
3686 atomic_load32(&heap->span_use[iclass].spans_map_calls));
3687 }
3688 fprintf(file, "Full spans: %zu\n", heap->full_span_count);
3689 fprintf(file, "ThreadToGlobalMiB GlobalToThreadMiB\n");
3690 fprintf(
3691 file, "%17zu %17zu\n",
3692 (size_t)atomic_load64(&heap->thread_to_global) / (size_t)(1024 * 1024),
3693 (size_t)atomic_load64(&heap->global_to_thread) / (size_t)(1024 * 1024));
3694}
3695
3696#endif
3697
3699#if ENABLE_STATISTICS
3700 for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) {
3701 heap_t *heap = _memory_heaps[list_idx];
3702 while (heap) {
3703 int need_dump = 0;
3704 for (size_t iclass = 0; !need_dump && (iclass < SIZE_CLASS_COUNT);
3705 ++iclass) {
3706 if (!atomic_load32(&heap->size_class_use[iclass].alloc_total)) {
3708 !atomic_load32(&heap->size_class_use[iclass].free_total),
3709 "Heap statistics counter mismatch");
3711 !atomic_load32(&heap->size_class_use[iclass].spans_map_calls),
3712 "Heap statistics counter mismatch");
3713 continue;
3714 }
3715 need_dump = 1;
3716 }
3717 for (size_t iclass = 0; !need_dump && (iclass < LARGE_CLASS_COUNT);
3718 ++iclass) {
3719 if (!atomic_load32(&heap->span_use[iclass].high) &&
3720 !atomic_load32(&heap->span_use[iclass].spans_map_calls))
3721 continue;
3722 need_dump = 1;
3723 }
3724 if (need_dump)
3725 _memory_heap_dump_statistics(heap, file);
3726 heap = heap->next_heap;
3727 }
3728 }
3729 fprintf(file, "Global stats:\n");
3730 size_t huge_current =
3731 (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size;
3732 size_t huge_peak = (size_t)_huge_pages_peak * _memory_page_size;
3733 fprintf(file, "HugeCurrentMiB HugePeakMiB\n");
3734 fprintf(file, "%14zu %11zu\n", huge_current / (size_t)(1024 * 1024),
3735 huge_peak / (size_t)(1024 * 1024));
3736
3737#if ENABLE_GLOBAL_CACHE
3738 fprintf(file, "GlobalCacheMiB\n");
3739 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
3740 global_cache_t *cache = _memory_span_cache + iclass;
3741 size_t global_cache = (size_t)cache->count * iclass * _memory_span_size;
3742
3743 size_t global_overflow_cache = 0;
3744 span_t *span = cache->overflow;
3745 while (span) {
3746 global_overflow_cache += iclass * _memory_span_size;
3747 span = span->next;
3748 }
3749 if (global_cache || global_overflow_cache || cache->insert_count ||
3750 cache->extract_count)
3751 fprintf(file,
3752 "%4zu: %8zuMiB (%8zuMiB overflow) %14zu insert %14zu extract\n",
3753 iclass + 1, global_cache / (size_t)(1024 * 1024),
3754 global_overflow_cache / (size_t)(1024 * 1024),
3755 cache->insert_count, cache->extract_count);
3756 }
3757#endif
3758
3759 size_t mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size;
3760 size_t mapped_os =
3761 (size_t)atomic_load32(&_mapped_pages_os) * _memory_page_size;
3762 size_t mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size;
3763 size_t mapped_total =
3764 (size_t)atomic_load32(&_mapped_total) * _memory_page_size;
3765 size_t unmapped_total =
3766 (size_t)atomic_load32(&_unmapped_total) * _memory_page_size;
3767 fprintf(
3768 file,
3769 "MappedMiB MappedOSMiB MappedPeakMiB MappedTotalMiB UnmappedTotalMiB\n");
3770 fprintf(file, "%9zu %11zu %13zu %14zu %16zu\n",
3771 mapped / (size_t)(1024 * 1024), mapped_os / (size_t)(1024 * 1024),
3772 mapped_peak / (size_t)(1024 * 1024),
3773 mapped_total / (size_t)(1024 * 1024),
3774 unmapped_total / (size_t)(1024 * 1024));
3775
3776 fprintf(file, "\n");
3777#if 0
3778 int64_t allocated = atomic_load64(&_allocation_counter);
3779 int64_t deallocated = atomic_load64(&_deallocation_counter);
3780 fprintf(file, "Allocation count: %lli\n", allocated);
3781 fprintf(file, "Deallocation count: %lli\n", deallocated);
3782 fprintf(file, "Current allocations: %lli\n", (allocated - deallocated));
3783 fprintf(file, "Master spans: %d\n", atomic_load32(&_master_spans));
3784 fprintf(file, "Dangling master spans: %d\n", atomic_load32(&_unmapped_master_spans));
3785#endif
3786#endif
3787 (void)sizeof(file);
3788}
3789
3790#if RPMALLOC_FIRST_CLASS_HEAPS
3791
3792extern inline rpmalloc_heap_t *rpmalloc_heap_acquire(void) {
3793 // Must be a pristine heap from newly mapped memory pages, or else memory
3794 // blocks could already be allocated from the heap which would (wrongly) be
3795 // released when heap is cleared with rpmalloc_heap_free_all(). Also heaps
3796 // guaranteed to be pristine from the dedicated orphan list can be used.
3798 rpmalloc_assume(heap != NULL);
3799 heap->owner_thread = 0;
3800 _rpmalloc_stat_inc(&_memory_active_heaps);
3801 return heap;
3802}
3803
3804extern inline void rpmalloc_heap_release(rpmalloc_heap_t *heap) {
3805 if (heap)
3806 _rpmalloc_heap_release(heap, 1, 1);
3807}
3808
3809extern inline RPMALLOC_ALLOCATOR void *
3810rpmalloc_heap_alloc(rpmalloc_heap_t *heap, size_t size) {
3811#if ENABLE_VALIDATE_ARGS
3812 if (size >= MAX_ALLOC_SIZE) {
3813 errno = EINVAL;
3814 return 0;
3815 }
3816#endif
3817 return _rpmalloc_allocate(heap, size);
3818}
3819
3820extern inline RPMALLOC_ALLOCATOR void *
3821rpmalloc_heap_aligned_alloc(rpmalloc_heap_t *heap, size_t alignment,
3822 size_t size) {
3823#if ENABLE_VALIDATE_ARGS
3824 if (size >= MAX_ALLOC_SIZE) {
3825 errno = EINVAL;
3826 return 0;
3827 }
3828#endif
3829 return _rpmalloc_aligned_allocate(heap, alignment, size);
3830}
3831
3832extern inline RPMALLOC_ALLOCATOR void *
3833rpmalloc_heap_calloc(rpmalloc_heap_t *heap, size_t num, size_t size) {
3834 return rpmalloc_heap_aligned_calloc(heap, 0, num, size);
3835}
3836
3837extern inline RPMALLOC_ALLOCATOR void *
3838rpmalloc_heap_aligned_calloc(rpmalloc_heap_t *heap, size_t alignment,
3839 size_t num, size_t size) {
3840 size_t total;
3841#if ENABLE_VALIDATE_ARGS
3842#if PLATFORM_WINDOWS
3843 int err = SizeTMult(num, size, &total);
3844 if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
3845 errno = EINVAL;
3846 return 0;
3847 }
3848#else
3849 int err = __builtin_umull_overflow(num, size, &total);
3850 if (err || (total >= MAX_ALLOC_SIZE)) {
3851 errno = EINVAL;
3852 return 0;
3853 }
3854#endif
3855#else
3856 total = num * size;
3857#endif
3858 void *block = _rpmalloc_aligned_allocate(heap, alignment, total);
3859 if (block)
3860 memset(block, 0, total);
3861 return block;
3862}
3863
3864extern inline RPMALLOC_ALLOCATOR void *
3865rpmalloc_heap_realloc(rpmalloc_heap_t *heap, void *ptr, size_t size,
3866 unsigned int flags) {
3867#if ENABLE_VALIDATE_ARGS
3868 if (size >= MAX_ALLOC_SIZE) {
3869 errno = EINVAL;
3870 return ptr;
3871 }
3872#endif
3873 return _rpmalloc_reallocate(heap, ptr, size, 0, flags);
3874}
3875
3876extern inline RPMALLOC_ALLOCATOR void *
3877rpmalloc_heap_aligned_realloc(rpmalloc_heap_t *heap, void *ptr,
3878 size_t alignment, size_t size,
3879 unsigned int flags) {
3880#if ENABLE_VALIDATE_ARGS
3881 if ((size + alignment < size) || (alignment > _memory_page_size)) {
3882 errno = EINVAL;
3883 return 0;
3884 }
3885#endif
3886 return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, 0, flags);
3887}
3888
3889extern inline void rpmalloc_heap_free(rpmalloc_heap_t *heap, void *ptr) {
3890 (void)sizeof(heap);
3892}
3893
3894extern inline void rpmalloc_heap_free_all(rpmalloc_heap_t *heap) {
3895 span_t *span;
3896 span_t *next_span;
3897
3899
3900 for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
3901 span = heap->size_class[iclass].partial_span;
3902 while (span) {
3903 next_span = span->next;
3904 _rpmalloc_heap_cache_insert(heap, span);
3905 span = next_span;
3906 }
3907 heap->size_class[iclass].partial_span = 0;
3908 span = heap->full_span[iclass];
3909 while (span) {
3910 next_span = span->next;
3911 _rpmalloc_heap_cache_insert(heap, span);
3912 span = next_span;
3913 }
3914
3915 span = heap->size_class[iclass].cache;
3916 if (span)
3917 _rpmalloc_heap_cache_insert(heap, span);
3918 heap->size_class[iclass].cache = 0;
3919 }
3920 memset(heap->size_class, 0, sizeof(heap->size_class));
3921 memset(heap->full_span, 0, sizeof(heap->full_span));
3922
3923 span = heap->large_huge_span;
3924 while (span) {
3925 next_span = span->next;
3928 else
3929 _rpmalloc_heap_cache_insert(heap, span);
3930 span = next_span;
3931 }
3932 heap->large_huge_span = 0;
3933 heap->full_span_count = 0;
3934
3935#if ENABLE_THREAD_CACHE
3936 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
3937 span_cache_t *span_cache;
3938 if (!iclass)
3939 span_cache = &heap->span_cache;
3940 else
3941 span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1));
3942 if (!span_cache->count)
3943 continue;
3944#if ENABLE_GLOBAL_CACHE
3945 _rpmalloc_stat_add64(&heap->thread_to_global,
3946 span_cache->count * (iclass + 1) * _memory_span_size);
3947 _rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global,
3948 span_cache->count);
3949 _rpmalloc_global_cache_insert_spans(span_cache->span, iclass + 1,
3950 span_cache->count);
3951#else
3952 for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
3953 _rpmalloc_span_unmap(span_cache->span[ispan]);
3954#endif
3955 span_cache->count = 0;
3956 }
3957#endif
3958
3959#if ENABLE_STATISTICS
3960 for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
3961 atomic_store32(&heap->size_class_use[iclass].alloc_current, 0);
3962 atomic_store32(&heap->size_class_use[iclass].spans_current, 0);
3963 }
3964 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
3965 atomic_store32(&heap->span_use[iclass].current, 0);
3966 }
3967#endif
3968}
3969
3970extern inline void rpmalloc_heap_thread_set_current(rpmalloc_heap_t *heap) {
3971 heap_t *prev_heap = get_thread_heap_raw();
3972 if (prev_heap != heap) {
3973 set_thread_heap(heap);
3974 if (prev_heap)
3975 rpmalloc_heap_release(prev_heap);
3976 }
3977}
3978
3979extern inline rpmalloc_heap_t *rpmalloc_get_heap_for_ptr(void *ptr) {
3980 // Grab the span, and then the heap from the span
3981 span_t *span = (span_t *)((uintptr_t)ptr & _memory_span_mask);
3982 if (span) {
3983 return span->heap;
3984 }
3985 return 0;
3986}
3987
3988#endif
3989
3990#if ENABLE_PRELOAD || ENABLE_OVERRIDE
3991
3992#include "malloc.c"
3993
3994#endif
3995
Given that RA is a live value
#define rc(i)
if(PassOpts->AAPipeline)
dot regions Print regions of function to dot file(with no function bodies)"
static const char * name
Definition: SMEABIPass.cpp:52
unify loop Fixup each natural loop to have a single exit block
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1702
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1973
#define SMALL_GRANULARITY
Preconfigured limits and sizes.
Definition: rpmalloc.c:456
static FORCEINLINE void * atomic_exchange_ptr_acquire(atomicptr_t *dst, void *val)
Definition: rpmalloc.c:375
#define LARGE_CLASS_COUNT
Number of large block size classes.
Definition: rpmalloc.c:472
#define _memory_default_span_size
Global data.
Definition: rpmalloc.c:762
static FORCEINLINE int64_t atomic_load64(atomic64_t *val)
Definition: rpmalloc.c:360
#define _memory_span_size_shift
Definition: rpmalloc.c:788
static void * _rpmalloc_allocate_from_heap_fallback(heap_t *heap, heap_size_class_t *heap_size_class, uint32_t class_idx)
Allocate a small/medium sized memory block from the given heap.
Definition: rpmalloc.c:2380
static heap_t * _rpmalloc_heap_allocate(int first_class)
Allocate a new heap, potentially reusing a previously orphaned heap.
Definition: rpmalloc.c:2227
#define MEDIUM_GRANULARITY
Granularity of a medium allocation block.
Definition: rpmalloc.c:464
static void _rpmalloc_heap_unmap(heap_t *heap)
Definition: rpmalloc.c:1834
_Static_assert((SMALL_GRANULARITY &(SMALL_GRANULARITY - 1))==0, "Small granularity must be power of two")
#define SPAN_FLAG_UNMAPPED_MASTER
Flag indicating an unmapped master span.
Definition: rpmalloc.c:538
static heap_t * get_thread_heap_raw(void)
Definition: rpmalloc.c:877
void rpfree(void *ptr)
Free the given memory block.
Definition: rpmalloc.c:3389
#define rpmalloc_assume(cond)
Definition: rpmalloc.c:79
static size_t _memory_global_reserve_count
Global reserved count.
Definition: rpmalloc.c:810
static void _rpmalloc_span_unmap(span_t *span)
Unmap memory pages for the given number of spans (or mark as unused if no partial unmappings)
Definition: rpmalloc.c:1413
static void _rpmalloc_heap_release(void *heapptr, int first_class, int release_cache)
Definition: rpmalloc.c:2245
static atomic32_t _memory_global_lock
Used to restrict access to mapping memory for huge pages.
Definition: rpmalloc.c:816
static span_t * _memory_global_reserve_master
Global reserved master.
Definition: rpmalloc.c:812
static size_t _memory_map_granularity
Granularity at which memory pages are mapped by OS.
Definition: rpmalloc.c:777
static FORCEINLINE int32_t atomic_incr32(atomic32_t *val)
Definition: rpmalloc.c:343
static FORCEINLINE int atomic_cas32_acquire(atomic32_t *dst, int32_t val, int32_t ref)
Definition: rpmalloc.c:352
static void _rpmalloc_adjust_size_class(size_t iclass)
Adjust and optimize the size class properties for the given class.
Definition: rpmalloc.c:3026
#define THREAD_SPAN_CACHE_TRANSFER
Number of spans to transfer between thread and global cache.
Definition: rpmalloc.c:485
RPMALLOC_ALLOCATOR void * rpmalloc(size_t size)
Allocate a memory block of at least the given size.
Definition: rpmalloc.c:3378
static void * _rpmalloc_aligned_reallocate(heap_t *heap, void *ptr, size_t alignment, size_t size, size_t oldsize, unsigned int flags)
Definition: rpmalloc.c:2973
#define SIZE_CLASS_HUGE
Definition: rpmalloc.c:510
static void _rpmalloc_heap_finalize(heap_t *heap)
Definition: rpmalloc.c:2311
static span_t * _rpmalloc_span_map_aligned_count(heap_t *heap, size_t span_count)
Map an aligned set of spans, taking configured mapping granularity and the page size into account.
Definition: rpmalloc.c:1325
static FORCEINLINE void atomic_store32_release(atomic32_t *dst, int32_t val)
Definition: rpmalloc.c:357
int rpmalloc_initialize_config(const rpmalloc_config_t *config)
Initialize allocator with given configuration.
Definition: rpmalloc.c:3059
#define THREAD_SPAN_LARGE_CACHE_TRANSFER
Number of spans to transfer between thread and global cache for large spans.
Definition: rpmalloc.c:490
static void _rpmalloc_heap_global_finalize(heap_t *heap)
Definition: rpmalloc.c:1847
#define SPAN_FLAG_SUBSPAN
Flag indicating span is a secondary (sub) span of a split superspan.
Definition: rpmalloc.c:534
static FORCEINLINE int atomic_cas_ptr(atomicptr_t *dst, void *val, void *ref)
Definition: rpmalloc.c:379
static void _rpmalloc_spin(void)
Definition: rpmalloc.c:949
static size_class_t _memory_size_class[SIZE_CLASS_COUNT]
Global size classes.
Definition: rpmalloc.c:796
static size_t _memory_page_size
Memory page size.
Definition: rpmalloc.c:773
static void _rpmalloc_heap_release_raw_fc(void *heapptr)
Definition: rpmalloc.c:2307
static void * _rpmalloc_allocate(heap_t *heap, size_t size)
Allocate a block of the given size.
Definition: rpmalloc.c:2534
#define TLS_MODEL
Thread local heap and ID.
Definition: rpmalloc.c:866
static size_t _memory_span_map_count
Number of spans to map in each map call.
Definition: rpmalloc.c:792
static void * _rpmalloc_allocate_small(heap_t *heap, size_t size)
Allocate a small sized memory block from the given heap.
Definition: rpmalloc.c:2444
#define _memory_default_span_size_shift
Definition: rpmalloc.c:763
static heap_t * _memory_heaps[HEAP_ARRAY_SIZE]
All heaps.
Definition: rpmalloc.c:814
#define _memory_span_mask
Definition: rpmalloc.c:789
#define SIZE_CLASS_LARGE
Definition: rpmalloc.c:509
static span_t * _memory_global_reserve
Global reserved spans.
Definition: rpmalloc.c:808
const rpmalloc_config_t * rpmalloc_config(void)
Get allocator configuration.
Definition: rpmalloc.c:3374
volatile _Atomic(int32_t)
Atomic access abstraction (since MSVC does not do C11 yet)
Definition: rpmalloc.c:333
static FORCEINLINE int64_t atomic_add64(atomic64_t *val, int64_t add)
Definition: rpmalloc.c:363
static void _rpmalloc_span_double_link_list_pop_head(span_t **head, span_t *span)
Pop head span from double linked list.
Definition: rpmalloc.c:1227
#define _rpmalloc_stat_add64(counter, value)
Definition: rpmalloc.c:434
static void set_thread_heap(heap_t *heap)
Set the current thread heap.
Definition: rpmalloc.c:931
#define _rpmalloc_stat_add(counter, value)
Definition: rpmalloc.c:431
static void * _rpmalloc_aligned_allocate(heap_t *heap, size_t alignment, size_t size)
Definition: rpmalloc.c:2545
static int _memory_huge_pages
Huge page support.
Definition: rpmalloc.c:802
static FORCEINLINE int32_t atomic_decr32(atomic32_t *val)
Definition: rpmalloc.c:346
RPMALLOC_ALLOCATOR void * rpaligned_realloc(void *ptr, size_t alignment, size_t size, size_t oldsize, unsigned int flags)
Reallocate the given block to at least the given size and alignment,.
Definition: rpmalloc.c:3428
#define MAX_THREAD_SPAN_LARGE_CACHE
Number of spans in thread cache for large spans (must be greater than LARGE_CLASS_COUNT / 2)
Definition: rpmalloc.c:488
void rpmalloc_set_main_thread(void)
Set main thread ID.
Definition: rpmalloc.c:945
#define MEDIUM_CLASS_COUNT
Number of medium block size classes.
Definition: rpmalloc.c:468
static void _rpmalloc_inc_span_statistics(heap_t *heap, size_t span_count, uint32_t class_idx)
Definition: rpmalloc.c:2035
RPMALLOC_ALLOCATOR void * rpcalloc(size_t num, size_t size)
Definition: rpmalloc.c:3391
static void _rpmalloc_deallocate_large(span_t *span)
Deallocate the given large memory block to the current heap.
Definition: rpmalloc.c:2773
#define _memory_span_size
Hardwired span size.
Definition: rpmalloc.c:787
static void _rpmalloc_heap_initialize(heap_t *heap)
Definition: rpmalloc.c:2110
#define _rpmalloc_stat_add_peak(counter, value, peak)
Definition: rpmalloc.c:437
static void _rpmalloc_heap_orphan(heap_t *heap, int first_class)
Definition: rpmalloc.c:2121
RPMALLOC_ALLOCATOR void * rpmemalign(size_t alignment, size_t size)
Allocate a memory block of at least the given size and alignment.
Definition: rpmalloc.c:3473
static void _rpmalloc_deallocate_huge(span_t *)
Global cache.
Definition: rpmalloc.c:2838
static void _rpmalloc_span_release_to_cache(heap_t *heap, span_t *span)
Move the span (used for small or medium allocations) to the heap thread cache.
Definition: rpmalloc.c:1465
static int _rpmalloc_initialized
Initialized flag.
Definition: rpmalloc.c:767
#define SPAN_HEADER_SIZE
Size of a span header (must be a multiple of SMALL_GRANULARITY and a power of two)
Definition: rpmalloc.c:481
static heap_t * _rpmalloc_heap_allocate_new(void)
Allocate a new heap from newly mapped memory pages.
Definition: rpmalloc.c:2135
static void * _rpmalloc_span_initialize_new(heap_t *heap, heap_size_class_t *heap_size_class, span_t *span, uint32_t class_idx)
Initialize an unused span (from cache or mapped) to be new active span, putting the initial free list...
Definition: rpmalloc.c:1523
#define SMALL_GRANULARITY_SHIFT
Small granularity shift count.
Definition: rpmalloc.c:458
static void _rpmalloc_set_name(void *address, size_t size)
Low level memory map/unmap.
Definition: rpmalloc.c:990
static size_t _rpmalloc_span_align_count(size_t span_count)
Get the aligned number of spans to map in based on wanted count, configured mapping granularity and t...
Definition: rpmalloc.c:1300
#define LARGE_SIZE_LIMIT
Maximum size of a large block.
Definition: rpmalloc.c:477
static void _rpmalloc_unmap_os(void *address, size_t size, size_t offset, size_t release)
Default implementation to unmap pages from virtual memory.
Definition: rpmalloc.c:1133
#define ENABLE_THREAD_CACHE
Enable per-thread cache.
Definition: rpmalloc.c:88
static void _rpmalloc_deallocate_direct_small_or_medium(span_t *span, void *block)
Deallocation entry points.
Definition: rpmalloc.c:2675
static void _rpmalloc_deallocate_defer_small_or_medium(span_t *span, void *block)
Put the block in the deferred free list of the owning span.
Definition: rpmalloc.c:2725
void rpmalloc_thread_collect(void)
Perform deferred deallocations pending for the calling thread heap.
Definition: rpmalloc.c:3491
void rpmalloc_thread_finalize(int release_caches)
Finalize thread, orphan heap.
Definition: rpmalloc.c:3360
int rpmalloc_is_thread_initialized(void)
Query if allocator is initialized for calling thread.
Definition: rpmalloc.c:3370
void rpmalloc_linker_reference(void)
Dummy empty function for forcing linker symbol inclusion.
Definition: rpmalloc.c:3996
#define FORCEINLINE
Platform and arch specifics.
Definition: rpmalloc.c:172
void rpmalloc_dump_statistics(void *file)
Dump all statistics in human readable format to file (should be a FILE*)
Definition: rpmalloc.c:3698
RPMALLOC_ALLOCATOR void * rpaligned_alloc(size_t alignment, size_t size)
Allocate a memory block of at least the given size and alignment.
Definition: rpmalloc.c:3442
size_t rpmalloc_usable_size(void *ptr)
Query the usable size of the given memory block (from given pointer to the end of block)
Definition: rpmalloc.c:3487
static void _rpmalloc_deallocate_small_or_medium(span_t *span, void *p)
Definition: rpmalloc.c:2749
static span_t * _rpmalloc_global_get_reserved_spans(size_t span_count)
Use global reserved spans to fulfill a memory map request (reserve size must be checked by caller)
Definition: rpmalloc.c:1190
#define _rpmalloc_stat_sub(counter, value)
Definition: rpmalloc.c:440
static void * _rpmalloc_mmap_os(size_t size, size_t *offset)
Default implementation to map new pages to virtual memory.
Definition: rpmalloc.c:1042
#define _rpmalloc_memcpy_const(x, y, s)
Definition: rpmalloc.c:64
static uintptr_t get_thread_id(void)
Fast thread ID.
Definition: rpmalloc.c:899
void rpmalloc_thread_initialize(void)
Initialize thread, assign heap.
Definition: rpmalloc.c:3346
static span_t * _rpmalloc_heap_thread_cache_deferred_extract(heap_t *heap, size_t span_count)
Definition: rpmalloc.c:1976
static void * _rpmalloc_allocate_large(heap_t *heap, size_t size)
Allocate a large sized memory block from the given heap.
Definition: rpmalloc.c:2475
RPMALLOC_ALLOCATOR void * rprealloc(void *ptr, size_t size)
Reallocate the given block to at least the given size.
Definition: rpmalloc.c:3417
#define EXPECTED(x)
Definition: rpmalloc.c:384
static FORCEINLINE void * atomic_load_ptr(atomicptr_t *src)
Definition: rpmalloc.c:366
static void _rpmalloc_span_mark_as_subspan_unless_master(span_t *master, span_t *subspan, size_t span_count)
Declare the span to be a subspan and store distance from master span and span count.
Definition: rpmalloc.c:1265
static FORCEINLINE void atomic_store_ptr_release(atomicptr_t *dst, void *val)
Definition: rpmalloc.c:372
#define MEDIUM_GRANULARITY_SHIFT
Medium granularity shift count.
Definition: rpmalloc.c:466
#define INVALID_POINTER
Definition: rpmalloc.c:507
static size_t _memory_medium_size_limit
Run-time size limit of medium blocks.
Definition: rpmalloc.c:798
#define SPAN_FLAG_MASTER
Flag indicating span is the first (master) span of a split superspan.
Definition: rpmalloc.c:532
static void * _rpmalloc_reallocate(heap_t *heap, void *p, size_t size, size_t oldsize, unsigned int flags)
Reallocate the given block to the given size.
Definition: rpmalloc.c:2889
int rpmalloc_initialize(void)
Initialize the allocator and setup global data.
Definition: rpmalloc.c:3051
static span_t * _rpmalloc_heap_reserved_extract(heap_t *heap, size_t span_count)
Definition: rpmalloc.c:1988
static size_t _memory_page_size_shift
Shift to divide by page size.
Definition: rpmalloc.c:775
static FORCEINLINE void atomic_store32(atomic32_t *dst, int32_t val)
Definition: rpmalloc.c:340
static void _rpmalloc_heap_release_raw(void *heapptr, int release_cache)
Definition: rpmalloc.c:2303
static void _rpmalloc_global_set_reserved_spans(span_t *master, span_t *reserve, size_t reserve_span_count)
Store the given spans as global reserve (must only be called from within new heap allocation,...
Definition: rpmalloc.c:1205
#define SIZE_CLASS_COUNT
Total number of small + medium size classes.
Definition: rpmalloc.c:470
static int _rpmalloc_span_finalize(heap_t *heap, size_t iclass, span_t *span, span_t **list_head)
Definition: rpmalloc.c:1578
#define _rpmalloc_stat_dec(counter)
Definition: rpmalloc.c:428
static uintptr_t _rpmalloc_main_thread_id
Main thread ID.
Definition: rpmalloc.c:769
#define _rpmalloc_stat_inc_free(heap, class_idx)
Definition: rpmalloc.c:446
static span_t * _rpmalloc_heap_thread_cache_extract(heap_t *heap, size_t span_count)
Extract the given number of spans from the different cache levels.
Definition: rpmalloc.c:1959
#define pointer_offset(ptr, ofs)
Definition: rpmalloc.c:503
#define _rpmalloc_stat_inc(counter)
Statistics related functions (evaluate to nothing when statistics not enabled)
Definition: rpmalloc.c:425
#define _rpmalloc_memset_const(x, y, s)
Definition: rpmalloc.c:65
static void * _rpmalloc_allocate_huge(heap_t *heap, size_t size)
Allocate a huge block by mapping memory pages directly.
Definition: rpmalloc.c:2505
#define _rpmalloc_stat_inc_alloc(heap, class_idx)
Definition: rpmalloc.c:443
static void _rpmalloc_heap_cache_insert(heap_t *heap, span_t *span)
Span control.
Definition: rpmalloc.c:1896
static span_t * _rpmalloc_heap_global_cache_extract(heap_t *heap, size_t span_count)
Extract a span from the global cache.
Definition: rpmalloc.c:1996
static void _rpmalloc_unmap(void *address, size_t size, size_t offset, size_t release)
Unmap virtual memory.
Definition: rpmalloc.c:1028
static void _rpmalloc_span_double_link_list_add(span_t **head, span_t *span)
Span linked list management.
Definition: rpmalloc.c:1219
static void * _rpmalloc_allocate_medium(heap_t *heap, size_t size)
Allocate a medium sized memory block from the given heap.
Definition: rpmalloc.c:2458
#define HEAP_ARRAY_SIZE
Size of heap hashmap.
Definition: rpmalloc.c:84
static void _rpmalloc_deallocate_defer_free_span(heap_t *heap, span_t *span)
Definition: rpmalloc.c:2715
static void * free_list_pop(void **list)
Allocation entry points.
Definition: rpmalloc.c:2373
void rpmalloc_thread_statistics(rpmalloc_thread_statistics_t *stats)
Get per-thread statistics.
Definition: rpmalloc.c:3493
static heap_t * _rpmalloc_heap_extract_orphan(heap_t **heap_list)
Definition: rpmalloc.c:2220
static span_t * _rpmalloc_heap_extract_new_span(heap_t *heap, heap_size_class_t *heap_size_class, size_t span_count, uint32_t class_idx)
Get a span from one of the cache levels (thread cache, reserved, global cache) or fallback to mapping...
Definition: rpmalloc.c:2054
#define rpmalloc_assert(truth, message)
Definition: rpmalloc.c:258
#define SPAN_FLAG_ALIGNED_BLOCKS
Flag indicating span has blocks with increased alignment.
Definition: rpmalloc.c:536
static void _rpmalloc_span_initialize(span_t *span, size_t total_span_count, size_t span_count, size_t align_offset)
Setup a newly mapped span.
Definition: rpmalloc.c:1312
#define _memory_default_span_mask
Definition: rpmalloc.c:764
static size_t _memory_heap_reserve_count
Number of spans to keep reserved in each heap.
Definition: rpmalloc.c:794
static size_t _rpmalloc_usable_size(void *p)
Reallocation entry points.
Definition: rpmalloc.c:3006
static uint32_t free_list_partial_init(void **list, void **first_block, void *page_start, void *block_start, uint32_t block_count, uint32_t block_size)
Initialize a (partial) free list up to next system memory page, while reserving the first block as al...
Definition: rpmalloc.c:1488
static int _rpmalloc_span_is_fully_utilized(span_t *span)
Definition: rpmalloc.c:1572
static rpmalloc_config_t _memory_config
Configuration.
Definition: rpmalloc.c:771
static FORCEINLINE void atomic_store_ptr(atomicptr_t *dst, void *val)
Definition: rpmalloc.c:369
static void _rpmalloc_deallocate(void *p)
Deallocate the given block.
Definition: rpmalloc.c:2866
static heap_t * _memory_orphan_heaps
Orphaned heaps.
Definition: rpmalloc.c:818
static span_t * _rpmalloc_span_map(heap_t *heap, size_t span_count)
Map in memory pages for the given number of spans (or use previously reserved pages)
Definition: rpmalloc.c:1374
static void _rpmalloc_heap_set_reserved_spans(heap_t *heap, span_t *master, span_t *reserve, size_t reserve_span_count)
Store the given spans as reserve in the given heap.
Definition: rpmalloc.c:1779
RPMALLOC_ALLOCATOR void * rpaligned_calloc(size_t alignment, size_t num, size_t size)
Definition: rpmalloc.c:3448
void rpmalloc_global_statistics(rpmalloc_global_statistics_t *stats)
Get global statistics.
Definition: rpmalloc.c:3576
int rpposix_memalign(void **memptr, size_t alignment, size_t size)
Allocate a memory block of at least the given size and alignment.
Definition: rpmalloc.c:3478
#define MAX_THREAD_SPAN_CACHE
Number of spans in thread cache.
Definition: rpmalloc.c:483
static void _rpmalloc_span_double_link_list_remove(span_t **head, span_t *span)
Remove a span from double linked list.
Definition: rpmalloc.c:1235
#define GLOBAL_CACHE_MULTIPLIER
Multiplier for global cache.
Definition: rpmalloc.c:133
#define MEDIUM_SIZE_LIMIT
Maximum size of a medium block.
Definition: rpmalloc.c:474
#define SMALL_SIZE_LIMIT
Maximum size of a small block.
Definition: rpmalloc.c:462
static void _rpmalloc_span_extract_free_list_deferred(span_t *span)
Definition: rpmalloc.c:1559
struct span_list_t span_list_t
Span list.
Definition: rpmalloc.c:523
static FORCEINLINE int32_t atomic_add32(atomic32_t *val, int32_t add)
Definition: rpmalloc.c:349
static atomic32_t _memory_heap_id
Heap ID counter.
Definition: rpmalloc.c:800
static void _rpmalloc_heap_cache_adopt_deferred(heap_t *heap, span_t **single_span)
Adopt the deferred span cache list, optionally extracting the first single span for immediate re-use.
Definition: rpmalloc.c:1789
#define pointer_diff(first, second)
Definition: rpmalloc.c:504
struct span_active_t span_active_t
Span active data.
Definition: rpmalloc.c:525
#define DEFAULT_SPAN_MAP_COUNT
Default number of spans to map in call to map more virtual memory (default values yield 4MiB here)
Definition: rpmalloc.c:129
#define UNEXPECTED(x)
Definition: rpmalloc.c:385
static span_t * _rpmalloc_span_map_from_reserve(heap_t *heap, size_t span_count)
Use reserved spans to fulfill a memory map request (reserve size must be checked by caller)
Definition: rpmalloc.c:1282
static void * _rpmalloc_mmap(size_t size, size_t *offset)
Map more virtual memory.
Definition: rpmalloc.c:1010
static heap_t * get_thread_heap(void)
Get the current thread heap.
Definition: rpmalloc.c:886
#define SMALL_CLASS_COUNT
Number of small block size classes.
Definition: rpmalloc.c:460
void rpmalloc_finalize(void)
Finalize the allocator.
Definition: rpmalloc.c:3297
#define RPMALLOC_ALLOCATOR
Definition: rpmalloc.h:46
#define RPMALLOC_GROW_OR_FAIL
Flag to rpaligned_realloc to fail and return null pointer if grow cannot be done in-place,...
Definition: rpmalloc.h:73
#define RPMALLOC_NO_PRESERVE
Flag to rpaligned_realloc to not preserve content in reallocation.
Definition: rpmalloc.h:68
uint32_t count
Cache count.
Definition: rpmalloc.c:742
span_t * overflow
Unlimited cache overflow.
Definition: rpmalloc.c:752
atomic32_t lock
Cache lock.
Definition: rpmalloc.c:740
span_t * span[GLOBAL_CACHE_MULTIPLIER *MAX_THREAD_SPAN_CACHE]
Cached spans.
Definition: rpmalloc.c:750
span_t * cache
Early level cache of fully free spans.
Definition: rpmalloc.c:664
void * free_list
Free list of active span.
Definition: rpmalloc.c:659
span_t * partial_span
Double linked list of partially used spans with free blocks.
Definition: rpmalloc.c:662
int finalize
Finalization state flag.
Definition: rpmalloc.c:698
int32_t id
Heap ID.
Definition: rpmalloc.c:696
heap_t * master_heap
Master heap owning the memory pages.
Definition: rpmalloc.c:700
atomicptr_t span_free_deferred
List of deferred free spans (single linked list)
Definition: rpmalloc.c:680
uintptr_t owner_thread
Owning thread ID.
Definition: rpmalloc.c:672
atomic32_t child_count
Child count.
Definition: rpmalloc.c:690
size_t full_span_count
Number of full spans.
Definition: rpmalloc.c:682
heap_size_class_t size_class[SIZE_CLASS_COUNT]
Free lists for each size class.
Definition: rpmalloc.c:674
heap_t * next_heap
Next heap in id list.
Definition: rpmalloc.c:692
heap_t * next_orphan
Next heap in orphan list.
Definition: rpmalloc.c:694
uint32_t spans_reserved
Number of mapped but unused spans.
Definition: rpmalloc.c:688
span_t * span_reserve
Mapped but unused spans.
Definition: rpmalloc.c:684
span_t * span_reserve_master
Master span for mapped but unused spans.
Definition: rpmalloc.c:686
void(* memory_unmap)(void *address, size_t size, size_t offset, size_t release)
Unmap the memory pages starting at address and spanning the given number of bytes.
Definition: rpmalloc.h:180
int(* map_fail_callback)(size_t size)
Called when a call to map memory pages fails (out of memory).
Definition: rpmalloc.h:193
size_t span_map_count
Number of spans to map at each request to map new virtual memory blocks.
Definition: rpmalloc.h:212
const char * page_name
Respectively allocated pages and huge allocated pages names for systems.
Definition: rpmalloc.h:224
int enable_huge_pages
Enable use of large/huge pages.
Definition: rpmalloc.h:221
const char * huge_page_name
Definition: rpmalloc.h:225
size_t span_size
Size of a span of memory blocks.
Definition: rpmalloc.h:204
void *(* memory_map)(size_t size, size_t *offset)
Map memory pages for the given number of bytes.
Definition: rpmalloc.h:169
size_t page_size
Size of memory pages.
Definition: rpmalloc.h:199
uint32_t block_size
Size of blocks in this class.
Definition: rpmalloc.c:730
uint16_t class_idx
Class index this class is merged with.
Definition: rpmalloc.c:734
uint16_t block_count
Number of blocks in each chunk.
Definition: rpmalloc.c:732
size_t count
Definition: rpmalloc.c:646
span_t * span[MAX_THREAD_SPAN_CACHE]
Definition: rpmalloc.c:647
span_t * span[MAX_THREAD_SPAN_LARGE_CACHE]
Definition: rpmalloc.c:653
uint32_t offset_from_master
Offset from master span for subspans.
Definition: rpmalloc.c:631
uint32_t align_offset
Alignment offset.
Definition: rpmalloc.c:635
atomicptr_t free_list_deferred
Deferred free list.
Definition: rpmalloc.c:619
heap_t * heap
Owning heap.
Definition: rpmalloc.c:637
span_t * prev
Previous span.
Definition: rpmalloc.c:641
atomic32_t remaining_spans
Remaining span counter, for master spans.
Definition: rpmalloc.c:633
uint32_t flags
Flags and counters.
Definition: rpmalloc.c:625
uint32_t size_class
Size class.
Definition: rpmalloc.c:613
uint32_t block_count
Total block count of size class.
Definition: rpmalloc.c:611
void * free_list
Free list.
Definition: rpmalloc.c:609
uint32_t block_size
Size of a block.
Definition: rpmalloc.c:623
uint32_t used_count
Number of used blocks remaining when in partial state.
Definition: rpmalloc.c:617
span_t * next
Next span.
Definition: rpmalloc.c:639
uint32_t list_size
Size of deferred free list, or list of spans when part of a cache list.
Definition: rpmalloc.c:621
uint32_t span_count
Number of spans.
Definition: rpmalloc.c:627
uint32_t free_list_limit
Index of last block initialized in free list.
Definition: rpmalloc.c:615
uint32_t total_spans
Total span counter for master spans.
Definition: rpmalloc.c:629