From 24b6d11c346ec90729405e7de40276af50e02aa7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 13 Jan 2006 18:38:56 +0000 Subject: [PATCH] Replace malloc(), calloc(), posix_memalign(), realloc(), and free() with a scalable concurrent allocator implementation. Reviewed by: current@ Approved by: phk, markm (mentor) --- lib/libc/stdlib/malloc.3 | 298 +- lib/libc/stdlib/malloc.c | 5684 +++++++++++++++++++++++++++++++------- 2 files changed, 4736 insertions(+), 1246 deletions(-) diff --git a/lib/libc/stdlib/malloc.3 b/lib/libc/stdlib/malloc.3 index 87e8ca28d5b0..9643a86d3ae2 100644 --- a/lib/libc/stdlib/malloc.3 +++ b/lib/libc/stdlib/malloc.3 @@ -13,11 +13,7 @@ .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. -.\" 3. All advertising materials mentioning features or use of this software -.\" must display the following acknowledgement: -.\" This product includes software developed by the University of -.\" California, Berkeley and its contributors. -.\" 4. Neither the name of the University nor the names of its contributors +.\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" @@ -36,7 +32,7 @@ .\" @(#)malloc.3 8.1 (Berkeley) 6/4/93 .\" $FreeBSD$ .\" -.Dd August 19, 2004 +.Dd January 12, 2006 .Dt MALLOC 3 .Os .Sh NAME @@ -67,25 +63,9 @@ The .Fn malloc function allocates .Fa size -bytes of memory. +bytes of uninitialized memory. The allocated space is suitably aligned (after possible pointer coercion) for storage of any type of object. -If the space is at least -.Va pagesize -bytes in length (see -.Xr getpagesize 3 ) , -the returned memory will be page boundary aligned as well. -If -.Fn malloc -fails, a -.Dv NULL -pointer is returned. -.Pp -Note that -.Fn malloc -does -.Em NOT -normally initialize the returned memory to zero bytes. .Pp The .Fn calloc @@ -113,20 +93,14 @@ The contents of the memory are unchanged up to the lesser of the new and old sizes. If the new size is larger, the value of the newly allocated portion of the memory is undefined. -If the requested memory cannot be allocated, -.Dv NULL -is returned and -the memory referenced by -.Fa ptr -is valid and unchanged. -If memory can be allocated, the memory referenced by +Upon success, the memory referenced by .Fa ptr is freed and a pointer to the newly allocated memory is returned. Note that .Fn realloc and .Fn reallocf -may move the memory allocation resulting in a different return value than +may move the memory allocation, resulting in a different return value than .Fa ptr . If .Fa ptr @@ -182,34 +156,46 @@ flags being set) become fatal. The process will call .Xr abort 3 in these cases. +.It C +Increase/decrease the size of the cache by a factor of two. +The default cache size is 256 objects for each arena. +This option can be specified multiple times. .It J Each byte of new memory allocated by .Fn malloc , .Fn realloc or .Fn reallocf -as well as all memory returned by +will be initialized to 0xa5. +All memory returned by .Fn free , .Fn realloc or .Fn reallocf -will be initialized to 0xd0. -This options also sets the -.Dq R -option. +will be initialized to 0x5a. This is intended for debugging and will impact performance negatively. -.It H -Pass a hint to the kernel about pages unused by the allocation functions. -This will help performance if the system is paging excessively. -This option is off by default. -.It R -Causes the -.Fn realloc -and -.Fn reallocf -functions to always reallocate memory even if the initial allocation was -sufficiently large. -This can substantially aid in compacting memory. +.It K +Increase/decrease the virtual memory chunk size by a factor of two. +The default chunk size is 16 MB. +This option can be specified multiple times. +.It N +Increase/decrease the number of arenas by a factor of two. +The default number of arenas is twice the number of CPUs, or one if there is a +single CPU. +This option can be specified multiple times. +.It P +Various statistics are printed at program exit via an +.Xr atexit 3 +function. +This has the potential to cause deadlock for a multi-threaded process that exits +while one or more threads are executing in the memory allocation functions. +Therefore, this option should only be used with care; it is primarily intended +as a performance tuning aid during application development. +.It Q +Increase/decrease the size of the allocation quantum by a factor of two. +The default quantum is the minimum allowed by the architecture (typically 8 or +16 bytes). +This option can be specified multiple times. .It U Generate .Dq utrace @@ -241,20 +227,18 @@ the source code: _malloc_options = "X"; .Ed .It Z -This option implicitly sets the -.Dq J +Each byte of new memory allocated by +.Fn malloc , +.Fn realloc +or +.Fn reallocf +will be initialized to 0x0. +Note that this initialization only happens once for each byte, so +.Fn realloc and -.Dq R -options, and then zeros out the bytes that were requested. +.Fn reallocf +calls do not zero memory that was previously allocated. This is intended for debugging and will impact performance negatively. -.It < -Reduce the size of the cache by a factor of two. -The default cache size is 16 pages. -This option can be specified multiple times. -.It > -Double the size of the cache by a factor of two. -The default cache size is 16 pages. -This option can be specified multiple times. .El .Pp The @@ -301,31 +285,63 @@ deallocates it in this case. The .Fn free function returns no value. +.Sh IMPLEMENTATION NOTES +This allocator uses multiple arenas in order to reduce lock contention for +threaded programs on multi-processor systems. +This works well with regard to threading scalability, but incurs some costs. +There is a small fixed per-arena overhead, and additionally, arenas manage +memory completely independently of each other, which means a small fixed +increase in overall memory fragmentation. +These overheads aren't generally an issue, given the number of arenas normally +used. +Note that using substantially more arenas than the default is not likely to +improve performance, mainly due to reduced cache performance. +However, it may make sense to reduce the number of arenas if an application +does not make much use of the allocation functions. +.Pp +This allocator uses a novel approach to object caching. +For objects below a size threshold (use the +.Dq P +option to discover the threshold), full deallocation and attempted coalescence +with adjacent memory regions are delayed. +This is so that if the application requests an allocation of that size soon +thereafter, the request can be met much more quickly. +Most applications heavily use a small number of object sizes, so this caching +has the potential to have a large positive performance impact. +However, the effectiveness of the cache depends on the cache being large enough +to absorb typical fluctuations in the number of allocated objects. +If an application routinely fluctuates by thousands of objects, then it may +make sense to increase the size of the cache. +Conversely, if an application's memory usage fluctuates very little, it may +make sense to reduce the size of the cache, so that unused regions can be +coalesced sooner. +.Pp +This allocator is very aggressive about tightly packing objects in memory, even +for objects much larger than the system page size. +For programs that allocate objects larger than half the system page size, this +has the potential to reduce memory footprint in comparison to other allocators. +However, it has some side effects that are important to keep in mind. +First, even multi-page objects can start at non-page-aligned addresses, since +the implementation only guarantees quantum alignment. +Second, this tight packing of objects can cause objects to share L1 cache +lines, which can be a performance issue for multi-threaded applications. +There are two ways to approach these issues. +First, +.Fn posix_memalign +provides the ability to align allocations as needed. +By aligning an allocation to at least the L1 cache line size, and padding the +allocation request by one cache line unit, the programmer can rest assured that +no cache line sharing will occur for the object. +Second, the +.Dq Q +option can be used to force all allocations to be aligned with the L1 cache +lines. +This approach should be used with care though, because although easy to +implement, it means that all allocations must be at least as large as the +quantum, which can cause severe internal fragmentation if the application +allocates many small objects. .Sh DEBUGGING MALLOC PROBLEMS -The major difference between this implementation and other allocation -implementations is that the free pages are not accessed unless allocated, -and are aggressively returned to the kernel for reuse. -.Bd -ragged -offset indent -Most allocation implementations will store a data structure containing a -linked list in the free chunks of memory, -used to tie all the free memory together. -That can be suboptimal, -as every time the free-list is traversed, -the otherwise unused, and likely paged out, -pages are faulted into primary memory. -On systems which are paging, -this can result in a factor of five increase in the number of page-faults -done by a process. -.Ed -.Pp -A side effect of this architecture is that many minor transgressions on -the interface which would traditionally not be detected are in fact -detected. -As a result, programs that have been running happily for -years may suddenly start to complain loudly, when linked with this -allocation implementation. -.Pp -The first and most important thing to do is to set the +The first thing to do is to set the .Dq A option. This option forces a coredump (if possible) at the first sign of trouble, @@ -335,16 +351,15 @@ It is probably also a good idea to recompile the program with suitable options and symbols for debugger support. .Pp If the program starts to give unusual results, coredump or generally behave -differently without emitting any of the messages listed in the next +differently without emitting any of the messages mentioned in the next section, it is likely because it depends on the storage being filled with zero bytes. -Try running it with +Try running it with the .Dq Z option set; if that improves the situation, this diagnosis has been confirmed. If the program still misbehaves, -the likely problem is accessing memory outside the allocated area, -more likely after than before the allocated area. +the likely problem is accessing memory outside the allocated area. .Pp Alternatively, if the symptoms are not easy to reproduce, setting the .Dq J @@ -356,20 +371,14 @@ option, if supported by the kernel, can provide a detailed trace of all calls made to these functions. .Pp Unfortunately this implementation does not provide much detail about -the problems it detects, the performance impact for storing such information +the problems it detects; the performance impact for storing such information would be prohibitive. -There are a number of allocation implementations available on the 'Net -which focus on detecting and pinpointing problems by trading performance -for extra sanity checks and detailed diagnostics. +There are a number of allocation implementations available on the Internet +which focus on detecting and pinpointing problems by trading performance for +extra sanity checks and detailed diagnostics. .Sh DIAGNOSTIC MESSAGES -If -.Fn malloc , -.Fn calloc , -.Fn realloc -or -.Fn free -detect an error or warning condition, -a message will be printed to file descriptor STDERR_FILENO. +If any of the memory allocation/deallocation functions detect an error or +warning condition, a message will be printed to file descriptor STDERR_FILENO. Errors will result in the process dumping core. If the .Dq A @@ -383,65 +392,11 @@ the .Dv stderr file descriptor is not suitable for this. Please note that doing anything which tries to allocate memory in -this function will assure death of the process. -.Pp -The following is a brief description of possible error messages and -their meanings: +this function is likely to result in a crash or deadlock. .Pp +All messages are prefixed by: .Bl -diag -.It "(ES): mumble mumble mumble" -The allocation functions were compiled with -.Dq EXTRA_SANITY -defined, and an error was found during the additional error checking. -Consult the source code for further information. -.It "mmap(2) failed, check limits" -This most likely means that the system is dangerously overloaded or that -the process' limits are incorrectly specified. -.It "freelist is destroyed" -The internal free-list has been corrupted. -.It "out of memory" -The -.Dq X -option was specified and an allocation of memory failed. -.El -.Pp -The following is a brief description of possible warning messages and -their meanings: -.Bl -diag -.It "chunk/page is already free" -The process attempted to -.Fn free -memory which had already been freed. -.It "junk pointer, ..." -A pointer specified to one of the allocation functions points outside the -bounds of the memory of which they are aware. -.It "malloc() has never been called" -No memory has been allocated, -yet something is being freed or -realloc'ed. -.It "modified (chunk-/page-) pointer" -The pointer passed to -.Fn free -or -.Fn realloc -has been modified. -.It "pointer to wrong page" -The pointer that -.Fn free , -.Fn realloc , -or -.Fn reallocf -is trying to free does not reference a possible page. -.It "recursive call" -A process has attempted to call an allocation function recursively. -This is not permitted. -In particular, signal handlers should not -attempt to allocate memory. -.It "unknown char in MALLOC_OPTIONS" -An unknown option was specified. -Even with the -.Dq A -option set, this warning is still only a warning. +.It : (malloc) .El .Sh ENVIRONMENT The following environment variables affect the execution of the allocation @@ -454,11 +409,10 @@ is set, the characters it contains will be interpreted as flags to the allocation functions. .El .Sh EXAMPLES -To set a systemwide reduction of cache size, and to dump core whenever -a problem occurs: +To dump core whenever a problem occurs: .Pp .Bd -literal -offset indent -ln -s 'A<' /etc/malloc.conf +ln -s 'A' /etc/malloc.conf .Ed .Pp To specify in the source that a program does no return value checking @@ -467,12 +421,12 @@ on calls to these functions: _malloc_options = "X"; .Ed .Sh SEE ALSO -.Xr brk 2 , .Xr mmap 2 , .Xr alloca 3 , +.Xr atexit 3 , .Xr getpagesize 3 , -.Xr memory 3 -.Pa /usr/share/doc/papers/malloc.ascii.gz +.Xr memory 3 , +.Xr posix_memalign 3 .Sh STANDARDS The .Fn malloc , @@ -483,25 +437,7 @@ and functions conform to .St -isoC . .Sh HISTORY -The present allocation implementation started out as a file system for a -drum attached to a 20bit binary challenged computer which was built -with discrete germanium transistors. -It has since graduated to -handle primary storage rather than secondary. -It first appeared in its new shape and ability in -.Fx 2.2 . -.Pp The .Fn reallocf function first appeared in .Fx 3.0 . -.Sh AUTHORS -.An Poul-Henning Kamp Aq phk@FreeBSD.org -.Sh BUGS -The messages printed in case of problems provide no detail about the -actual values. -.Pp -It can be argued that returning a -.Dv NULL -pointer when asked to -allocate zero bytes is a silly response to a silly question. diff --git a/lib/libc/stdlib/malloc.c b/lib/libc/stdlib/malloc.c index d279db6f8323..a3bc33511512 100644 --- a/lib/libc/stdlib/malloc.c +++ b/lib/libc/stdlib/malloc.c @@ -1,1213 +1,4738 @@ -/* - * ---------------------------------------------------------------------------- - * "THE BEER-WARE LICENSE" (Revision 42): - * wrote this file. As long as you retain this notice you - * can do whatever you want with this stuff. If we meet some day, and you think - * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp - * ---------------------------------------------------------------------------- +/*- + * Copyright (C) 2006 Jason Evans . + * All rights reserved. * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice(s), this list of conditions and the following disclaimer as + * the first lines of this file unmodified other than the possible + * addition of one or more copyright notices. + * 2. Redistributions in binary form must reproduce the above copyright + * notice(s), this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************* + * + * Following is a brief list of features that distinguish this malloc + * implementation: + * + * + Multiple arenas are used if there are multiple CPUs, which reduces lock + * contention and cache sloshing. + * + * + Cache line sharing between arenas is avoided for internal data + * structures. + * + * + Memory is managed in chunks, rather than as individual pages. + * + * + Allocations are region-based; internal region size is a discrete + * multiple of a quantum that is appropriate for alignment constraints. + * This applies to allocations that are up to half the chunk size. + * + * + Coalescence of regions is delayed in order to reduce overhead and + * fragmentation. + * + * + realloc() always moves data, in order to reduce fragmentation. + * + * + Red-black trees are used to sort large regions. + * + * + Data structures for huge allocations are stored separately from + * allocations, which reduces thrashing during low memory conditions. + * + ******************************************************************************* */ +/* + ******************************************************************************* + * + * Ring macros. + * + ******************************************************************************* + */ + +/* Ring definitions. */ +#define qr(a_type) struct { \ + a_type *qre_next; \ + a_type *qre_prev; \ +} + +#define qr_initializer {NULL, NULL} + +/* Ring functions. */ +#define qr_new(a_qr, a_field) do { \ + (a_qr)->a_field.qre_next = (a_qr); \ + (a_qr)->a_field.qre_prev = (a_qr); \ +} while (0) + +#define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next) + +#define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev) + +#define qr_before_insert(a_qrelm, a_qr, a_field) do { \ + (a_qr)->a_field.qre_prev = (a_qrelm)->a_field.qre_prev; \ + (a_qr)->a_field.qre_next = (a_qrelm); \ + (a_qr)->a_field.qre_prev->a_field.qre_next = (a_qr); \ + (a_qrelm)->a_field.qre_prev = (a_qr); \ +} while (0) + +#define qr_after_insert(a_qrelm, a_qr, a_field) do { \ + (a_qr)->a_field.qre_next = (a_qrelm)->a_field.qre_next; \ + (a_qr)->a_field.qre_prev = (a_qrelm); \ + (a_qr)->a_field.qre_next->a_field.qre_prev = (a_qr); \ + (a_qrelm)->a_field.qre_next = (a_qr); \ +} while (0) + +#define qr_meld(a_qr_a, a_qr_b, a_type, a_field) do { \ + a_type *t; \ + (a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_b); \ + (a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_a); \ + t = (a_qr_a)->a_field.qre_prev; \ + (a_qr_a)->a_field.qre_prev = (a_qr_b)->a_field.qre_prev; \ + (a_qr_b)->a_field.qre_prev = t; \ +} while (0) + +/* qr_meld() and qr_split() are functionally equivalent, so there's no need to + * have two copies of the code. */ +#define qr_split(a_qr_a, a_qr_b, a_type, a_field) \ + qr_meld((a_qr_a), (a_qr_b), a_type, a_field) + +#define qr_remove(a_qr, a_field) do { \ + (a_qr)->a_field.qre_prev->a_field.qre_next \ + = (a_qr)->a_field.qre_next; \ + (a_qr)->a_field.qre_next->a_field.qre_prev \ + = (a_qr)->a_field.qre_prev; \ + (a_qr)->a_field.qre_next = (a_qr); \ + (a_qr)->a_field.qre_prev = (a_qr); \ +} while (0) + +#define qr_foreach(var, a_qr, a_field) \ + for ((var) = (a_qr); \ + (var) != NULL; \ + (var) = (((var)->a_field.qre_next != (a_qr)) \ + ? (var)->a_field.qre_next : NULL)) + +#define qr_reverse_foreach(var, a_qr, a_field) \ + for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL; \ + (var) != NULL; \ + (var) = (((var) != (a_qr)) \ + ? (var)->a_field.qre_prev : NULL)) + +/******************************************************************************/ + +#define MALLOC_DEBUG + #include __FBSDID("$FreeBSD$"); -/* - * Defining MALLOC_EXTRA_SANITY will enable extra checks which are related - * to internal conditions and consistency in malloc.c. This has a - * noticeable runtime performance hit, and generally will not do you - * any good unless you fiddle with the internals of malloc or want - * to catch random pointer corruption as early as possible. - */ -#undef MALLOC_EXTRA_SANITY - -/* - * What to use for Junk. This is the byte value we use to fill with - * when the 'J' option is enabled. - */ -#define SOME_JUNK 0xd0 /* as in "Duh" :-) */ - -/* - * The basic parameters you can tweak. - * - * malloc_pageshift pagesize = 1 << malloc_pageshift - * It's probably best if this is the native - * page size, but it doesn't have to be. - * - * malloc_minsize minimum size of an allocation in bytes. - * If this is too small it's too much work - * to manage them. This is also the smallest - * unit of alignment used for the storage - * returned by malloc/realloc. - * - */ - -#include "namespace.h" -#if defined(__FreeBSD__) -# if defined(__i386__) -# define malloc_pageshift 12U -# define malloc_minsize 16U -# endif -# if defined(__ia64__) -# define malloc_pageshift 13U -# define malloc_minsize 16U -# endif -# if defined(__alpha__) -# define malloc_pageshift 13U -# define malloc_minsize 16U -# endif -# if defined(__sparc64__) -# define malloc_pageshift 13U -# define malloc_minsize 16U -# endif -# if defined(__amd64__) -# define malloc_pageshift 12U -# define malloc_minsize 16U -# endif -# if defined(__arm__) -# define malloc_pageshift 12U -# define malloc_minsize 16U -# endif -# define HAS_UTRACE - /* - * Make malloc/free/realloc thread-safe in libc for use with - * kernel threads. - */ -# include "libc_private.h" -# include "spinlock.h" - static spinlock_t thread_lock = _SPINLOCK_INITIALIZER; - spinlock_t *__malloc_lock = &thread_lock; -# define _MALLOC_LOCK() if (__isthreaded) _SPINLOCK(&thread_lock); -# define _MALLOC_UNLOCK() if (__isthreaded) _SPINUNLOCK(&thread_lock); -#endif /* __FreeBSD__ */ - -#if defined(__sparc__) && defined(sun) -# define malloc_pageshift 12U -# define malloc_minsize 16U -# define MAP_ANON (0) - static int fdzero; -# define MMAP_FD fdzero -# define INIT_MMAP() \ - { if ((fdzero = _open(_PATH_DEVZERO, O_RDWR, 0000)) == -1) \ - wrterror("open of /dev/zero"); } -# define MADV_FREE MADV_DONTNEED -#endif /* __sparc__ */ - -/* Insert your combination here... */ -#if defined(__FOOCPU__) && defined(__BAROS__) -# define malloc_pageshift 12U -# define malloc_minsize 16U -#endif /* __FOOCPU__ && __BAROS__ */ - -#ifndef ZEROSIZEPTR -#define ZEROSIZEPTR ((void *)(uintptr_t)(1 << (malloc_pageshift - 1))) +#include "libc_private.h" +#ifdef MALLOC_DEBUG +# define _LOCK_DEBUG #endif - -/* - * No user serviceable parts behind this point. - */ -#include +#include "spinlock.h" +#include "namespace.h" #include +#include +#include +#include +#include +#include +#include +#include +#include /* Must come after several other sys/ includes. */ + +#include +#include +#include + #include -#include -#include -#include +#include +#include +#include +#include +#include #include #include #include +#include #include + #include "un-namespace.h" /* - * This structure describes a page worth of chunks. + * Calculate statistics that can be used to get an idea of how well caching is + * working. + */ +#define MALLOC_STATS +/* #define MALLOC_STATS_BASE */ +#define MALLOC_STATS_ARENAS + +/* + * Include redzones before/after every region, and check for buffer overflows. + */ +#define MALLOC_REDZONES +#ifdef MALLOC_REDZONES +# define MALLOC_RED_2POW 4 +# define MALLOC_RED ((size_t)(1 << MALLOC_RED_2POW)) +#endif + +#ifndef MALLOC_DEBUG +# ifndef NDEBUG +# define NDEBUG +# endif +#endif +#include + +#ifdef MALLOC_DEBUG + /* Disable inlining to make debugging easier. */ +# define __inline +#endif + +/* Size of stack-allocated buffer passed to strerror_r(). */ +#define STRERROR_BUF 64 + +/* Number of quantum-spaced bins to store free regions in. */ +#define NBINS 128 + +/* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */ +#ifdef __i386__ +# define QUANTUM_2POW_MIN 4 +# define SIZEOF_PTR 4 +# define USE_BRK +#endif +#ifdef __ia64__ +# define QUANTUM_2POW_MIN 4 +# define SIZEOF_PTR 8 +#endif +#ifdef __alpha__ +# define QUANTUM_2POW_MIN 4 +# define SIZEOF_PTR 8 +# define NO_TLS +#endif +#ifdef __sparc64__ +# define QUANTUM_2POW_MIN 4 +# define SIZEOF_PTR 8 +# define NO_TLS +#endif +#ifdef __amd64__ +# define QUANTUM_2POW_MIN 4 +# define SIZEOF_PTR 8 +#endif +#ifdef __arm__ +# define QUANTUM_2POW_MIN 3 +# define SIZEOF_PTR 4 +# define USE_BRK +# define NO_TLS +#endif +#ifdef __powerpc__ +# define QUANTUM_2POW_MIN 4 +# define SIZEOF_PTR 4 +# define USE_BRK +#endif + +/* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */ +#if (!defined(PIC) && !defined(NO_TLS)) +# define NO_TLS +#endif + +/* + * Size and alignment of memory chunks that are allocated by the OS's virtual + * memory system. + * + * chunk_size must be at least as large as the system page size. In practice, + * it actually needs to be quite a bit larger (64 KB or so), because calling + * _pthread_self() can trigger a ~16 KB allocation due to libpthread + * initialization, and infinite recursion will occur if libpthread + * initialization requires a chunk allocation. + * + * chunk_size must be <= 2^29. + */ +#define CHUNK_2POW_MIN 16 +#define CHUNK_2POW_DEFAULT 24 +#define CHUNK_2POW_MAX 29 + +/* + * Maximum size of L1 cache line. This is used to avoid cache line aliasing, + * so over-estimates are okay (up to a point), but under-estimates will + * negatively affect performance. + * + * Most allocations from base_arena use this, in order to avoid any false + * sharing. + */ +#define CACHELINE_2POW 6 +#define CACHELINE ((size_t) (1 << CACHELINE_2POW)) + +/* Default number of regions to delay coalescence for. */ +#define NDELAY 256 + +/******************************************************************************/ + +/* + * Mutexes based on spinlocks. We can't use normal pthread mutexes, because + * they require malloc()ed memory. + */ +typedef struct { + spinlock_t lock; + bool recursive; + volatile int val; +} malloc_mutex_t; + +static bool malloc_initialized = false; + +/******************************************************************************/ +/* + * Statistics data structures. */ -struct pginfo { - struct pginfo *next; /* next on the free list */ - void *page; /* Pointer to the page */ - u_short size; /* size of this page's chunks */ - u_short shift; /* How far to shift for this size chunks */ - u_short free; /* How many free chunks */ - u_short total; /* How many chunk */ - u_int bits[1]; /* Which chunks are free */ +#ifdef MALLOC_STATS + +typedef struct malloc_bin_stats_s malloc_bin_stats_t; +struct malloc_bin_stats_s { + /* + * Number of allocation requests that corresponded to the size of this + * bin. + */ + uint64_t nrequests; + + /* + * Number of best-fit allocations that were successfully serviced by + * this bin. + */ + uint64_t nfit; + + /* + * Number of allocation requests that were successfully serviced by this + * bin, but that a smaller bin could have serviced. + */ + uint64_t noverfit; + + /* High-water marks for this bin. */ + unsigned long highcached; + + /* + * Current number of regions in this bin. This number isn't needed + * during normal operation, so is maintained here in order to allow + * calculating the high water mark. + */ + unsigned nregions; +}; + +typedef struct arena_stats_s arena_stats_t; +struct arena_stats_s { + /* Number of times each function was called. */ + uint64_t nmalloc; + uint64_t npalloc; + uint64_t ncalloc; + uint64_t ndalloc; + uint64_t nralloc; + + /* Number of region splits. */ + uint64_t nsplit; + + /* Number of region coalescences. */ + uint64_t ncoalesce; + + /* Bin statistics. */ + malloc_bin_stats_t bins[NBINS]; + + /* Split statistics. */ + struct { + /* + * Number of times a region is requested from the "split" field + * of the arena. + */ + uint64_t nrequests; + + /* + * Number of times the "split" field of the arena successfully + * services requests. + */ + uint64_t nserviced; + } split; + + /* Frag statistics. */ + struct { + /* + * Number of times a region is cached in the "frag" field of + * the arena. + */ + uint64_t ncached; + + /* + * Number of times a region is requested from the "frag" field + * of the arena. + */ + uint64_t nrequests; + + /* + * Number of times the "frag" field of the arena successfully + * services requests. + */ + uint64_t nserviced; + } frag; + + /* large and large_regions statistics. */ + struct { + /* + * Number of allocation requests that were too large for a bin, + * but not large enough for a hugh allocation. + */ + uint64_t nrequests; + + /* + * Number of best-fit allocations that were successfully + * serviced by large_regions. + */ + uint64_t nfit; + + /* + * Number of allocation requests that were successfully serviced + * large_regions, but that a bin could have serviced. + */ + uint64_t noverfit; + + /* + * High-water mark for large_regions (number of nodes in tree). + */ + unsigned long highcached; + + /* + * Used only to store the current number of nodes, since this + * number isn't maintained anywhere else. + */ + unsigned long curcached; + } large; + + /* Huge allocation statistics. */ + struct { + /* Number of huge allocation requests. */ + uint64_t nrequests; + } huge; +}; + +typedef struct chunk_stats_s chunk_stats_t; +struct chunk_stats_s { + /* Number of chunks that were allocated. */ + uint64_t nchunks; + + /* High-water mark for number of chunks allocated. */ + unsigned long highchunks; + + /* + * Current number of chunks allocated. This value isn't maintained for + * any other purpose, so keep track of it in order to be able to set + * highchunks. + */ + unsigned long curchunks; +}; + +#endif /* #ifdef MALLOC_STATS */ + +/******************************************************************************/ +/* + * Chunk data structures. + */ + +/* Needed by chunk data structures. */ +typedef struct arena_s arena_t; + +/* Tree of chunks. */ +typedef struct chunk_node_s chunk_node_t; +struct chunk_node_s { + /* + * For an active chunk that is currently carved into regions by an + * arena allocator, this points to the arena that owns the chunk. We + * set this pointer even for huge allocations, so that it is possible + * to tell whether a huge allocation was done on behalf of a user + * allocation request, or on behalf of an internal allocation request. + */ + arena_t *arena; + + /* Linkage for the chunk tree. */ + RB_ENTRY(chunk_node_s) link; + + /* + * Pointer to the chunk that this tree node is responsible for. In some + * (but certainly not all) cases, this data structure is placed at the + * beginning of the corresponding chunk, so this field may point to this + * node. + */ + void *chunk; + + /* Total chunk size. */ + size_t size; + + /* Number of trailing bytes that are not used. */ + size_t extra; +}; +typedef struct chunk_tree_s chunk_tree_t; +RB_HEAD(chunk_tree_s, chunk_node_s); + +/******************************************************************************/ +/* + * Region data structures. + */ + +typedef struct region_s region_t; + +/* + * Tree of region headers, used for free regions that don't fit in the arena + * bins. + */ +typedef struct region_node_s region_node_t; +struct region_node_s { + RB_ENTRY(region_node_s) link; + region_t *reg; +}; +typedef struct region_tree_s region_tree_t; +RB_HEAD(region_tree_s, region_node_s); + +typedef struct region_prev_s region_prev_t; +struct region_prev_s { + uint32_t size; +}; + +#define NEXT_SIZE_MASK 0x1fffffffU +typedef struct { +#ifdef MALLOC_REDZONES + char prev_red[MALLOC_RED]; +#endif + /* + * Typical bit pattern for bits: + * + * pncsssss ssssssss ssssssss ssssssss + * + * p : Previous free? + * n : Next free? + * c : Part of a range of contiguous allocations? + * s : Next size (number of quanta). + * + * It's tempting to use structure bitfields here, but the compiler has + * to make assumptions that make the code slower than direct bit + * manipulations, and these fields are manipulated a lot. + */ + uint32_t bits; + +#ifdef MALLOC_REDZONES + size_t next_exact_size; + char next_red[MALLOC_RED]; +#endif +} region_sep_t; + +typedef struct region_next_small_sizer_s region_next_small_sizer_t; +struct region_next_small_sizer_s +{ + qr(region_t) link; +}; + +typedef struct region_next_small_s region_next_small_t; +struct region_next_small_s +{ + qr(region_t) link; + + /* Only accessed for delayed regions & footer invalid. */ + uint32_t slot; +}; + +typedef struct region_next_large_s region_next_large_t; +struct region_next_large_s +{ + region_node_t node; + + /* Use for LRU vs MRU tree ordering. */ + bool lru; +}; + +typedef struct region_next_s region_next_t; +struct region_next_s { + union { + region_next_small_t s; + region_next_large_t l; + } u; }; /* - * This structure describes a number of free pages. + * Region separator, including prev/next fields that are only accessible when + * the neighboring regions are free. */ +struct region_s { + /* This field must only be accessed if sep.prev_free is true. */ + region_prev_t prev; -struct pgfree { - struct pgfree *next; /* next run of free pages */ - struct pgfree *prev; /* prev run of free pages */ - void *page; /* pointer to free pages */ - void *end; /* pointer to end of free pages */ - size_t size; /* number of bytes free */ + /* Actual region separator that is always present between regions. */ + region_sep_t sep; + + /* + * These fields must only be accessed if sep.next_free or + * sep.next_contig is true. + */ + region_next_t next; }; +/* Small variant of region separator, only used for size calculations. */ +typedef struct region_small_sizer_s region_small_sizer_t; +struct region_small_sizer_s { + region_prev_t prev; + region_sep_t sep; + region_next_small_sizer_t next; +}; + +/******************************************************************************/ /* - * How many bits per u_int in the bitmap. - * Change only if not 8 bits/byte + * Arena data structures. */ -#define MALLOC_BITS (8*sizeof(u_int)) + +typedef struct arena_bin_s arena_bin_t; +struct arena_bin_s { + /* + * Link into ring before the oldest free region and just after the + * newest free region. + */ + region_t regions; +}; + +struct arena_s { +#ifdef MALLOC_DEBUG + uint32_t magic; +# define ARENA_MAGIC 0x947d3d24 +#endif + + /* All operations on this arena require that mtx be locked. */ + malloc_mutex_t mtx; + + /* True if this arena is allocated from base_arena. */ + bool malloced; + + /* + * bins is used to store rings of free regions of the following sizes, + * assuming a 16-byte quantum (sizes include region separators): + * + * bins[i] | size | + * --------+------+ + * 0 | 32 | + * 1 | 48 | + * 2 | 64 | + * : : + * : : + * --------+------+ + */ + arena_bin_t bins[NBINS]; + + /* + * A bitmask that corresponds to which bins have elements in them. + * This is used when searching for the first bin that contains a free + * region that is large enough to service an allocation request. + */ +#define BINMASK_NELMS (NBINS / (sizeof(int) << 3)) + int bins_mask[BINMASK_NELMS]; + + /* + * Tree of free regions of the size range [bin_maxsize..~chunk). These + * are sorted primarily by size, and secondarily by LRU. + */ + region_tree_t large_regions; + + /* + * If not NULL, a region that is not stored in bins or large_regions. + * If large enough, this region is used instead of any region stored in + * bins or large_regions, in order to reduce the number of insert/remove + * operations, and in order to increase locality of allocation in + * common cases. + */ + region_t *split; + + /* + * If not NULL, a region that is not stored in bins or large_regions. + * If large enough, this region is preferentially used for small + * allocations over any region in large_regions, split, or over-fit + * small bins. + */ + region_t *frag; + + /* Tree of chunks that this arena currenly owns. */ + chunk_tree_t chunks; + unsigned nchunks; + + /* + * FIFO ring of free regions for which coalescence is delayed. A slot + * that contains NULL is considered empty. opt_ndelay stores how many + * elements there are in the FIFO. + */ + region_t **delayed; + uint32_t next_delayed; /* Next slot in delayed to use. */ + +#ifdef MALLOC_STATS + /* Total byte count of allocated memory, not including overhead. */ + size_t allocated; + + arena_stats_t stats; +#endif +}; + +/******************************************************************************/ +/* + * Data. + */ + +/* Used as a special "nil" return value for malloc(0). */ +static int nil; + +/* Number of CPUs. */ +static unsigned ncpus; + +/* VM page size. */ +static unsigned pagesize; + +/* Various quantum-related settings. */ +static size_t quantum; +static size_t quantum_mask; /* (quantum - 1). */ +static size_t bin_shift; +static size_t bin_maxsize; + +/* Various chunk-related settings. */ +static size_t chunk_size; +static size_t chunk_size_mask; /* (chunk_size - 1). */ + +/********/ +/* + * Chunks. + */ + +/* Protects chunk-related data structures. */ +static malloc_mutex_t chunks_mtx; + +/* Tree of chunks that are stand-alone huge allocations. */ +static chunk_tree_t huge; + +#ifdef USE_BRK +/* + * Try to use brk for chunk-size allocations, due to address space constraints. + */ +void *brk_base; /* Result of first sbrk(0) call. */ +void *brk_prev; /* Current end of brk, or ((void *)-1) if brk is exhausted. */ +void *brk_max; /* Upper limit on brk addresses (may be an over-estimate). */ +#endif + +#ifdef MALLOC_STATS +/* + * Byte counters for allocated/total space used by the chunks in the huge + * allocations tree. + */ +static size_t huge_allocated; +static size_t huge_total; +#endif /* - * Magic values to put in the page_directory + * Tree of chunks that were previously allocated. This is used when allocating + * chunks, in an attempt to re-use address space. */ -#define MALLOC_NOT_MINE ((struct pginfo*) 0) -#define MALLOC_FREE ((struct pginfo*) 1) -#define MALLOC_FIRST ((struct pginfo*) 2) -#define MALLOC_FOLLOW ((struct pginfo*) 3) -#define MALLOC_MAGIC ((struct pginfo*) 4) +static chunk_tree_t old_chunks; -#ifndef malloc_pageshift -#define malloc_pageshift 12U +/********/ +/* + * Arenas. + */ + +/* Arena that is used for internal memory allocations. */ +static arena_t base_arena; + +/* + * Arenas that are used to service external requests. Not all elements of the + * arenas array are necessarily used; arenas are created lazily as needed. + */ +static arena_t **arenas; +static unsigned narenas; +#ifndef NO_TLS +static unsigned next_arena; +#endif +static malloc_mutex_t arenas_mtx; /* Protects arenas initialization. */ + +#ifndef NO_TLS +/* + * Map of pthread_self() --> arenas[???], used for selecting an arena to use + * for allocations. + */ +static __thread arena_t *arenas_map; #endif -#ifndef malloc_minsize -#define malloc_minsize 16U +#ifdef MALLOC_STATS +/* Chunk statistics. */ +chunk_stats_t stats_chunks; #endif -#if !defined(malloc_pagesize) -#define malloc_pagesize (1UL<>1) +static void arena_dalloc(arena_t *arena, void *ptr); +#ifdef NOT_YET +static void *arena_ralloc(arena_t *arena, void *ptr, size_t size); #endif - -/* A mask for the offset inside a page. */ -#define malloc_pagemask ((malloc_pagesize)-1) - -#define pageround(foo) (((foo) + (malloc_pagemask))&(~(malloc_pagemask))) -#define ptr2index(foo) (((u_long)(foo) >> malloc_pageshift)-malloc_origo) - -#ifndef _MALLOC_LOCK -#define _MALLOC_LOCK() +#ifdef MALLOC_STATS +static bool arena_stats(arena_t *arena, size_t *allocated, size_t *total); #endif - -#ifndef _MALLOC_UNLOCK -#define _MALLOC_UNLOCK() +static void arena_new(arena_t *arena, bool malloced, bool recursive); +static arena_t *arenas_extend(unsigned ind); +#ifndef NO_TLS +static arena_t *choose_arena_hard(void); #endif - -#ifndef MMAP_FD -#define MMAP_FD (-1) +static void *huge_malloc(arena_t *arena, size_t size); +static void huge_dalloc(void *ptr); +static void *imalloc(arena_t *arena, size_t size); +static void *ipalloc(arena_t *arena, size_t alignment, size_t size); +static void *icalloc(arena_t *arena, size_t num, size_t size); +static size_t isalloc(void *ptr); +static void idalloc(void *ptr); +static void *iralloc(arena_t *arena, void *ptr, size_t size); +#ifdef MALLOC_STATS +static void istats(size_t *allocated, size_t *total); #endif - -#ifndef INIT_MMAP -#define INIT_MMAP() -#endif - -/* Number of free pages we cache */ -static unsigned malloc_cache = 16; - -/* The offset from pagenumber to index into the page directory */ -static u_long malloc_origo; - -/* The last index in the page directory we care about */ -static u_long last_index; - -/* Pointer to page directory. Allocated "as if with" malloc */ -static struct pginfo **page_dir; - -/* How many slots in the page directory */ -static unsigned malloc_ninfo; - -/* Free pages line up here */ -static struct pgfree free_list; - -/* Abort(), user doesn't handle problems. */ -static int malloc_abort = 1; - -/* Are we trying to die ? */ -static int suicide; - -/* always realloc ? */ -static int malloc_realloc; - -#if defined(MADV_FREE) -/* pass the kernel a hint on free pages ? */ -static int malloc_hint = 0; -#endif - -/* xmalloc behaviour ? */ -static int malloc_xmalloc; - -/* sysv behaviour for malloc(0) ? */ -static int malloc_sysv; - -/* zero fill ? */ -static int malloc_zero; - -/* junk fill ? */ -static int malloc_junk = 1; - -#ifdef HAS_UTRACE - -/* utrace ? */ -static int malloc_utrace; - -struct ut { void *p; size_t s; void *r; }; - -void utrace(struct ut *, int); - -#define UTRACE(a, b, c) \ - if (malloc_utrace) \ - {struct ut u; u.p=a; u.s = b; u.r=c; utrace(&u, sizeof u);} -#else /* !HAS_UTRACE */ -#define UTRACE(a,b,c) -#endif /* HAS_UTRACE */ - -/* my last break. */ -static void *malloc_brk; - -/* one location cache for free-list holders */ -static struct pgfree *px; - -/* compile-time options */ -const char *_malloc_options; - -/* Name of the current public function */ -static const char *malloc_func; - -/* Macro for mmap */ -#define MMAP(size) \ - mmap(NULL, (size), PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, \ - MMAP_FD, (off_t)0); +static void malloc_print_stats(void); +static void malloc_init_hard(void); /* - * Necessary function declarations + * End function prototypes. */ -static int extend_pgdir(u_long index); -static void *imalloc(size_t size); -static void ifree(void *ptr); -static void *irealloc(void *ptr, size_t size); +/******************************************************************************/ +/* + * Begin mutex. + */ + +static void +malloc_mutex_init(malloc_mutex_t *a_mutex) +{ + static const spinlock_t lock = _SPINLOCK_INITIALIZER; + + a_mutex->lock = lock; + a_mutex->recursive = false; + a_mutex->val = 0; +} + +static void +malloc_mutex_recursive_init(malloc_mutex_t *a_mutex) +{ + static const spinlock_t lock = _SPINLOCK_INITIALIZER; + + a_mutex->lock = lock; + a_mutex->recursive = true; + a_mutex->val = 0; +} + +static __inline void +malloc_mutex_lock(malloc_mutex_t *a_mutex) +{ +#define MAX_SPIN 1024 + + if (__isthreaded) { + if (a_mutex->recursive == false) + _SPINLOCK(&a_mutex->lock); + else { + volatile long *owner = &a_mutex->lock.lock_owner; + long self; + + self = (long)_pthread_self(); + + if (atomic_cmpset_acq_long(owner, self, self) == 0) + _SPINLOCK(&a_mutex->lock); + a_mutex->val++; + } + } +} + +static __inline void +malloc_mutex_unlock(malloc_mutex_t *a_mutex) +{ + + if (__isthreaded) { + if (a_mutex->recursive == false) { + _SPINUNLOCK(&a_mutex->lock); + } else { + a_mutex->val--; + if (a_mutex->val == 0) { + _SPINUNLOCK(&a_mutex->lock); + } + } + } +} + +/* + * End mutex. + */ +/******************************************************************************/ +/* + * Begin Utility functions/macros. + */ + +/* Return the chunk address for allocation address a. */ +#define CHUNK_ADDR2BASE(a) \ + ((void *) ((size_t) (a) & ~chunk_size_mask)) + +/* Return the chunk offset of address a. */ +#define CHUNK_ADDR2OFFSET(a) \ + ((size_t) (a) & chunk_size_mask) + +/* Return the smallest chunk multiple that is >= s. */ +#define CHUNK_CEILING(s) \ + (((s) + chunk_size_mask) & ~chunk_size_mask) + +/* Return the smallest quantum multiple that is >= a. */ +#define QUANTUM_CEILING(a) \ + (((a) + quantum_mask) & ~quantum_mask) + +/* Return the offset within a chunk to the first region separator. */ +#define CHUNK_REG_OFFSET \ + (QUANTUM_CEILING(sizeof(chunk_node_t) + \ + sizeof(region_sep_t)) - offsetof(region_t, next)) + +/* + * Return how many bytes of usable space are needed for an allocation of size + * bytes. This value is not a multiple of quantum, since it doesn't include + * the region separator. + */ +static __inline size_t +region_ceiling(size_t size) +{ + size_t quantum_size, min_reg_quantum; + + quantum_size = QUANTUM_CEILING(size + sizeof(region_sep_t)); + min_reg_quantum = QUANTUM_CEILING(sizeof(region_small_sizer_t)); + if (quantum_size >= min_reg_quantum) + return (quantum_size); + else + return (min_reg_quantum); +} static void wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4) { - _write(STDERR_FILENO, p1, strlen(p1)); - _write(STDERR_FILENO, p2, strlen(p2)); - _write(STDERR_FILENO, p3, strlen(p3)); - _write(STDERR_FILENO, p4, strlen(p4)); + _write(STDERR_FILENO, p1, strlen(p1)); + _write(STDERR_FILENO, p2, strlen(p2)); + _write(STDERR_FILENO, p3, strlen(p3)); + _write(STDERR_FILENO, p4, strlen(p4)); } -void (*_malloc_message)(const char *p1, const char *p2, const char *p3, +void (*_malloc_message)(const char *p1, const char *p2, const char *p3, const char *p4) = wrtmessage; +/* + * Print to stderr in such a way as to (hopefully) avoid memory allocation. + */ static void -wrterror(char const *p) +malloc_printf(const char *format, ...) { + char buf[4096]; + va_list ap; - suicide = 1; - _malloc_message(_getprogname(), malloc_func, " error: ", p); - abort(); -} - -static void -wrtwarning(char *p) -{ - - /* - * Sensitive processes, somewhat arbitrarily defined here as setuid, - * setgid, root and wheel cannot afford to have malloc mistakes. - */ - if (malloc_abort || issetugid() || getuid() == 0 || getgid() == 0) - wrterror(p); - _malloc_message(_getprogname(), malloc_func, " warning: ", p); + va_start(ap, format); + vsnprintf(buf, sizeof(buf), format, ap); + va_end(ap); + _malloc_message(buf, "", "", ""); } /* - * Allocate a number of pages from the OS + * Note that no bitshifting is done for booleans in any of the bitmask-based + * flag manipulation functions that follow; test for non-zero versus zero. */ -static void * -map_pages(size_t pages) + +/**********************/ +static __inline uint32_t +region_prev_free_get(region_sep_t *sep) { - caddr_t result, tail; - result = (caddr_t)pageround((u_long)sbrk(0)); - tail = result + (pages << malloc_pageshift); - if (tail < result) - return (NULL); - - if (brk(tail)) { -#ifdef MALLOC_EXTRA_SANITY - wrterror("(ES): map_pages fails\n"); -#endif /* MALLOC_EXTRA_SANITY */ - return (NULL); - } - - last_index = ptr2index(tail) - 1; - malloc_brk = tail; - - if ((last_index+1) >= malloc_ninfo && !extend_pgdir(last_index)) - return (NULL); - - return (result); + return ((sep->bits) & 0x80000000U); } -/* - * Extend page directory - */ -static int -extend_pgdir(u_long index) +static __inline void +region_prev_free_set(region_sep_t *sep) { - struct pginfo **new, **old; - u_long i, oldlen; - /* Make it this many pages */ - i = index * sizeof *page_dir; - i /= malloc_pagesize; - i += 2; - - /* remember the old mapping size */ - oldlen = malloc_ninfo * sizeof *page_dir; - - /* - * NOTE: we allocate new pages and copy the directory rather than tempt - * fate by trying to "grow" the region.. There is nothing to prevent - * us from accidently re-mapping space that's been allocated by our caller - * via dlopen() or other mmap(). - * - * The copy problem is not too bad, as there is 4K of page index per - * 4MB of malloc arena. - * - * We can totally avoid the copy if we open a file descriptor to associate - * the anon mappings with. Then, when we remap the pages at the new - * address, the old pages will be "magically" remapped.. But this means - * keeping open a "secret" file descriptor..... - */ - - /* Get new pages */ - new = (struct pginfo**) MMAP(i * malloc_pagesize); - if (new == MAP_FAILED) - return (0); - - /* Copy the old stuff */ - memcpy(new, page_dir, - malloc_ninfo * sizeof *page_dir); - - /* register the new size */ - malloc_ninfo = i * malloc_pagesize / sizeof *page_dir; - - /* swap the pointers */ - old = page_dir; - page_dir = new; - - /* Now free the old stuff */ - munmap(old, oldlen); - return (1); + sep->bits = ((sep->bits) | 0x80000000U); } -/* - * Initialize the world - */ +static __inline void +region_prev_free_unset(region_sep_t *sep) +{ + + sep->bits = ((sep->bits) & 0x7fffffffU); +} + +/**********************/ +static __inline uint32_t +region_next_free_get(region_sep_t *sep) +{ + + return ((sep->bits) & 0x40000000U); +} + +static __inline void +region_next_free_set(region_sep_t *sep) +{ + + sep->bits = ((sep->bits) | 0x40000000U); +} + +static __inline void +region_next_free_unset(region_sep_t *sep) +{ + + sep->bits = ((sep->bits) & 0xbfffffffU); +} + +/**********************/ +static __inline uint32_t +region_next_contig_get(region_sep_t *sep) +{ + + return ((sep->bits) & 0x20000000U); +} + +static __inline void +region_next_contig_set(region_sep_t *sep) +{ + + sep->bits = ((sep->bits) | 0x20000000U); +} + +static __inline void +region_next_contig_unset(region_sep_t *sep) +{ + + sep->bits = ((sep->bits) & 0xdfffffffU); +} + +/********************/ +static __inline size_t +region_next_size_get(region_sep_t *sep) +{ + + return ((size_t) (((sep->bits) & NEXT_SIZE_MASK) << opt_quantum_2pow)); +} + +static __inline void +region_next_size_set(region_sep_t *sep, size_t size) +{ + uint32_t bits; + + assert(size % quantum == 0); + + bits = sep->bits; + bits &= ~NEXT_SIZE_MASK; + bits |= (((uint32_t) size) >> opt_quantum_2pow); + + sep->bits = bits; +} + +#ifdef MALLOC_STATS static void -malloc_init(void) +stats_merge(arena_t *arena, arena_stats_t *stats_arenas) { - const char *p; - char b[64]; - int i, j; - int save_errno = errno; + unsigned i; - INIT_MMAP(); + stats_arenas->nmalloc += arena->stats.nmalloc; + stats_arenas->npalloc += arena->stats.npalloc; + stats_arenas->ncalloc += arena->stats.ncalloc; + stats_arenas->ndalloc += arena->stats.ndalloc; + stats_arenas->nralloc += arena->stats.nralloc; -#ifdef MALLOC_EXTRA_SANITY - malloc_junk = 1; -#endif /* MALLOC_EXTRA_SANITY */ + stats_arenas->nsplit += arena->stats.nsplit; + stats_arenas->ncoalesce += arena->stats.ncoalesce; - for (i = 0; i < 3; i++) { - if (i == 0) { - j = readlink("/etc/malloc.conf", b, sizeof b - 1); - if (j <= 0) - continue; - b[j] = '\0'; - p = b; - } else if (i == 1 && issetugid() == 0) { - p = getenv("MALLOC_OPTIONS"); - } else if (i == 1) { - continue; - } else { - p = _malloc_options; + /* Split. */ + stats_arenas->split.nrequests += arena->stats.split.nrequests; + stats_arenas->split.nserviced += arena->stats.split.nserviced; + + /* Frag. */ + stats_arenas->frag.ncached += arena->stats.frag.ncached; + stats_arenas->frag.nrequests += arena->stats.frag.nrequests; + stats_arenas->frag.nserviced += arena->stats.frag.nserviced; + + /* Bins. */ + for (i = 0; i < NBINS; i++) { + stats_arenas->bins[i].nrequests += + arena->stats.bins[i].nrequests; + stats_arenas->bins[i].nfit += arena->stats.bins[i].nfit; + stats_arenas->bins[i].noverfit += arena->stats.bins[i].noverfit; + if (arena->stats.bins[i].highcached + > stats_arenas->bins[i].highcached) { + stats_arenas->bins[i].highcached + = arena->stats.bins[i].highcached; + } } - for (; p != NULL && *p != '\0'; p++) { - switch (*p) { - case '>': malloc_cache <<= 1; break; - case '<': malloc_cache >>= 1; break; - case 'a': malloc_abort = 0; break; - case 'A': malloc_abort = 1; break; -#if defined(MADV_FREE) - case 'h': malloc_hint = 0; break; - case 'H': malloc_hint = 1; break; + + /* large and large_regions. */ + stats_arenas->large.nrequests += arena->stats.large.nrequests; + stats_arenas->large.nfit += arena->stats.large.nfit; + stats_arenas->large.noverfit += arena->stats.large.noverfit; + if (arena->stats.large.highcached > stats_arenas->large.highcached) + stats_arenas->large.highcached = arena->stats.large.highcached; + stats_arenas->large.curcached += arena->stats.large.curcached; + + /* Huge allocations. */ + stats_arenas->huge.nrequests += arena->stats.huge.nrequests; +} + +static void +stats_print(arena_stats_t *stats_arenas) +{ + unsigned i; + + malloc_printf("calls:\n"); + malloc_printf(" %13s%13s%13s%13s%13s\n", "nmalloc", "npalloc", + "ncalloc", "ndalloc", "nralloc"); + malloc_printf(" %13llu%13llu%13llu%13llu%13llu\n", + stats_arenas->nmalloc, stats_arenas->npalloc, stats_arenas->ncalloc, + stats_arenas->ndalloc, stats_arenas->nralloc); + + malloc_printf("region events:\n"); + malloc_printf(" %13s%13s\n", "nsplit", "ncoalesce"); + malloc_printf(" %13llu%13llu\n", stats_arenas->nsplit, + stats_arenas->ncoalesce); + + malloc_printf("cached split usage:\n"); + malloc_printf(" %13s%13s\n", "nrequests", "nserviced"); + malloc_printf(" %13llu%13llu\n", stats_arenas->split.nrequests, + stats_arenas->split.nserviced); + + malloc_printf("cached frag usage:\n"); + malloc_printf(" %13s%13s%13s\n", "ncached", "nrequests", "nserviced"); + malloc_printf(" %13llu%13llu%13llu\n", stats_arenas->frag.ncached, + stats_arenas->frag.nrequests, stats_arenas->frag.nserviced); + + malloc_printf("bins:\n"); + malloc_printf(" %4s%7s%13s%13s%13s%11s\n", "bin", + "size", "nrequests", "nfit", "noverfit", "highcached"); + for (i = 0; i < NBINS; i++) { + malloc_printf( + " %4u%7u%13llu%13llu%13llu%11lu\n", + i, ((i + bin_shift) << opt_quantum_2pow), + stats_arenas->bins[i].nrequests, stats_arenas->bins[i].nfit, + stats_arenas->bins[i].noverfit, + stats_arenas->bins[i].highcached); + } + + malloc_printf("large:\n"); + malloc_printf(" %13s%13s%13s%13s%13s\n", "nrequests", "nfit", + "noverfit", "highcached", "curcached"); + malloc_printf(" %13llu%13llu%13llu%13lu%13lu\n", + stats_arenas->large.nrequests, stats_arenas->large.nfit, + stats_arenas->large.noverfit, stats_arenas->large.highcached, + stats_arenas->large.curcached); + + malloc_printf("huge\n"); + malloc_printf(" %13s\n", "nrequests"); + malloc_printf(" %13llu\n", stats_arenas->huge.nrequests); +} #endif - case 'r': malloc_realloc = 0; break; - case 'R': malloc_realloc = 1; break; - case 'j': malloc_junk = 0; break; - case 'J': malloc_junk = 1; break; -#ifdef HAS_UTRACE - case 'u': malloc_utrace = 0; break; - case 'U': malloc_utrace = 1; break; -#endif - case 'v': malloc_sysv = 0; break; - case 'V': malloc_sysv = 1; break; - case 'x': malloc_xmalloc = 0; break; - case 'X': malloc_xmalloc = 1; break; - case 'z': malloc_zero = 0; break; - case 'Z': malloc_zero = 1; break; - default: - _malloc_message(_getprogname(), malloc_func, - " warning: ", "unknown char in MALLOC_OPTIONS\n"); - break; - } - } - } - - - UTRACE(0, 0, 0); - - /* - * We want junk in the entire allocation, and zero only in the part - * the user asked for. - */ - if (malloc_zero) - malloc_junk=1; - - /* Allocate one page for the page directory */ - page_dir = (struct pginfo **) MMAP(malloc_pagesize); - - if (page_dir == MAP_FAILED) - wrterror("mmap(2) failed, check limits\n"); - - /* - * We need a maximum of malloc_pageshift buckets, steal these from the - * front of the page_directory; - */ - malloc_origo = ((u_long)pageround((u_long)sbrk(0))) >> malloc_pageshift; - malloc_origo -= malloc_pageshift; - - malloc_ninfo = malloc_pagesize / sizeof *page_dir; - - /* Recalculate the cache size in bytes, and make sure it's nonzero */ - - if (!malloc_cache) - malloc_cache++; - - malloc_cache <<= malloc_pageshift; - - /* - * This is a nice hack from Kaleb Keithly (kaleb@x.org). - * We can sbrk(2) further back when we keep this on a low address. - */ - px = (struct pgfree *) imalloc (sizeof *px); - errno = save_errno; -} /* - * Allocate a number of complete pages + * End Utility functions/macros. */ -static void * -malloc_pages(size_t size) -{ - void *p, *delay_free = NULL; - size_t i; - struct pgfree *pf; - u_long index; - - size = pageround(size); - - p = NULL; - - /* Look for free pages before asking for more */ - for(pf = free_list.next; pf; pf = pf->next) { - -#ifdef MALLOC_EXTRA_SANITY - if (pf->size & malloc_pagemask) - wrterror("(ES): junk length entry on free_list\n"); - if (!pf->size) - wrterror("(ES): zero length entry on free_list\n"); - if (pf->page == pf->end) - wrterror("(ES): zero entry on free_list\n"); - if (pf->page > pf->end) - wrterror("(ES): sick entry on free_list\n"); - if ((void*)pf->page >= (void*)sbrk(0)) - wrterror("(ES): entry on free_list past brk\n"); - if (page_dir[ptr2index(pf->page)] != MALLOC_FREE) - wrterror("(ES): non-free first page on free-list\n"); - if (page_dir[ptr2index(pf->end)-1] != MALLOC_FREE) - wrterror("(ES): non-free last page on free-list\n"); -#endif /* MALLOC_EXTRA_SANITY */ - - if (pf->size < size) - continue; - - if (pf->size == size) { - p = pf->page; - if (pf->next != NULL) - pf->next->prev = pf->prev; - pf->prev->next = pf->next; - delay_free = pf; - break; - } - - p = pf->page; - pf->page = (char *)pf->page + size; - pf->size -= size; - break; - } - -#ifdef MALLOC_EXTRA_SANITY - if (p != NULL && page_dir[ptr2index(p)] != MALLOC_FREE) - wrterror("(ES): allocated non-free page on free-list\n"); -#endif /* MALLOC_EXTRA_SANITY */ - - size >>= malloc_pageshift; - - /* Map new pages */ - if (p == NULL) - p = map_pages(size); - - if (p != NULL) { - - index = ptr2index(p); - page_dir[index] = MALLOC_FIRST; - for (i=1;ibits[0] * - (((malloc_pagesize >> bits)+MALLOC_BITS-1) / MALLOC_BITS); - - /* Don't waste more than two chunks on this */ - if ((1<<(bits)) <= l+l) { - bp = (struct pginfo *)pp; - } else { - bp = (struct pginfo *)imalloc(l); - if (bp == NULL) { - ifree(pp); - return (0); - } - } - - bp->size = (1<shift = bits; - bp->total = bp->free = malloc_pagesize >> bits; - bp->page = pp; - - /* set all valid bits in the bitmap */ - k = bp->total; - i = 0; - - /* Do a bunch at a time */ - for(;k-i >= MALLOC_BITS; i += MALLOC_BITS) - bp->bits[i / MALLOC_BITS] = ~0; - - for(; i < k; i++) - bp->bits[i/MALLOC_BITS] |= 1<<(i%MALLOC_BITS); - - if (bp == bp->page) { - /* Mark the ones we stole for ourselves */ - for(i=0;l > 0;i++) { - bp->bits[i/MALLOC_BITS] &= ~(1<<(i%MALLOC_BITS)); - bp->free--; - bp->total--; - l -= (1 << bits); - } - } - - /* MALLOC_LOCK */ - - page_dir[ptr2index(pp)] = bp; - - bp->next = page_dir[bits]; - page_dir[bits] = bp; - - /* MALLOC_UNLOCK */ - - return (1); -} - -/* - * Allocate a fragment - */ -static void * -malloc_bytes(size_t size) -{ - int i,j; - u_int u; - struct pginfo *bp; - int k; - u_int *lp; - - /* Don't bother with anything less than this */ - if (size < malloc_minsize) - size = malloc_minsize; - - /* Find the right bucket */ - j = 1; - i = size-1; - while (i >>= 1) - j++; - - /* If it's empty, make a page more of that size chunks */ - if (page_dir[j] == NULL && !malloc_make_chunks(j)) - return (NULL); - - bp = page_dir[j]; - - /* Find first word of bitmap which isn't empty */ - for (lp = bp->bits; !*lp; lp++) - ; - - /* Find that bit, and tweak it */ - u = 1; - k = 0; - while (!(*lp & u)) { - u += u; - k++; - } - *lp ^= u; - - /* If there are no more free, remove from free-list */ - if (!--bp->free) { - page_dir[j] = bp->next; - bp->next = NULL; - } - - /* Adjust to the real offset of that chunk */ - k += (lp-bp->bits)*MALLOC_BITS; - k <<= bp->shift; - - if (malloc_junk) - memset((u_char *)bp->page + k, SOME_JUNK, bp->size); - - return ((u_char *)bp->page + k); -} - -/* - * Allocate a piece of memory - */ -static void * -imalloc(size_t size) -{ - void *result; - - if (suicide) - abort(); - - if ((size + malloc_pagesize) < size) /* Check for overflow */ - result = NULL; - else if ((size + malloc_pagesize) >= (uintptr_t)page_dir) - result = NULL; - else if (size <= malloc_maxsize) - result = malloc_bytes(size); - else - result = malloc_pages(size); - - if (malloc_zero && result != NULL) - memset(result, 0, size); - - return (result); -} - -/* - * Change the size of an allocation. - */ -static void * -irealloc(void *ptr, size_t size) -{ - void *p; - u_long osize, index; - struct pginfo **mp; - int i; - - if (suicide) - abort(); - - index = ptr2index(ptr); - - if (index < malloc_pageshift) { - wrtwarning("junk pointer, too low to make sense\n"); - return (NULL); - } - - if (index > last_index) { - wrtwarning("junk pointer, too high to make sense\n"); - return (NULL); - } - - mp = &page_dir[index]; - - if (*mp == MALLOC_FIRST) { /* Page allocation */ - - /* Check the pointer */ - if ((u_long)ptr & malloc_pagemask) { - wrtwarning("modified (page-) pointer\n"); - return (NULL); - } - - /* Find the size in bytes */ - for (osize = malloc_pagesize; *(++mp) == MALLOC_FOLLOW;) - osize += malloc_pagesize; - - if (!malloc_realloc && /* Unless we have to, */ - size <= osize && /* .. or are too small, */ - size > (osize - malloc_pagesize)) { /* .. or can free a page, */ - if (malloc_junk) - memset((u_char *)ptr + size, SOME_JUNK, osize-size); - return (ptr); /* ..don't do anything else. */ - } - - } else if (*mp >= MALLOC_MAGIC) { /* Chunk allocation */ - - /* Check the pointer for sane values */ - if (((u_long)ptr & ((*mp)->size-1))) { - wrtwarning("modified (chunk-) pointer\n"); - return (NULL); - } - - /* Find the chunk index in the page */ - i = ((u_long)ptr & malloc_pagemask) >> (*mp)->shift; - - /* Verify that it isn't a free chunk already */ - if ((*mp)->bits[i/MALLOC_BITS] & (1<<(i%MALLOC_BITS))) { - wrtwarning("chunk is already free\n"); - return (NULL); - } - - osize = (*mp)->size; - - if (!malloc_realloc && /* Unless we have to, */ - size <= osize && /* ..or are too small, */ - (size > osize/2 || /* ..or could use a smaller size, */ - osize == malloc_minsize)) { /* ..(if there is one) */ - if (malloc_junk) - memset((u_char *)ptr + size, SOME_JUNK, osize-size); - return (ptr); /* ..don't do anything else. */ - } - - } else { - wrtwarning("pointer to wrong page\n"); - return (NULL); - } - - p = imalloc(size); - - if (p != NULL) { - /* copy the lesser of the two sizes, and free the old one */ - if (!size || !osize) - ; - else if (osize < size) - memcpy(p, ptr, osize); + if ((size_t) a->chunk < (size_t) b->chunk) + ret = -1; + else if (a->chunk == b->chunk) + ret = 0; else - memcpy(p, ptr, size); - ifree(ptr); - } - return (p); + ret = 1; + + return (ret); } -/* - * Free a sequence of pages - */ +/* Generate red-black tree code for chunks. */ +RB_GENERATE(chunk_tree_s, chunk_node_s, link, chunk_comp); -static __inline void -free_pages(void *ptr, u_long index, struct pginfo const *info) +static __inline int +region_comp(region_node_t *a, region_node_t *b) { - u_long i; - struct pgfree *pf, *pt=NULL; - u_long l; - void *tail; + int ret; + size_t size_a, size_b; - if (info == MALLOC_FREE) { - wrtwarning("page is already free\n"); - return; - } + assert(a != NULL); + assert(b != NULL); - if (info != MALLOC_FIRST) { - wrtwarning("pointer to wrong page\n"); - return; - } + size_a = region_next_size_get(&a->reg->sep); + size_b = region_next_size_get(&b->reg->sep); + if (size_a < size_b) + ret = -1; + else if (size_a == size_b) { + if (a == b) { + /* Regions are equal with themselves. */ + ret = 0; + } else { + if (a->reg->next.u.l.lru) { + /* + * Oldest region comes first (secondary LRU + * ordering). a is guaranteed to be the search + * key, which is how we can enforce this + * secondary ordering. + */ + ret = 1; + } else { + /* + * Oldest region comes last (secondary MRU + * ordering). a is guaranteed to be the search + * key, which is how we can enforce this + * secondary ordering. + */ + ret = -1; + } + } + } else + ret = 1; - if ((u_long)ptr & malloc_pagemask) { - wrtwarning("modified (page-) pointer\n"); - return; - } + return (ret); +} - /* Count how many pages and mark them free at the same time */ - page_dir[index] = MALLOC_FREE; - for (i = 1; page_dir[index+i] == MALLOC_FOLLOW; i++) - page_dir[index + i] = MALLOC_FREE; +/* Generate red-black tree code for regions. */ +RB_GENERATE(region_tree_s, region_node_s, link, region_comp); - l = i << malloc_pageshift; +static void * +pages_map(void *addr, size_t size) +{ + void *ret; - if (malloc_junk) - memset(ptr, SOME_JUNK, l); +#ifdef USE_BRK +AGAIN: +#endif + /* + * We don't use MAP_FIXED here, because it can cause the *replacement* + * of existing mappings, and we only want to create new mappings. + */ + ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, + -1, 0); + assert(ret != NULL); -#if defined(MADV_FREE) - if (malloc_hint) - madvise(ptr, l, MADV_FREE); + if (ret == MAP_FAILED) + ret = NULL; + else if (addr != NULL && ret != addr) { + /* + * We succeeded in mapping memory, but not in the right place. + */ + if (munmap(ret, size) == -1) { + char buf[STRERROR_BUF]; + + strerror_r(errno, buf, sizeof(buf)); + malloc_printf("%s: (malloc) Error in munmap(): %s\n", + _getprogname(), buf); + if (opt_abort) + abort(); + } + ret = NULL; + } +#ifdef USE_BRK + else if ((size_t)ret >= (size_t)brk_base + && (size_t)ret < (size_t)brk_max) { + /* + * We succeeded in mapping memory, but at a location that could + * be confused with brk. Leave the mapping intact so that this + * won't ever happen again, then try again. + */ + assert(addr == NULL); + goto AGAIN; + } #endif - tail = (char *)ptr+l; - - /* add to free-list */ - if (px == NULL) - px = imalloc(sizeof *px); /* This cannot fail... */ - px->page = ptr; - px->end = tail; - px->size = l; - if (free_list.next == NULL) { - - /* Nothing on free list, put this at head */ - px->next = free_list.next; - px->prev = &free_list; - free_list.next = px; - pf = px; - px = NULL; - - } else { - - /* Find the right spot, leave pf pointing to the modified entry. */ - tail = (char *)ptr+l; - - for(pf = free_list.next; pf->end < ptr && pf->next != NULL; - pf = pf->next) - ; /* Race ahead here */ - - if (pf->page > tail) { - /* Insert before entry */ - px->next = pf; - px->prev = pf->prev; - pf->prev = px; - px->prev->next = px; - pf = px; - px = NULL; - } else if (pf->end == ptr ) { - /* Append to the previous entry */ - pf->end = (char *)pf->end + l; - pf->size += l; - if (pf->next != NULL && pf->end == pf->next->page ) { - /* And collapse the next too. */ - pt = pf->next; - pf->end = pt->end; - pf->size += pt->size; - pf->next = pt->next; - if (pf->next != NULL) - pf->next->prev = pf; - } - } else if (pf->page == tail) { - /* Prepend to entry */ - pf->size += l; - pf->page = ptr; - } else if (pf->next == NULL) { - /* Append at tail of chain */ - px->next = NULL; - px->prev = pf; - pf->next = px; - pf = px; - px = NULL; - } else { - wrterror("freelist is destroyed\n"); - } - } - - /* Return something to OS ? */ - if (pf->next == NULL && /* If we're the last one, */ - pf->size > malloc_cache && /* ..and the cache is full, */ - pf->end == malloc_brk && /* ..and none behind us, */ - malloc_brk == sbrk(0)) { /* ..and it's OK to do... */ - - /* - * Keep the cache intact. Notice that the '>' above guarantees that - * the pf will always have at least one page afterwards. - */ - pf->end = (char *)pf->page + malloc_cache; - pf->size = malloc_cache; - - brk(pf->end); - malloc_brk = pf->end; - - index = ptr2index(pf->end); - - for(i=index;i <= last_index;) - page_dir[i++] = MALLOC_NOT_MINE; - - last_index = index - 1; - - /* XXX: We could realloc/shrink the pagedir here I guess. */ - } - if (pt != NULL) - ifree(pt); -} - -/* - * Free a chunk, and possibly the page it's on, if the page becomes empty. - */ - -static __inline void -free_bytes(void *ptr, u_long index, struct pginfo *info) -{ - int i; - struct pginfo **mp; - void *vp; - - /* Find the chunk number on the page */ - i = ((u_long)ptr & malloc_pagemask) >> info->shift; - - if (((u_long)ptr & (info->size-1))) { - wrtwarning("modified (chunk-) pointer\n"); - return; - } - - if (info->bits[i/MALLOC_BITS] & (1<<(i%MALLOC_BITS))) { - wrtwarning("chunk is already free\n"); - return; - } - - if (malloc_junk) - memset(ptr, SOME_JUNK, info->size); - - info->bits[i/MALLOC_BITS] |= 1<<(i%MALLOC_BITS); - info->free++; - - mp = page_dir + info->shift; - - if (info->free == 1) { - - /* Page became non-full */ - - mp = page_dir + info->shift; - /* Insert in address order */ - while (*mp && (*mp)->next && (*mp)->next->page < info->page) - mp = &(*mp)->next; - info->next = *mp; - *mp = info; - return; - } - - if (info->free != info->total) - return; - - /* Find & remove this page in the queue */ - while (*mp != info) { - mp = &((*mp)->next); -#ifdef MALLOC_EXTRA_SANITY - if (!*mp) - wrterror("(ES): Not on queue\n"); -#endif /* MALLOC_EXTRA_SANITY */ - } - *mp = info->next; - - /* Free the page & the info structure if need be */ - page_dir[ptr2index(info->page)] = MALLOC_FIRST; - vp = info->page; /* Order is important ! */ - if(vp != (void*)info) - ifree(info); - ifree(vp); + assert(ret == NULL || (addr == NULL && ret != addr) + || (addr != NULL && ret == addr)); + return (ret); } static void -ifree(void *ptr) +pages_unmap(void *addr, size_t size) { - struct pginfo *info; - u_long index; - /* This is legal */ - if (ptr == NULL) - return; + if (munmap(addr, size) == -1) { + char buf[STRERROR_BUF]; - /* If we're already sinking, don't make matters any worse. */ - if (suicide) - return; - - index = ptr2index(ptr); - - if (index < malloc_pageshift) { - wrtwarning("junk pointer, too low to make sense\n"); - return; - } - - if (index > last_index) { - wrtwarning("junk pointer, too high to make sense\n"); - return; - } - - info = page_dir[index]; - - if (info < MALLOC_MAGIC) - free_pages(ptr, index, info); - else - free_bytes(ptr, index, info); - return; + strerror_r(errno, buf, sizeof(buf)); + malloc_printf("%s: (malloc) Error in munmap(): %s\n", + _getprogname(), buf); + if (opt_abort) + abort(); + } } static void * -pubrealloc(void *ptr, size_t size, const char *func) +chunk_alloc(size_t size) { - void *r; - int err = 0; - static int malloc_active; /* Recusion flag for public interface. */ - static unsigned malloc_started; /* Set when initialization has been done */ + void *ret, *chunk; + chunk_node_t *tchunk, *delchunk; + chunk_tree_t delchunks; - /* - * If a thread is inside our code with a functional lock held, and then - * catches a signal which calls us again, we would get a deadlock if the - * lock is not of a recursive type. - */ - _MALLOC_LOCK(); - malloc_func = func; - if (malloc_active > 0) { - if (malloc_active == 1) { - wrtwarning("recursive call\n"); - malloc_active = 2; - } - _MALLOC_UNLOCK(); - errno = EDOOFUS; - return (NULL); - } - malloc_active = 1; + assert(size != 0); + assert(size % chunk_size == 0); - if (!malloc_started) { - if (ptr != NULL) { - wrtwarning("malloc() has never been called\n"); - malloc_active = 0; - _MALLOC_UNLOCK(); - errno = EDOOFUS; - return (NULL); + RB_INIT(&delchunks); + + malloc_mutex_lock(&chunks_mtx); + + if (size == chunk_size) { + /* + * Check for address ranges that were previously chunks and try + * to use them. + */ + + tchunk = RB_MIN(chunk_tree_s, &old_chunks); + while (tchunk != NULL) { + /* Found an address range. Try to recycle it. */ + + chunk = tchunk->chunk; + delchunk = tchunk; + tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk); + + /* + * Remove delchunk from the tree, but keep track of the + * address. + */ + RB_REMOVE(chunk_tree_s, &old_chunks, delchunk); + + /* + * Keep track of the node so that it can be deallocated + * after chunks_mtx is released. + */ + RB_INSERT(chunk_tree_s, &delchunks, delchunk); + +#ifdef USE_BRK + if ((size_t)chunk >= (size_t)brk_base + && (size_t)chunk < (size_t)brk_max) { + /* Re-use a previously freed brk chunk. */ + ret = chunk; + goto RETURN; + } +#endif + if ((ret = pages_map(chunk, size)) != NULL) { + /* Success. */ + goto RETURN; + } + } + +#ifdef USE_BRK + /* + * Try to create chunk-size allocations in brk, in order to + * make full use of limited address space. + */ + if (brk_prev != (void *)-1) { + void *brk_cur; + intptr_t incr; + + /* + * The loop is necessary to recover from races with + * other threads that are using brk for something other + * than malloc. + */ + do { + /* Get the current end of brk. */ + brk_cur = sbrk(0); + + /* + * Calculate how much padding is necessary to + * chunk-align the end of brk. + */ + incr = (char *)chunk_size + - (char *)CHUNK_ADDR2OFFSET(brk_cur); + if (incr == chunk_size) { + ret = brk_cur; + } else { + ret = (char *)brk_cur + incr; + incr += chunk_size; + } + + brk_prev = sbrk(incr); + if (brk_prev == brk_cur) { + /* Success. */ + goto RETURN; + } + } while (brk_prev != (void *)-1); + } +#endif } - malloc_init(); - malloc_started = 1; - } - - if (ptr == ZEROSIZEPTR) - ptr = NULL; - if (malloc_sysv && !size) { - if (ptr != NULL) - ifree(ptr); - r = NULL; - } else if (!size) { - if (ptr != NULL) - ifree(ptr); - r = ZEROSIZEPTR; - } else if (ptr == NULL) { - r = imalloc(size); - err = (r == NULL); - } else { - r = irealloc(ptr, size); - err = (r == NULL); - } - UTRACE(ptr, size, r); - malloc_active = 0; - _MALLOC_UNLOCK(); - if (malloc_xmalloc && err) - wrterror("out of memory\n"); - if (err) - errno = ENOMEM; - return (r); + + /* + * Try to over-allocate, but allow the OS to place the allocation + * anywhere. Beware of size_t wrap-around. + */ + if (size + chunk_size > size) { + if ((ret = pages_map(NULL, size + chunk_size)) != NULL) { + size_t offset = CHUNK_ADDR2OFFSET(ret); + + /* + * Success. Clean up unneeded leading/trailing space. + */ + if (offset != 0) { + /* Leading space. */ + pages_unmap(ret, chunk_size - offset); + + ret = (void *) ((size_t) ret + (chunk_size - + offset)); + + /* Trailing space. */ + pages_unmap((void *) ((size_t) ret + size), + offset); + } else { + /* Trailing space only. */ + pages_unmap((void *) ((size_t) ret + size), + chunk_size); + } + goto RETURN; + } + } + + /* All strategies for allocation failed. */ + ret = NULL; +RETURN: +#ifdef MALLOC_STATS + if (ret != NULL) { + stats_chunks.nchunks += (size / chunk_size); + stats_chunks.curchunks += (size / chunk_size); + } + if (stats_chunks.curchunks > stats_chunks.highchunks) + stats_chunks.highchunks = stats_chunks.curchunks; +#endif + malloc_mutex_unlock(&chunks_mtx); + + /* + * Deallocation of the chunk nodes must be done after releasing + * chunks_mtx, in case deallocation causes a chunk to be unmapped. + */ + tchunk = RB_MIN(chunk_tree_s, &delchunks); + while (tchunk != NULL) { + delchunk = tchunk; + tchunk = RB_NEXT(chunk_tree_s, &delchunks, delchunk); + RB_REMOVE(chunk_tree_s, &delchunks, delchunk); + idalloc(delchunk); + } + + assert(CHUNK_ADDR2BASE(ret) == ret); + return (ret); +} + +static void +chunk_dealloc(void *chunk, size_t size) +{ + + assert(chunk != NULL); + assert(CHUNK_ADDR2BASE(chunk) == chunk); + assert(size != 0); + assert(size % chunk_size == 0); + + if (size == chunk_size) { + chunk_node_t *node; + + node = ipalloc(&base_arena, CACHELINE, + sizeof(chunk_node_t) + CACHELINE); + + malloc_mutex_lock(&chunks_mtx); + if (node != NULL) { + /* + * Create a record of this chunk before deallocating + * it, so that the address range can be recycled if + * memory usage increases later on. + */ + node->arena = NULL; + node->chunk = chunk; + node->size = size; + node->extra = 0; + + RB_INSERT(chunk_tree_s, &old_chunks, node); + } + malloc_mutex_unlock(&chunks_mtx); + } + +#ifdef USE_BRK + if ((size_t)chunk >= (size_t)brk_base + && (size_t)chunk < (size_t)brk_max) + madvise(chunk, size, MADV_FREE); + else +#endif + pages_unmap(chunk, size); + +#ifdef MALLOC_STATS + malloc_mutex_lock(&chunks_mtx); + stats_chunks.curchunks -= (size / chunk_size); + malloc_mutex_unlock(&chunks_mtx); +#endif +} + +/******************************************************************************/ +/* + * arena. + */ + +static __inline void +arena_mask_set(arena_t *arena, unsigned bin) +{ + unsigned elm, bit; + + assert(bin < NBINS); + + elm = bin / (sizeof(int) << 3); + bit = bin - (elm * (sizeof(int) << 3)); + assert((arena->bins_mask[elm] & (1 << bit)) == 0); + arena->bins_mask[elm] |= (1 << bit); +} + +static __inline void +arena_mask_unset(arena_t *arena, unsigned bin) +{ + unsigned elm, bit; + + assert(bin < NBINS); + + elm = bin / (sizeof(int) << 3); + bit = bin - (elm * (sizeof(int) << 3)); + assert((arena->bins_mask[elm] & (1 << bit)) != 0); + arena->bins_mask[elm] ^= (1 << bit); +} + +static unsigned +arena_bins_search(arena_t *arena, size_t size) +{ + unsigned ret, minbin, i; + int bit; + + assert(QUANTUM_CEILING(size) == size); + assert((size >> opt_quantum_2pow) >= bin_shift); + + if (size > bin_maxsize) { + ret = UINT_MAX; + goto RETURN; + } + + minbin = (size >> opt_quantum_2pow) - bin_shift; + assert(minbin < NBINS); + for (i = minbin / (sizeof(int) << 3); i < BINMASK_NELMS; i++) { + bit = ffs(arena->bins_mask[i] + & (UINT_MAX << (minbin % (sizeof(int) << 3)))); + if (bit != 0) { + /* Usable allocation found. */ + ret = (i * (sizeof(int) << 3)) + bit - 1; +#ifdef MALLOC_STATS + if (ret == minbin) + arena->stats.bins[minbin].nfit++; + else + arena->stats.bins[ret].noverfit++; +#endif + goto RETURN; + } + } + + ret = UINT_MAX; +RETURN: + return (ret); +} + +static __inline void +arena_delayed_extract(arena_t *arena, region_t *reg) +{ + + if (region_next_contig_get(®->sep)) { + uint32_t slot; + + /* Extract this region from the delayed FIFO. */ + assert(region_next_free_get(®->sep) == false); + + slot = reg->next.u.s.slot; + assert(arena->delayed[slot] == reg); + arena->delayed[slot] = NULL; + } +#ifdef MALLOC_DEBUG + else { + region_t *next; + + assert(region_next_free_get(®->sep)); + + next = (region_t *) &((char *) reg) + [region_next_size_get(®->sep)]; + assert(region_prev_free_get(&next->sep)); + } +#endif +} + +static __inline void +arena_bin_extract(arena_t *arena, unsigned bin, region_t *reg) +{ + arena_bin_t *tbin; + + assert(bin < NBINS); + + tbin = &arena->bins[bin]; + + assert(qr_next(&tbin->regions, next.u.s.link) != &tbin->regions); +#ifdef MALLOC_DEBUG + { + region_t *next; + + next = (region_t *) &((char *) reg) + [region_next_size_get(®->sep)]; + if (region_next_free_get(®->sep)) { + assert(region_prev_free_get(&next->sep)); + assert(region_next_size_get(®->sep) + == next->prev.size); + } else { + assert(region_prev_free_get(&next->sep) == false); + } + } +#endif + assert(region_next_size_get(®->sep) + == ((bin + bin_shift) << opt_quantum_2pow)); + + qr_remove(reg, next.u.s.link); +#ifdef MALLOC_STATS + arena->stats.bins[bin].nregions--; +#endif + if (qr_next(&tbin->regions, next.u.s.link) == &tbin->regions) + arena_mask_unset(arena, bin); + + arena_delayed_extract(arena, reg); +} + +static __inline void +arena_extract(arena_t *arena, region_t *reg) +{ + size_t size; + + assert(region_next_free_get(®->sep)); +#ifdef MALLOC_DEBUG + { + region_t *next; + + next = (region_t *)&((char *)reg) + [region_next_size_get(®->sep)]; + } +#endif + + assert(reg != arena->split); + assert(reg != arena->frag); + if ((size = region_next_size_get(®->sep)) <= bin_maxsize) { + arena_bin_extract(arena, (size >> opt_quantum_2pow) - bin_shift, + reg); + } else { + RB_REMOVE(region_tree_s, &arena->large_regions, + ®->next.u.l.node); +#ifdef MALLOC_STATS + arena->stats.large.curcached--; +#endif + } +} + +/* Try to coalesce reg with its neighbors. Return NULL if coalescing fails. */ +static bool +arena_coalesce(arena_t *arena, region_t **reg, size_t size) +{ + bool ret; + region_t *prev, *treg, *next, *nextnext; + size_t tsize, prev_size, next_size; + + ret = false; + + treg = *reg; + + /* + * Keep track of the size while coalescing, then just set the size in + * the header/footer once at the end of coalescing. + */ + assert(size == region_next_size_get(&(*reg)->sep)); + tsize = size; + + next = (region_t *)&((char *)treg)[tsize]; + assert(region_next_free_get(&treg->sep)); + assert(region_prev_free_get(&next->sep)); + assert(region_next_size_get(&treg->sep) == next->prev.size); + + while (region_prev_free_get(&treg->sep)) { + prev_size = treg->prev.size; + prev = (region_t *)&((char *)treg)[-prev_size]; + assert(region_next_free_get(&prev->sep)); + + arena_extract(arena, prev); + + tsize += prev_size; + + treg = prev; + +#ifdef MALLOC_STATS + if (ret == false) + arena->stats.ncoalesce++; +#endif + ret = true; + } + + while (region_next_free_get(&next->sep)) { + next_size = region_next_size_get(&next->sep); + nextnext = (region_t *)&((char *)next)[next_size]; + assert(region_prev_free_get(&nextnext->sep)); + + assert(region_next_size_get(&next->sep) == nextnext->prev.size); + + arena_extract(arena, next); + + assert(region_next_size_get(&next->sep) == nextnext->prev.size); + + tsize += next_size; + +#ifdef MALLOC_STATS + if (ret == false) + arena->stats.ncoalesce++; +#endif + ret = true; + + next = (region_t *)&((char *)treg)[tsize]; + } + + /* Update header/footer. */ + if (ret) { + region_next_size_set(&treg->sep, tsize); + next->prev.size = tsize; + } + + /* + * Now that coalescing with adjacent free regions is done, we need to + * try to coalesce with "split" and "frag". Those two regions are + * marked as allocated, which is why this takes special effort. There + * are seven possible cases, but we want to make the (hopefully) common + * case of no coalescence fast, so the checks are optimized for that + * case. The seven cases are: + * + * /------\ + * 0 | treg | No coalescence needed. Make this case fast. + * \------/ + * + * /------+------\ + * 1 | frag | treg | + * \------+------/ + * + * /------+------\ + * 2 | treg | frag | + * \------+------/ + * + * /-------+------\ + * 3 | split | treg | + * \-------+------/ + * + * /------+-------\ + * 4 | treg | split | + * \------+-------/ + * + * /------+------+-------\ + * 5 | frag | treg | split | + * \------+------+-------/ + * + * /-------+------+------\ + * 6 | split | treg | frag | + * \-------+------+------/ + */ + + if (arena->split == NULL) { + /* Cases 3-6 ruled out. */ + } else if ((size_t)next < (size_t)arena->split) { + /* Cases 3-6 ruled out. */ + } else { + region_t *split_next; + size_t split_size; + + split_size = region_next_size_get(&arena->split->sep); + split_next = (region_t *)&((char *)arena->split)[split_size]; + + if ((size_t)split_next < (size_t)treg) { + /* Cases 3-6 ruled out. */ + } else { + /* + * Split is adjacent to treg. Take the slow path and + * coalesce. + */ + + arena_coalesce_hard(arena, treg, next, tsize, true); + + treg = NULL; +#ifdef MALLOC_STATS + if (ret == false) + arena->stats.ncoalesce++; +#endif + ret = true; + goto RETURN; + } + } + + /* If we get here, then cases 3-6 have been ruled out. */ + if (arena->frag == NULL) { + /* Cases 1-6 ruled out. */ + } else if ((size_t)next < (size_t)arena->frag) { + /* Cases 1-6 ruled out. */ + } else { + region_t *frag_next; + size_t frag_size; + + frag_size = region_next_size_get(&arena->frag->sep); + frag_next = (region_t *)&((char *)arena->frag)[frag_size]; + + if ((size_t)frag_next < (size_t)treg) { + /* Cases 1-6 ruled out. */ + } else { + /* + * Frag is adjacent to treg. Take the slow path and + * coalesce. + */ + + arena_coalesce_hard(arena, treg, next, tsize, false); + + treg = NULL; +#ifdef MALLOC_STATS + if (ret == false) + arena->stats.ncoalesce++; +#endif + ret = true; + goto RETURN; + } + } + + /* If we get here, no coalescence with "split" or "frag" was needed. */ + + /* Finish updating header. */ + region_next_contig_unset(&treg->sep); + + assert(region_next_free_get(&treg->sep)); + assert(region_prev_free_get(&next->sep)); + assert(region_prev_free_get(&treg->sep) == false); + assert(region_next_free_get(&next->sep) == false); + +RETURN: + if (ret) + *reg = treg; + return (ret); } /* - * These are the public exported interface routines. + * arena_coalesce() calls this function if it determines that a region needs to + * be coalesced with "split" and/or "frag". + */ +static void +arena_coalesce_hard(arena_t *arena, region_t *reg, region_t *next, size_t size, + bool split_adjacent) +{ + bool frag_adjacent; + + assert(next == (region_t *)&((char *)reg)[size]); + assert(region_next_free_get(®->sep)); + assert(region_next_size_get(®->sep) == size); + assert(region_prev_free_get(&next->sep)); + assert(next->prev.size == size); + + if (split_adjacent == false) + frag_adjacent = true; + else if (arena->frag != NULL) { + /* Determine whether frag will be coalesced with. */ + + if ((size_t)next < (size_t)arena->frag) + frag_adjacent = false; + else { + region_t *frag_next; + size_t frag_size; + + frag_size = region_next_size_get(&arena->frag->sep); + frag_next = (region_t *)&((char *)arena->frag) + [frag_size]; + + if ((size_t)frag_next < (size_t)reg) + frag_adjacent = false; + else + frag_adjacent = true; + } + } else + frag_adjacent = false; + + if (split_adjacent && frag_adjacent) { + region_t *a; + size_t a_size, b_size; + + /* Coalesce all three regions. */ + + if (arena->frag == next) + a = arena->split; + else { + a = arena->frag; + arena->split = a; + } + arena->frag = NULL; + + a_size = region_next_size_get(&a->sep); + assert(a_size == (size_t)reg - (size_t)a); + + b_size = region_next_size_get(&next->sep); + + region_next_size_set(&a->sep, a_size + size + b_size); + assert(region_next_free_get(&a->sep) == false); + } else { + /* Coalesce two regions. */ + + if (split_adjacent) { + size += region_next_size_get(&arena->split->sep); + if (arena->split == next) { + /* reg comes before split. */ + region_next_size_set(®->sep, size); + + assert(region_next_free_get(®->sep)); + region_next_free_unset(®->sep); + + arena->split = reg; + } else { + /* reg comes after split. */ + region_next_size_set(&arena->split->sep, size); + + assert(region_next_free_get(&arena->split->sep) + == false); + + assert(region_prev_free_get(&next->sep)); + region_prev_free_unset(&next->sep); + } + } else { + assert(frag_adjacent); + size += region_next_size_get(&arena->frag->sep); + if (arena->frag == next) { + /* reg comes before frag. */ + region_next_size_set(®->sep, size); + + assert(region_next_free_get(®->sep)); + region_next_free_unset(®->sep); + + arena->frag = reg; + } else { + /* reg comes after frag. */ + region_next_size_set(&arena->frag->sep, size); + + assert(region_next_free_get(&arena->frag->sep) + == false); + + assert(region_prev_free_get(&next->sep)); + region_prev_free_unset(&next->sep); + } + } + } +} + +static __inline void +arena_bin_append(arena_t *arena, unsigned bin, region_t *reg) +{ + arena_bin_t *tbin; + + assert(bin < NBINS); + assert((region_next_size_get(®->sep) >> opt_quantum_2pow) + >= bin_shift); + assert(region_next_size_get(®->sep) + == ((bin + bin_shift) << opt_quantum_2pow)); + + tbin = &arena->bins[bin]; + + if (qr_next(&tbin->regions, next.u.s.link) == &tbin->regions) + arena_mask_set(arena, bin); + + qr_new(reg, next.u.s.link); + qr_before_insert(&tbin->regions, reg, next.u.s.link); +#ifdef MALLOC_STATS + arena->stats.bins[bin].nregions++; + + if (arena->stats.bins[bin].nregions + > arena->stats.bins[bin].highcached) { + arena->stats.bins[bin].highcached + = arena->stats.bins[bin].nregions; + } +#endif +} + +static __inline void +arena_bin_push(arena_t *arena, unsigned bin, region_t *reg) +{ + arena_bin_t *tbin; + + assert(bin < NBINS); + assert((region_next_size_get(®->sep) >> opt_quantum_2pow) + >= bin_shift); + assert(region_next_size_get(®->sep) + == ((bin + bin_shift) << opt_quantum_2pow)); + + tbin = &arena->bins[bin]; + + if (qr_next(&tbin->regions, next.u.s.link) == &tbin->regions) + arena_mask_set(arena, bin); + + region_next_contig_unset(®->sep); + qr_new(reg, next.u.s.link); + qr_after_insert(&tbin->regions, reg, next.u.s.link); +#ifdef MALLOC_STATS + arena->stats.bins[bin].nregions++; + + if (arena->stats.bins[bin].nregions + > arena->stats.bins[bin].highcached) { + arena->stats.bins[bin].highcached + = arena->stats.bins[bin].nregions; + } +#endif +} + +static __inline region_t * +arena_bin_pop(arena_t *arena, unsigned bin) +{ + region_t *ret; + arena_bin_t *tbin; + + assert(bin < NBINS); + + tbin = &arena->bins[bin]; + + assert(qr_next(&tbin->regions, next.u.s.link) != &tbin->regions); + + ret = qr_next(&tbin->regions, next.u.s.link); + assert(region_next_size_get(&ret->sep) + == ((bin + bin_shift) << opt_quantum_2pow)); + qr_remove(ret, next.u.s.link); +#ifdef MALLOC_STATS + arena->stats.bins[bin].nregions--; +#endif + if (qr_next(&tbin->regions, next.u.s.link) == &tbin->regions) + arena_mask_unset(arena, bin); + + arena_delayed_extract(arena, ret); + + if (region_next_free_get(&ret->sep)) { + region_t *next; + + /* Non-delayed region. */ + region_next_free_unset(&ret->sep); + + next = (region_t *)&((char *)ret) + [(bin + bin_shift) << opt_quantum_2pow]; + assert(next->prev.size == region_next_size_get(&ret->sep)); + assert(region_prev_free_get(&next->sep)); + region_prev_free_unset(&next->sep); + } + + return (ret); +} + +static void +arena_large_insert(arena_t *arena, region_t *reg, bool lru) +{ + + assert(region_next_free_get(®->sep)); +#ifdef MALLOC_DEBUG + { + region_t *next; + + next = (region_t *)&((char *)reg) + [region_next_size_get(®->sep)]; + assert(region_prev_free_get(&next->sep)); + assert(next->prev.size == region_next_size_get(®->sep)); + } +#endif + + /* Coalescing should have already been done. */ + assert(arena_coalesce(arena, ®, region_next_size_get(®->sep)) + == false); + + if (region_next_size_get(®->sep) < chunk_size + - (CHUNK_REG_OFFSET + offsetof(region_t, next))) { + /* + * Make sure not to cache a large region with the nextContig + * flag set, in order to simplify the logic that determines + * whether a region needs to be extracted from "delayed". + */ + region_next_contig_unset(®->sep); + + /* Store the region in the large_regions tree. */ + reg->next.u.l.node.reg = reg; + reg->next.u.l.lru = lru; + + RB_INSERT(region_tree_s, &arena->large_regions, + ®->next.u.l.node); +#ifdef MALLOC_STATS + arena->stats.large.curcached++; + if (arena->stats.large.curcached + > arena->stats.large.highcached) { + arena->stats.large.highcached + = arena->stats.large.curcached; + } +#endif + } else { + chunk_node_t *node; + + /* + * This region now spans an entire chunk. Deallocate the chunk. + * + * Note that it is possible for allocation of a large region + * from a pristine chunk, followed by deallocation of the + * region, can cause the chunk to immediately be unmapped. + * This isn't ideal, but 1) such scenarios seem unlikely, and + * 2) delaying coalescence for large regions could cause + * excessive fragmentation for common scenarios. + */ + + node = (chunk_node_t *)CHUNK_ADDR2BASE(reg); + RB_REMOVE(chunk_tree_s, &arena->chunks, node); + arena->nchunks--; + assert(node->chunk == (chunk_node_t *)node); + chunk_dealloc(node->chunk, chunk_size); + } +} + +static void +arena_large_cache(arena_t *arena, region_t *reg, bool lru) +{ + size_t size; + + /* Try to coalesce before storing this region anywhere. */ + size = region_next_size_get(®->sep); + if (arena_coalesce(arena, ®, size)) { + if (reg == NULL) { + /* Region no longer needs cached. */ + return; + } + size = region_next_size_get(®->sep); + } + + arena_large_insert(arena, reg, lru); +} + +static void +arena_lru_cache(arena_t *arena, region_t *reg) +{ + size_t size; + + assert(region_next_free_get(®->sep)); +#ifdef MALLOC_DEBUG + { + region_t *next; + + next = (region_t *)&((char *)reg) + [region_next_size_get(®->sep)]; + assert(region_prev_free_get(&next->sep)); + assert(next->prev.size == region_next_size_get(®->sep)); + } +#endif + assert(region_next_size_get(®->sep) % quantum == 0); + assert(region_next_size_get(®->sep) + >= QUANTUM_CEILING(sizeof(region_small_sizer_t))); + + size = region_next_size_get(®->sep); + if (size <= bin_maxsize) { + arena_bin_append(arena, (size >> opt_quantum_2pow) - bin_shift, + reg); + } else + arena_large_cache(arena, reg, true); +} + +static __inline void +arena_mru_cache(arena_t *arena, region_t *reg, size_t size) +{ + + assert(region_next_free_get(®->sep)); +#ifdef MALLOC_DEBUG + { + region_t *next; + + next = (region_t *)&((char *)reg) + [region_next_size_get(®->sep)]; + assert(region_prev_free_get(&next->sep)); + assert(next->prev.size == region_next_size_get(®->sep)); + } +#endif + assert(region_next_size_get(®->sep) % quantum == 0); + assert(region_next_size_get(®->sep) + >= QUANTUM_CEILING(sizeof(region_small_sizer_t))); + assert(size == region_next_size_get(®->sep)); + + if (size <= bin_maxsize) { + arena_bin_push(arena, (size >> opt_quantum_2pow) - bin_shift, + reg); + } else + arena_large_cache(arena, reg, false); +} + +static __inline void +arena_undelay(arena_t *arena, uint32_t slot) +{ + region_t *reg, *next; + size_t size; + + assert(slot == arena->next_delayed); + assert(arena->delayed[slot] != NULL); + + /* Try to coalesce reg. */ + reg = arena->delayed[slot]; + + size = region_next_size_get(®->sep); + + assert(region_next_contig_get(®->sep)); + assert(reg->next.u.s.slot == slot); + + arena_bin_extract(arena, (size >> opt_quantum_2pow) - bin_shift, reg); + + arena->delayed[slot] = NULL; + + next = (region_t *) &((char *) reg)[size]; + + region_next_free_set(®->sep); + region_prev_free_set(&next->sep); + next->prev.size = size; + + if (arena_coalesce(arena, ®, size) == false) { + /* Coalescing failed. Cache this region. */ + arena_mru_cache(arena, reg, size); + } else { + /* Coalescing succeeded. */ + + if (reg == NULL) { + /* Region no longer needs undelayed. */ + return; + } + + if (region_next_size_get(®->sep) < chunk_size + - (CHUNK_REG_OFFSET + offsetof(region_t, next))) { + /* + * Insert coalesced region into appropriate bin (or + * largeRegions). + */ + arena_lru_cache(arena, reg); + } else { + chunk_node_t *node; + + /* + * This region now spans an entire chunk. Deallocate + * the chunk. + */ + + node = (chunk_node_t *) CHUNK_ADDR2BASE(reg); + RB_REMOVE(chunk_tree_s, &arena->chunks, node); + arena->nchunks--; + assert(node->chunk == (chunk_node_t *) node); + chunk_dealloc(node->chunk, chunk_size); + } + } +} + +static void +arena_delay_cache(arena_t *arena, region_t *reg) +{ + region_t *next; + size_t size; + + assert(region_next_free_get(®->sep) == false); + assert(region_next_size_get(®->sep) % quantum == 0); + assert(region_next_size_get(®->sep) + >= QUANTUM_CEILING(sizeof(region_small_sizer_t))); + + size = region_next_size_get(®->sep); + next = (region_t *)&((char *)reg)[size]; + assert(region_prev_free_get(&next->sep) == false); + + if (size <= bin_maxsize) { + if (region_next_contig_get(®->sep)) { + uint32_t slot; + + /* Insert into delayed. */ + + /* + * Clear a slot, then put reg in it. We have to loop + * here, because in the case of base_arena, it's + * possible for this loop to cause deallocation if a + * chunk is discarded. + */ + for (slot = arena->next_delayed; + arena->delayed[slot] != NULL; + slot = arena->next_delayed) + arena_undelay(arena, slot); + + reg->next.u.s.slot = slot; + + arena->delayed[slot] = reg; + + /* Update next_delayed. */ + slot++; + slot &= (opt_ndelay - 1); /* Handle wrap-around. */ + arena->next_delayed = slot; + + arena_bin_append(arena, (size >> opt_quantum_2pow) + - bin_shift, reg); + } else { + /* + * This region was a fragment when it was allocated, so + * don't delay coalescence for it. + */ + + region_next_free_set(®->sep); + region_prev_free_set(&next->sep); + next->prev.size = size; + + if (arena_coalesce(arena, ®, size)) { + /* Coalescing succeeded. */ + + if (reg == NULL) { + /* Region no longer needs cached. */ + return; + } + + size = region_next_size_get(®->sep); + } + + arena_mru_cache(arena, reg, size); + } + } else { + region_next_free_set(®->sep); + region_prev_free_set(&next->sep); + region_next_contig_unset(®->sep); + next->prev.size = size; + + arena_large_cache(arena, reg, true); + } +} + +static __inline region_t * +arena_frag_reg_alloc(arena_t *arena, size_t size, bool fit) +{ + region_t *ret; + + /* + * Try to fill frag if it's empty. Frag needs to be marked as + * allocated. + */ + if (arena->frag == NULL) { + region_node_t *node; + + node = RB_MIN(region_tree_s, &arena->large_regions); + if (node != NULL) { + region_t *frag, *next; + + RB_REMOVE(region_tree_s, &arena->large_regions, node); + + frag = node->reg; +#ifdef MALLOC_STATS + arena->stats.frag.ncached++; +#endif + assert(region_next_free_get(&frag->sep)); + region_next_free_unset(&frag->sep); + + next = (region_t *)&((char *)frag)[region_next_size_get( + &frag->sep)]; + assert(region_prev_free_get(&next->sep)); + region_prev_free_unset(&next->sep); + + arena->frag = frag; + } + } + + if (arena->frag != NULL) { +#ifdef MALLOC_STATS + arena->stats.frag.nrequests++; +#endif + + if (region_next_size_get(&arena->frag->sep) >= size) { + if (fit) { + size_t total_size; + + /* + * Use frag, but try to use the beginning for + * smaller regions, and the end for larger + * regions. This reduces fragmentation in some + * pathological use cases. It tends to group + * short-lived (smaller) regions, which + * increases the effectiveness of coalescing. + */ + + total_size = + region_next_size_get(&arena->frag->sep); + assert(size % quantum == 0); + + if (total_size - size >= QUANTUM_CEILING( + sizeof(region_small_sizer_t))) { + if (size <= bin_maxsize) { + region_t *next; + + /* + * Carve space from the + * beginning of frag. + */ + + /* ret. */ + ret = arena->frag; + region_next_size_set(&ret->sep, + size); + assert(region_next_free_get( + &ret->sep) == false); + + /* next. */ + next = (region_t *)&((char *) + ret)[size]; + region_next_size_set(&next->sep, + total_size - size); + assert(size >= + QUANTUM_CEILING(sizeof( + region_small_sizer_t))); + region_prev_free_unset( + &next->sep); + region_next_free_unset( + &next->sep); + + /* Update frag. */ + arena->frag = next; + } else { + region_t *prev; + size_t prev_size; + + /* + * Carve space from the end of + * frag. + */ + + /* prev. */ + prev_size = total_size - size; + prev = arena->frag; + region_next_size_set(&prev->sep, + prev_size); + assert(prev_size >= + QUANTUM_CEILING(sizeof( + region_small_sizer_t))); + assert(region_next_free_get( + &prev->sep) == false); + + /* ret. */ + ret = (region_t *)&((char *) + prev)[prev_size]; + region_next_size_set(&ret->sep, + size); + region_prev_free_unset( + &ret->sep); + region_next_free_unset( + &ret->sep); + +#ifdef MALLOC_DEBUG + { + region_t *next; + + /* next. */ + next = (region_t *)&((char *) ret) + [region_next_size_get(&ret->sep)]; + assert(region_prev_free_get(&next->sep) + == false); + } +#endif + } +#ifdef MALLOC_STATS + arena->stats.nsplit++; +#endif + } else { + /* + * frag is close enough to the right + * size that there isn't enough room to + * create a neighboring region. + */ + + /* ret. */ + ret = arena->frag; + arena->frag = NULL; + assert(region_next_free_get(&ret->sep) + == false); + +#ifdef MALLOC_DEBUG + { + region_t *next; + + /* next. */ + next = (region_t *)&((char *) + ret)[region_next_size_get( + &ret->sep)]; + assert(region_prev_free_get( + &next->sep) == false); + } +#endif + } +#ifdef MALLOC_STATS + arena->stats.frag.nserviced++; +#endif + } else { + /* Don't fit to the allocation size. */ + + /* ret. */ + ret = arena->frag; + arena->frag = NULL; + assert(region_next_free_get(&ret->sep) + == false); + +#ifdef MALLOC_DEBUG + { + region_t *next; + + /* next. */ + next = (region_t *) &((char *) ret) + [region_next_size_get(&ret->sep)]; + assert(region_prev_free_get(&next->sep) + == false); + } +#endif + } + region_next_contig_set(&ret->sep); + goto RETURN; + } else if (size <= bin_maxsize) { + region_t *reg; + + /* + * The frag region is too small to service a small + * request. Clear frag. + */ + + reg = arena->frag; + region_next_contig_set(®->sep); + + arena->frag = NULL; + + arena_delay_cache(arena, reg); + } + } + + ret = NULL; +RETURN: + return (ret); +} + +static region_t * +arena_split_reg_alloc(arena_t *arena, size_t size, bool fit) +{ + region_t *ret; + + if (arena->split != NULL) { +#ifdef MALLOC_STATS + arena->stats.split.nrequests++; +#endif + + if (region_next_size_get(&arena->split->sep) >= size) { + if (fit) { + size_t total_size; + + /* + * Use split, but try to use the beginning for + * smaller regions, and the end for larger + * regions. This reduces fragmentation in some + * pathological use cases. It tends to group + * short-lived (smaller) regions, which + * increases the effectiveness of coalescing. + */ + + total_size = + region_next_size_get(&arena->split->sep); + assert(size % quantum == 0); + + if (total_size - size >= QUANTUM_CEILING( + sizeof(region_small_sizer_t))) { + if (size <= bin_maxsize) { + region_t *next; + + /* + * Carve space from the + * beginning of split. + */ + + /* ret. */ + ret = arena->split; + region_next_size_set(&ret->sep, + size); + assert(region_next_free_get( + &ret->sep) == false); + + /* next. */ + next = (region_t *)&((char *) + ret)[size]; + region_next_size_set(&next->sep, + total_size - size); + assert(size >= + QUANTUM_CEILING(sizeof( + region_small_sizer_t))); + region_prev_free_unset( + &next->sep); + region_next_free_unset( + &next->sep); + + /* Update split. */ + arena->split = next; + } else { + region_t *prev; + size_t prev_size; + + /* + * Carve space from the end of + * split. + */ + + /* prev. */ + prev_size = total_size - size; + prev = arena->split; + region_next_size_set(&prev->sep, + prev_size); + assert(prev_size >= + QUANTUM_CEILING(sizeof( + region_small_sizer_t))); + assert(region_next_free_get( + &prev->sep) == false); + + /* ret. */ + ret = (region_t *)&((char *) + prev)[prev_size]; + region_next_size_set(&ret->sep, + size); + region_prev_free_unset( + &ret->sep); + region_next_free_unset( + &ret->sep); + +#ifdef MALLOC_DEBUG + { + region_t *next; + + /* next. */ + next = (region_t *)&((char *) ret) + [region_next_size_get(&ret->sep)]; + assert(region_prev_free_get(&next->sep) + == false); + } +#endif + } +#ifdef MALLOC_STATS + arena->stats.nsplit++; +#endif + } else { + /* + * split is close enough to the right + * size that there isn't enough room to + * create a neighboring region. + */ + + /* ret. */ + ret = arena->split; + arena->split = NULL; + assert(region_next_free_get(&ret->sep) + == false); + +#ifdef MALLOC_DEBUG + { + region_t *next; + + /* next. */ + next = (region_t *)&((char *) + ret)[region_next_size_get( + &ret->sep)]; + assert(region_prev_free_get( + &next->sep) == false); + } +#endif + } + +#ifdef MALLOC_STATS + arena->stats.split.nserviced++; +#endif + } else { + /* Don't fit to the allocation size. */ + + /* ret. */ + ret = arena->split; + arena->split = NULL; + assert(region_next_free_get(&ret->sep) + == false); + +#ifdef MALLOC_DEBUG + { + region_t *next; + + /* next. */ + next = (region_t *) &((char *) ret) + [region_next_size_get(&ret->sep)]; + assert(region_prev_free_get(&next->sep) + == false); + } +#endif + } + region_next_contig_set(&ret->sep); + goto RETURN; + } else if (size <= bin_maxsize) { + region_t *reg; + + /* + * The split region is too small to service a small + * request. Clear split. + */ + + reg = arena->split; + region_next_contig_set(®->sep); + + arena->split = NULL; + + arena_delay_cache(arena, reg); + } + } + + ret = NULL; +RETURN: + return (ret); +} + +/* + * Split reg if necessary. The region must be overly large enough to be able + * to contain a trailing region. + */ +static void +arena_reg_fit(arena_t *arena, size_t size, region_t *reg, bool restore_split) +{ + assert(QUANTUM_CEILING(size) == size); + assert(region_next_free_get(®->sep) == 0); + + if (region_next_size_get(®->sep) + >= size + QUANTUM_CEILING(sizeof(region_small_sizer_t))) { + size_t total_size; + region_t *next; + + total_size = region_next_size_get(®->sep); + + region_next_size_set(®->sep, size); + + next = (region_t *) &((char *) reg)[size]; + region_next_size_set(&next->sep, total_size - size); + assert(region_next_size_get(&next->sep) + >= QUANTUM_CEILING(sizeof(region_small_sizer_t))); + region_prev_free_unset(&next->sep); + + if (restore_split) { + /* Restore what's left to "split". */ + region_next_free_unset(&next->sep); + arena->split = next; + } else if (arena->frag == NULL && total_size - size + > bin_maxsize) { + /* This region is large enough to use for "frag". */ + region_next_free_unset(&next->sep); + arena->frag = next; + } else { + region_t *nextnext; + size_t next_size; + + region_next_free_set(&next->sep); + + assert(region_next_size_get(&next->sep) == total_size + - size); + next_size = total_size - size; + nextnext = (region_t *) &((char *) next)[next_size]; + nextnext->prev.size = next_size; + assert(region_prev_free_get(&nextnext->sep) == false); + region_prev_free_set(&nextnext->sep); + + arena_mru_cache(arena, next, next_size); + } + +#ifdef MALLOC_STATS + arena->stats.nsplit++; +#endif + } +} + +static __inline region_t * +arena_bin_reg_alloc(arena_t *arena, size_t size, bool fit) +{ + region_t *ret, *header; + unsigned bin; + + /* + * Look for an exact fit in bins (region cached in smallest possible + * bin). + */ + bin = (size >> opt_quantum_2pow) - bin_shift; +#ifdef MALLOC_STATS + arena->stats.bins[bin].nrequests++; +#endif + header = &arena->bins[bin].regions; + if (qr_next(header, next.u.s.link) != header) { + /* Exact fit. */ + ret = arena_bin_pop(arena, bin); + assert(region_next_size_get(&ret->sep) >= size); +#ifdef MALLOC_STATS + arena->stats.bins[bin].nfit++; +#endif + goto RETURN; + } + + /* Look at frag to see whether it's large enough. */ + ret = arena_frag_reg_alloc(arena, size, fit); + if (ret != NULL) + goto RETURN; + + /* Look in all bins for a large enough region. */ + if ((bin = arena_bins_search(arena, size)) == (size >> opt_quantum_2pow) + - bin_shift) { + /* Over-fit. */ + ret = arena_bin_pop(arena, bin); + assert(region_next_size_get(&ret->sep) >= size); + + if (fit) + arena_reg_fit(arena, size, ret, false); + +#ifdef MALLOC_STATS + arena->stats.bins[bin].noverfit++; +#endif + goto RETURN; + } + + ret = NULL; +RETURN: + return (ret); +} + +/* Look in large_regions for a large enough region. */ +static region_t * +arena_large_reg_alloc(arena_t *arena, size_t size, bool fit) +{ + region_t *ret, *next; + region_node_t *node; + region_t key; + +#ifdef MALLOC_STATS + arena->stats.large.nrequests++; +#endif + + key.next.u.l.node.reg = &key; + key.next.u.l.lru = true; + region_next_size_set(&key.sep, size); + node = RB_NFIND(region_tree_s, &arena->large_regions, + &key.next.u.l.node); + if (node == NULL) { + ret = NULL; + goto RETURN; + } + + /* Cached large region found. */ + ret = node->reg; + assert(region_next_free_get(&ret->sep)); + + RB_REMOVE(region_tree_s, &arena->large_regions, node); +#ifdef MALLOC_STATS + arena->stats.large.curcached--; +#endif + + region_next_free_unset(&ret->sep); + + next = (region_t *)&((char *)ret)[region_next_size_get(&ret->sep)]; + assert(region_prev_free_get(&next->sep)); + region_prev_free_unset(&next->sep); + + if (fit) + arena_reg_fit(arena, size, ret, false); + +#ifdef MALLOC_STATS + if (size > bin_maxsize) + arena->stats.large.nfit++; + else + arena->stats.large.noverfit++; +#endif + +RETURN: + return (ret); +} + +/* Allocate a new chunk and create a single region from it. */ +static region_t * +arena_chunk_reg_alloc(arena_t *arena, size_t size, bool fit) +{ + region_t *ret, *next; + chunk_node_t *chunk; + + chunk = chunk_alloc(chunk_size); + if (chunk == NULL) { + ret = NULL; + goto RETURN; + } + +#ifdef MALLOC_DEBUG + { + chunk_node_t *tchunk; + chunk_node_t key; + + key.chunk = chunk; + tchunk = RB_FIND(chunk_tree_s, &arena->chunks, &key); + assert(tchunk == NULL); + } +#endif + chunk->arena = arena; + chunk->chunk = chunk; + chunk->size = chunk_size; + chunk->extra = 0; + + RB_INSERT(chunk_tree_s, &arena->chunks, chunk); + arena->nchunks++; + + /* Carve a region from the new chunk. */ + ret = (region_t *) &((char *) chunk)[CHUNK_REG_OFFSET]; + region_next_size_set(&ret->sep, chunk_size - (CHUNK_REG_OFFSET + + offsetof(region_t, next))); + region_prev_free_unset(&ret->sep); + region_next_free_unset(&ret->sep); + + /* Create a separator at the end of this new region. */ + next = (region_t *)&((char *)ret)[region_next_size_get(&ret->sep)]; + region_next_size_set(&next->sep, 0); + region_prev_free_unset(&next->sep); + region_next_free_unset(&next->sep); + region_next_contig_unset(&next->sep); + + if (fit) + arena_reg_fit(arena, size, ret, (arena->split == NULL)); + +RETURN: + return (ret); +} + +/* + * Find a region that is at least as large as aSize, and return a pointer to + * the separator that precedes the region. The return value is ready for use, + * though it may be larger than is necessary if fit is false. + */ +static __inline region_t * +arena_reg_alloc(arena_t *arena, size_t size, bool fit) +{ + region_t *ret; + + assert(QUANTUM_CEILING(size) == size); + assert(size >= QUANTUM_CEILING(sizeof(region_small_sizer_t))); + assert(size <= (chunk_size >> 1)); + + if (size <= bin_maxsize) { + ret = arena_bin_reg_alloc(arena, size, fit); + if (ret != NULL) + goto RETURN; + } + + ret = arena_large_reg_alloc(arena, size, fit); + if (ret != NULL) + goto RETURN; + + ret = arena_split_reg_alloc(arena, size, fit); + if (ret != NULL) + goto RETURN; + + /* + * Only try allocating from frag here if size is large, since + * arena_bin_reg_alloc() already falls back to allocating from frag for + * small regions. + */ + if (size > bin_maxsize) { + ret = arena_frag_reg_alloc(arena, size, fit); + if (ret != NULL) + goto RETURN; + } + + ret = arena_chunk_reg_alloc(arena, size, fit); + if (ret != NULL) + goto RETURN; + + ret = NULL; +RETURN: + return (ret); +} + +static void * +arena_malloc(arena_t *arena, size_t size) +{ + void *ret; + region_t *reg; + size_t quantum_size; + + assert(arena != NULL); + assert(arena->magic == ARENA_MAGIC); + assert(size != 0); + assert(region_ceiling(size) <= (chunk_size >> 1)); + + quantum_size = region_ceiling(size); + if (quantum_size < size) { + /* size is large enough to cause size_t wrap-around. */ + ret = NULL; + goto RETURN; + } + assert(quantum_size >= QUANTUM_CEILING(sizeof(region_small_sizer_t))); + + malloc_mutex_lock(&arena->mtx); + reg = arena_reg_alloc(arena, quantum_size, true); + if (reg == NULL) { + malloc_mutex_unlock(&arena->mtx); + ret = NULL; + goto RETURN; + } + +#ifdef MALLOC_STATS + arena->allocated += quantum_size; +#endif + + malloc_mutex_unlock(&arena->mtx); + + ret = (void *)®->next; +#ifdef MALLOC_REDZONES + { + region_t *next; + size_t total_size; + + memset(reg->sep.next_red, 0xa5, MALLOC_RED); + + /* + * Unused trailing space in the region is considered part of the + * trailing redzone. + */ + total_size = region_next_size_get(®->sep); + assert(total_size >= size); + memset(&((char *)ret)[size], 0xa5, + total_size - size - sizeof(region_sep_t)); + + reg->sep.next_exact_size = size; + + next = (region_t *)&((char *)reg)[total_size]; + memset(next->sep.prev_red, 0xa5, MALLOC_RED); + } +#endif +RETURN: + return (ret); +} + +static void * +arena_palloc(arena_t *arena, size_t alignment, size_t size) +{ + void *ret; + + assert(arena != NULL); + assert(arena->magic == ARENA_MAGIC); + + if (alignment <= quantum) { + /* + * The requested alignment is always guaranteed, so use the + * normal allocation function. + */ + ret = arena_malloc(arena, size); + } else { + region_t *reg, *old_split; + size_t quantum_size, alloc_size, offset, total_size; + + /* + * Allocate more space than necessary, then carve an aligned + * region out of it. The smallest allowable region is + * potentially a multiple of the quantum size, so care must be + * taken to carve things up such that all resulting regions are + * large enough. + */ + + quantum_size = region_ceiling(size); + if (quantum_size < size) { + /* size is large enough to cause size_t wrap-around. */ + ret = NULL; + goto RETURN; + } + + /* + * Calculate how large of a region to allocate. There must be + * enough space to advance far enough to have at least + * sizeof(region_small_sizer_t) leading bytes, yet also land at + * an alignment boundary. + */ + if (alignment >= sizeof(region_small_sizer_t)) { + alloc_size = + QUANTUM_CEILING(sizeof(region_small_sizer_t)) + + alignment + quantum_size; + } else { + alloc_size = + (QUANTUM_CEILING(sizeof(region_small_sizer_t)) << 1) + + quantum_size; + } + + if (alloc_size < quantum_size) { + /* size_t wrap-around occurred. */ + ret = NULL; + goto RETURN; + } + + malloc_mutex_lock(&arena->mtx); + old_split = arena->split; + reg = arena_reg_alloc(arena, alloc_size, false); + if (reg == NULL) { + malloc_mutex_unlock(&arena->mtx); + ret = NULL; + goto RETURN; + } + if (reg == old_split) { + /* + * We requested a non-fit allocation that was serviced + * by split, which means that we need to take care to + * restore split in the arena_reg_fit() call later on. + * + * Do nothing; a non-NULL old_split will be used as the + * signal to restore split. + */ + } else + old_split = NULL; + + total_size = region_next_size_get(®->sep); + + if (alignment > bin_maxsize || size > bin_maxsize) { + size_t split_size, p; + + /* + * Put this allocation toward the end of reg, since + * it is large, and we try to put all large regions at + * the end of split regions. + */ + split_size = region_next_size_get(®->sep); + p = (size_t)&((char *)®->next)[split_size]; + p -= offsetof(region_t, next); + p -= size; + p &= ~(alignment - 1); + p -= offsetof(region_t, next); + + offset = p - (size_t)reg; + } else { + if ((((size_t)®->next) & (alignment - 1)) != 0) { + size_t p; + + /* + * reg is unaligned. Calculate the offset into + * reg to actually base the allocation at. + */ + p = ((size_t)®->next + alignment) + & ~(alignment - 1); + while (p - (size_t)®->next + < QUANTUM_CEILING(sizeof( + region_small_sizer_t))) + p += alignment; + p -= offsetof(region_t, next); + + offset = p - (size_t)reg; + } else + offset = 0; + } + assert(offset % quantum == 0); + assert(offset < total_size); + + if (offset != 0) { + region_t *prev; + + /* + * Move ret to an alignment boundary that is far enough + * past the beginning of the allocation to leave a + * leading free region, then trim the leading space. + */ + + assert(offset >= QUANTUM_CEILING( + sizeof(region_small_sizer_t))); + assert(offset + size <= total_size); + + prev = reg; + reg = (region_t *)&((char *)prev)[offset]; + assert(((size_t)®->next & (alignment - 1)) == 0); + + /* prev. */ + region_next_size_set(&prev->sep, offset); + reg->prev.size = offset; + + /* reg. */ + region_next_size_set(®->sep, total_size - offset); + region_next_free_unset(®->sep); + if (region_next_contig_get(&prev->sep)) + region_next_contig_set(®->sep); + else + region_next_contig_unset(®->sep); + + if (old_split != NULL && (alignment > bin_maxsize + || size > bin_maxsize)) { + /* Restore to split. */ + region_prev_free_unset(®->sep); + + arena->split = prev; + old_split = NULL; + } else { + region_next_free_set(&prev->sep); + region_prev_free_set(®->sep); + + arena_mru_cache(arena, prev, offset); + } +#ifdef MALLOC_STATS + arena->stats.nsplit++; +#endif + } + + arena_reg_fit(arena, quantum_size, reg, (old_split != NULL)); + +#ifdef MALLOC_STATS + arena->allocated += quantum_size; +#endif + + malloc_mutex_unlock(&arena->mtx); + + ret = (void *)®->next; +#ifdef MALLOC_REDZONES + { + region_t *next; + size_t total_size; + + memset(reg->sep.next_red, 0xa5, MALLOC_RED); + + /* + * Unused trailing space in the region is considered + * part of the trailing redzone. + */ + total_size = region_next_size_get(®->sep); + assert(total_size >= size); + memset(&((char *)ret)[size], 0xa5, + total_size - size - sizeof(region_sep_t)); + + reg->sep.next_exact_size = size; + + next = (region_t *)&((char *)reg)[total_size]; + memset(next->sep.prev_red, 0xa5, MALLOC_RED); + } +#endif + } + +RETURN: + assert(((size_t)ret & (alignment - 1)) == 0); + return (ret); +} + +static void * +arena_calloc(arena_t *arena, size_t num, size_t size) +{ + void *ret; + + assert(arena != NULL); + assert(arena->magic == ARENA_MAGIC); + assert(num * size != 0); + + ret = arena_malloc(arena, num * size); + if (ret == NULL) + goto RETURN; + + memset(ret, 0, num * size); + +RETURN: + return (ret); +} + +static size_t +arena_salloc(arena_t *arena, void *ptr) +{ + size_t ret; + region_t *reg; + + assert(arena != NULL); + assert(arena->magic == ARENA_MAGIC); + assert(ptr != NULL); + assert(ptr != &nil); + assert(CHUNK_ADDR2BASE(ptr) != ptr); + + reg = (region_t *)&((char *)ptr)[-offsetof(region_t, next)]; + + ret = region_next_size_get(®->sep); + + return (ret); +} + +#ifdef MALLOC_REDZONES +static void +redzone_check(void *ptr) +{ + region_t *reg, *next; + size_t size; + unsigned i, ncorruptions; + + ncorruptions = 0; + + reg = (region_t *)&((char *)ptr)[-offsetof(region_t, next)]; + size = region_next_size_get(®->sep); + next = (region_t *)&((char *)reg)[size]; + + /* Leading redzone. */ + for (i = 0; i < MALLOC_RED; i++) { + if ((unsigned char)reg->sep.next_red[i] != 0xa5) { + size_t offset = (size_t)MALLOC_RED - i; + + ncorruptions++; + malloc_printf("%s: (malloc) Corrupted redzone %zu " + "byte%s before %p (0x%x)\n", _getprogname(), + offset, offset > 1 ? "s" : "", ptr, + (unsigned char)reg->sep.next_red[i]); + } + } + memset(®->sep.next_red, 0x5a, MALLOC_RED); + + /* Bytes immediately trailing allocation. */ + for (i = 0; i < size - reg->sep.next_exact_size - sizeof(region_sep_t); + i++) { + if ((unsigned char)((char *)ptr)[reg->sep.next_exact_size + i] + != 0xa5) { + size_t offset = (size_t)(i + 1); + + ncorruptions++; + malloc_printf("%s: (malloc) Corrupted redzone %zu " + "byte%s after %p (size %zu) (0x%x)\n", + _getprogname(), offset, offset > 1 ? "s" : "", ptr, + reg->sep.next_exact_size, (unsigned char)((char *) + ptr)[reg->sep.next_exact_size + i]); + } + } + memset(&((char *)ptr)[reg->sep.next_exact_size], 0x5a, + size - reg->sep.next_exact_size - sizeof(region_sep_t)); + + /* Trailing redzone. */ + for (i = 0; i < MALLOC_RED; i++) { + if ((unsigned char)next->sep.prev_red[i] != 0xa5) { + size_t offset = (size_t)(size - reg->sep.next_exact_size + - sizeof(region_sep_t) + i + 1); + + ncorruptions++; + malloc_printf("%s: (malloc) Corrupted redzone %zu " + "byte%s after %p (size %zu) (0x%x)\n", + _getprogname(), offset, offset > 1 ? "s" : "", ptr, + reg->sep.next_exact_size, + (unsigned char)next->sep.prev_red[i]); + } + } + memset(&next->sep.prev_red, 0x5a, MALLOC_RED); + + if (opt_abort && ncorruptions != 0) + abort(); + + reg->sep.next_exact_size = 0; +} +#endif + +static void +arena_dalloc(arena_t *arena, void *ptr) +{ + region_t *reg; + + assert(arena != NULL); + assert(ptr != NULL); + assert(ptr != &nil); + + assert(arena != NULL); + assert(arena->magic == ARENA_MAGIC); + assert(CHUNK_ADDR2BASE(ptr) != ptr); + + reg = (region_t *)&((char *)ptr)[-offsetof(region_t, next)]; + + malloc_mutex_lock(&arena->mtx); + +#ifdef MALLOC_DEBUG + { + chunk_node_t *chunk, *node; + chunk_node_t key; + + chunk = CHUNK_ADDR2BASE(ptr); + assert(chunk->arena == arena); + + key.chunk = chunk; + node = RB_FIND(chunk_tree_s, &arena->chunks, &key); + assert(node == chunk); + } +#endif +#ifdef MALLOC_REDZONES + redzone_check(ptr); +#endif + +#ifdef MALLOC_STATS + arena->allocated -= region_next_size_get(®->sep); +#endif + + if (opt_junk) { + memset(®->next, 0x5a, + region_next_size_get(®->sep) - sizeof(region_sep_t)); + } + + arena_delay_cache(arena, reg); + + malloc_mutex_unlock(&arena->mtx); +} + +#ifdef NOT_YET +static void * +arena_ralloc(arena_t *arena, void *ptr, size_t size) +{ + + /* + * Arenas don't need to support ralloc, since all reallocation is done + * by allocating new space and copying. This function should never be + * called. + */ + /* NOTREACHED */ + assert(false); + + return (NULL); +} +#endif + +#ifdef MALLOC_STATS +static bool +arena_stats(arena_t *arena, size_t *allocated, size_t *total) +{ + + assert(arena != NULL); + assert(arena->magic == ARENA_MAGIC); + assert(allocated != NULL); + assert(total != NULL); + + malloc_mutex_lock(&arena->mtx); + *allocated = arena->allocated; + *total = arena->nchunks * chunk_size; + malloc_mutex_unlock(&arena->mtx); + + return (false); +} +#endif + +static void +arena_new(arena_t *arena, bool malloced, bool recursive) +{ + unsigned i; + + arena->malloced = malloced; + + if (recursive) + malloc_mutex_recursive_init(&arena->mtx); + else + malloc_mutex_init(&arena->mtx); + + for (i = 0; i < NBINS; i++) + qr_new(&arena->bins[i].regions, next.u.s.link); + + for (i = 0; i < BINMASK_NELMS; i++) + arena->bins_mask[i] = 0; + + arena->split = NULL; + arena->frag = NULL; + RB_INIT(&arena->large_regions); + + RB_INIT(&arena->chunks); + arena->nchunks = 0; + +#ifdef MALLOC_STATS + arena->allocated = 0; + + memset(&arena->stats, 0, sizeof(arena_stats_t)); +#endif + +#ifdef MALLOC_DEBUG + arena->magic = ARENA_MAGIC; +#endif + + /* + * Do this last, since it involved recursing in the case of base_arena. + * + * Use page alignment for delayed, so that only one page must be kept + * in RAM, rather than two (assuming a small enough array). This seems + * prudent, given that all of delayed is touched often. + */ + assert(opt_ndelay > 0); + arena->delayed = (region_t **)ipalloc(&base_arena, pagesize, + (opt_ndelay * sizeof(region_t *)) + CACHELINE); + memset(arena->delayed, 0, opt_ndelay * sizeof(region_t *)); + arena->next_delayed = 0; +} + +/* Create a new arena and insert it into the arenas array at index ind. */ +static arena_t * +arenas_extend(unsigned ind) +{ + arena_t *ret; + + ret = (arena_t *)ipalloc(&base_arena, CACHELINE, + sizeof(arena_t) + CACHELINE); + if (ret != NULL) { + arena_new(ret, true, false); + arenas[ind] = ret; + } else { + /* + * OOM here is quite inconvenient to propagate, since dealing + * with it would require a check for failure in the fast path. + * Instead, punt by using arenas[0]. In practice, this is an + * extremely unlikely failure. + */ + malloc_printf("%s: (malloc) Error initializing arena\n", + _getprogname()); + if (opt_abort) + abort(); + + ret = arenas[0]; + } + + return (ret); +} + +/* + * End arena. + */ +/******************************************************************************/ +/* + * Begin general internal functions. + */ + +/* + * Choose an arena based on a per-thread value (fast-path code, calls slow-path + * code if necessary. + */ +static __inline arena_t * +choose_arena(void) +{ + arena_t *ret; + + /* + * We can only use TLS if this is a PIC library, since for the static + * library version, libc's malloc is used by TLS allocation, which + * introduces a bootstrapping issue. + */ +#ifndef NO_TLS + ret = arenas_map; + if (ret == NULL) + ret = choose_arena_hard(); +#else + if (__isthreaded) { + unsigned long ind; + + /* + * Hash _pthread_self() to one of the arenas. There is a prime + * number of arenas, so this has a reasonable chance of + * working. Even so, the hashing can be easily thwarted by + * inconvenient _pthread_self() values. Without specific + * knowledge of how _pthread_self() calculates values, we can't + * do much better than this. + */ + ind = (unsigned long) _pthread_self() % narenas; + + /* + * Optimistially assume that arenas[ind] has been initialized. + * At worst, we find out that some other thread has already + * done so, after acquiring the lock in preparation. Note that + * this lazy locking also has the effect of lazily forcing + * cache coherency; without the lock acquisition, there's no + * guarantee that modification of arenas[ind] by another thread + * would be seen on this CPU for an arbitrary amount of time. + * + * In general, this approach to modifying a synchronized value + * isn't a good idea, but in this case we only ever modify the + * value once, so things work out well. + */ + ret = arenas[ind]; + if (ret == NULL) { + /* + * Avoid races with another thread that may have already + * initialized arenas[ind]. + */ + malloc_mutex_lock(&arenas_mtx); + if (arenas[ind] == NULL) + ret = arenas_extend((unsigned)ind); + else + ret = arenas[ind]; + malloc_mutex_unlock(&arenas_mtx); + } + } else + ret = arenas[0]; +#endif + + return (ret); +} + +#ifndef NO_TLS +/* + * Choose an arena based on a per-thread value (slow-path code only, called + * only by choose_arena()). + */ +static arena_t * +choose_arena_hard(void) +{ + arena_t *ret; + + /* Assign one of the arenas to this thread, in a round-robin fashion. */ + if (__isthreaded) { + malloc_mutex_lock(&arenas_mtx); + ret = arenas[next_arena]; + if (ret == NULL) + ret = arenas_extend(next_arena); + next_arena = (next_arena + 1) % narenas; + malloc_mutex_unlock(&arenas_mtx); + } else + ret = arenas[0]; + arenas_map = ret; + + return (ret); +} +#endif + +static void * +huge_malloc(arena_t *arena, size_t size) +{ + void *ret; + size_t chunk_size; + chunk_node_t *node; + + /* Allocate a chunk for this request. */ + +#ifdef MALLOC_STATS + arena->stats.huge.nrequests++; +#endif + + chunk_size = CHUNK_CEILING(size); + if (chunk_size == 0) { + /* size is large enough to cause size_t wrap-around. */ + ret = NULL; + goto RETURN; + } + + /* Allocate a chunk node with which to track the chunk. */ + node = ipalloc(&base_arena, CACHELINE, + sizeof(chunk_node_t) + CACHELINE); + if (node == NULL) { + ret = NULL; + goto RETURN; + } + + ret = chunk_alloc(chunk_size); + if (ret == NULL) { + idalloc(node); + ret = NULL; + goto RETURN; + } + + /* Insert node into chunks. */ + node->arena = arena; + node->chunk = ret; + node->size = chunk_size; + node->extra = chunk_size - size; + + malloc_mutex_lock(&chunks_mtx); + RB_INSERT(chunk_tree_s, &huge, node); +#ifdef MALLOC_STATS + huge_allocated += size; + huge_total += chunk_size; +#endif + malloc_mutex_unlock(&chunks_mtx); + +RETURN: + return (ret); +} + +static void +huge_dalloc(void *ptr) +{ + chunk_node_t key; + chunk_node_t *node; + + malloc_mutex_lock(&chunks_mtx); + + /* Extract from tree of huge allocations. */ + key.chunk = ptr; + node = RB_FIND(chunk_tree_s, &huge, &key); + assert(node != NULL); + assert(node->chunk == ptr); + RB_REMOVE(chunk_tree_s, &huge, node); + +#ifdef MALLOC_STATS + malloc_mutex_lock(&node->arena->mtx); + node->arena->stats.ndalloc++; + malloc_mutex_unlock(&node->arena->mtx); + + /* Update counters. */ + huge_allocated -= (node->size - node->extra); + huge_total -= node->size; +#endif + + malloc_mutex_unlock(&chunks_mtx); + + /* Unmap chunk. */ + chunk_dealloc(node->chunk, node->size); + + idalloc(node); +} + +static void * +imalloc(arena_t *arena, size_t size) +{ + void *ret; + + assert(arena != NULL); + assert(arena->magic == ARENA_MAGIC); + assert(size != 0); + + if (region_ceiling(size) <= (chunk_size >> 1)) + ret = arena_malloc(arena, size); + else + ret = huge_malloc(arena, size); + +#ifdef MALLOC_STATS + malloc_mutex_lock(&arena->mtx); + arena->stats.nmalloc++; + malloc_mutex_unlock(&arena->mtx); +#endif + + if (opt_junk) { + if (ret != NULL) + memset(ret, 0xa5, size); + } + return (ret); +} + +static void * +ipalloc(arena_t *arena, size_t alignment, size_t size) +{ + void *ret; + + assert(arena != NULL); + assert(arena->magic == ARENA_MAGIC); + + /* + * The conditions that disallow calling arena_palloc() are quite + * tricky. + * + * The first main clause of the conditional mirrors that in imalloc(), + * and is necesary because arena_palloc() may in turn call + * arena_malloc(). + * + * The second and third clauses are necessary because we want to be + * sure that it will not be necessary to allocate more than a + * half-chunk region at any point during the creation of the aligned + * allocation. These checks closely mirror the calculation of + * alloc_size in arena_palloc(). + * + * Finally, the fourth clause makes explicit the constraint on what + * alignments will be attempted via regions. At first glance, this + * appears unnecessary, but in actuality, it protects against otherwise + * difficult-to-detect size_t wrap-around cases. + */ + if (region_ceiling(size) <= (chunk_size >> 1) + + && (alignment < sizeof(region_small_sizer_t) + || (QUANTUM_CEILING(sizeof(region_small_sizer_t)) + alignment + + (region_ceiling(size))) <= (chunk_size >> 1)) + + && (alignment >= sizeof(region_small_sizer_t) + || ((QUANTUM_CEILING(sizeof(region_small_sizer_t)) << 1) + + (region_ceiling(size))) <= (chunk_size >> 1)) + + && alignment <= (chunk_size >> 2)) + ret = arena_palloc(arena, alignment, size); + else { + if (alignment <= chunk_size) + ret = huge_malloc(arena, size); + else { + size_t chunksize, alloc_size, offset; + chunk_node_t *node; + + /* + * This allocation requires alignment that is even + * larger than chunk alignment. This means that + * huge_malloc() isn't good enough. + * + * Allocate almost twice as many chunks as are demanded + * by the size or alignment, in order to assure the + * alignment can be achieved, then unmap leading and + * trailing chunks. + */ + + chunksize = CHUNK_CEILING(size); + + if (size >= alignment) + alloc_size = chunksize + alignment - chunk_size; + else + alloc_size = (alignment << 1) - chunk_size; + + /* + * Allocate a chunk node with which to track the chunk. + */ + node = ipalloc(&base_arena, CACHELINE, + sizeof(chunk_node_t) + CACHELINE); + if (node == NULL) { + ret = NULL; + goto RETURN; + } + + ret = chunk_alloc(alloc_size); + if (ret == NULL) { + idalloc(node); + ret = NULL; + goto RETURN; + } + + offset = (size_t)ret & (alignment - 1); + assert(offset % chunk_size == 0); + assert(offset < alloc_size); + if (offset == 0) { + /* Trim trailing space. */ + chunk_dealloc((void *) ((size_t) ret + + chunksize), alloc_size - chunksize); + } else { + size_t trailsize; + + /* Trim leading space. */ + chunk_dealloc(ret, alignment - offset); + + ret = (void *) ((size_t) ret + (alignment + - offset)); + + trailsize = alloc_size - (alignment - offset) + - chunksize; + if (trailsize != 0) { + /* Trim trailing space. */ + assert(trailsize < alloc_size); + chunk_dealloc((void *) ((size_t) ret + + chunksize), trailsize); + } + } + + /* Insert node into chunks. */ + node->arena = arena; + node->chunk = ret; + node->size = chunksize; + node->extra = node->size - size; + + malloc_mutex_lock(&chunks_mtx); + RB_INSERT(chunk_tree_s, &huge, node); +#ifdef MALLOC_STATS + huge_allocated += size; + huge_total += chunksize; +#endif + malloc_mutex_unlock(&chunks_mtx); + } + } + +RETURN: +#ifdef MALLOC_STATS + malloc_mutex_lock(&arena->mtx); + arena->stats.npalloc++; + malloc_mutex_unlock(&arena->mtx); +#endif + + if (opt_junk) { + if (ret != NULL) + memset(ret, 0xa5, size); + } + assert(((size_t)ret & (alignment - 1)) == 0); + return (ret); +} + +static void * +icalloc(arena_t *arena, size_t num, size_t size) +{ + void *ret; + + assert(arena != NULL); + assert(arena->magic == ARENA_MAGIC); + assert(num * size != 0); + + if (region_ceiling(num * size) <= (chunk_size >> 1)) + ret = arena_calloc(arena, num, size); + else { + /* + * The virtual memory system provides zero-filled pages, so + * there is no need to do so manually. + */ + ret = huge_malloc(arena, num * size); +#ifdef USE_BRK + if ((size_t)ret >= (size_t)brk_base + && (size_t)ret < (size_t)brk_max) { + /* + * This may be a re-used brk chunk. Therefore, zero + * the memory. + */ + memset(ret, 0, num * size); + } +#endif + } + +#ifdef MALLOC_STATS + malloc_mutex_lock(&arena->mtx); + arena->stats.ncalloc++; + malloc_mutex_unlock(&arena->mtx); +#endif + + return (ret); +} + +static size_t +isalloc(void *ptr) +{ + size_t ret; + chunk_node_t *node; + + assert(ptr != NULL); + assert(ptr != &nil); + + node = CHUNK_ADDR2BASE(ptr); + if (node != ptr) { + /* Region. */ + assert(node->arena->magic == ARENA_MAGIC); + + ret = arena_salloc(node->arena, ptr); + } else { + chunk_node_t key; + + /* Chunk (huge allocation). */ + + malloc_mutex_lock(&chunks_mtx); + + /* Extract from tree of huge allocations. */ + key.chunk = ptr; + node = RB_FIND(chunk_tree_s, &huge, &key); + assert(node != NULL); + + ret = node->size - node->extra; + + malloc_mutex_unlock(&chunks_mtx); + } + + return (ret); +} + +static void +idalloc(void *ptr) +{ + chunk_node_t *node; + + assert(ptr != NULL); + assert(ptr != &nil); + + node = CHUNK_ADDR2BASE(ptr); + if (node != ptr) { + /* Region. */ +#ifdef MALLOC_STATS + malloc_mutex_lock(&node->arena->mtx); + node->arena->stats.ndalloc++; + malloc_mutex_unlock(&node->arena->mtx); +#endif + arena_dalloc(node->arena, ptr); + } else + huge_dalloc(ptr); +} + +static void * +iralloc(arena_t *arena, void *ptr, size_t size) +{ + void *ret; + size_t oldsize; + + assert(arena != NULL); + assert(arena->magic == ARENA_MAGIC); + assert(ptr != NULL); + assert(ptr != &nil); + assert(size != 0); + + oldsize = isalloc(ptr); + + if (region_ceiling(size) <= (chunk_size >> 1)) { + ret = arena_malloc(arena, size); + if (ret == NULL) + goto RETURN; + if (opt_junk) + memset(ret, 0xa5, size); + + if (size < oldsize) + memcpy(ret, ptr, size); + else + memcpy(ret, ptr, oldsize); + } else { + ret = huge_malloc(arena, size); + if (ret == NULL) + goto RETURN; + if (opt_junk) + memset(ret, 0xa5, size); + + if (CHUNK_ADDR2BASE(ptr) == ptr) { + /* The old allocation is a chunk. */ + if (size < oldsize) + memcpy(ret, ptr, size); + else + memcpy(ret, ptr, oldsize); + } else { + /* The old allocation is a region. */ + assert(oldsize < size); + memcpy(ret, ptr, oldsize); + } + } + + idalloc(ptr); + +RETURN: +#ifdef MALLOC_STATS + malloc_mutex_lock(&arena->mtx); + arena->stats.nralloc++; + malloc_mutex_unlock(&arena->mtx); +#endif + return (ret); +} + +#ifdef MALLOC_STATS +static void +istats(size_t *allocated, size_t *total) +{ + size_t tallocated, ttotal; + size_t rallocated, rtotal; + unsigned i; + + tallocated = 0; + ttotal = 0; + + /* base_arena. */ + arena_stats(&base_arena, &rallocated, &rtotal); + /* + * base_arena is all overhead, so pay no attention to to rallocated + * here. + */ + tallocated = 0; + ttotal = rtotal; + + /* arenas. */ + for (i = 0; i < narenas; i++) { + if (arenas[i] != NULL) { + arena_stats(arenas[i], &rallocated, &rtotal); + tallocated += rallocated; + ttotal += rtotal; + } + } + + /* huge. */ + malloc_mutex_lock(&chunks_mtx); + tallocated += huge_allocated; + ttotal += huge_total; + malloc_mutex_unlock(&chunks_mtx); + + /* Return results. */ + *allocated = tallocated; + *total = ttotal; +} +#endif + +static void +malloc_print_stats(void) +{ + + if (opt_print_stats) { + malloc_printf("___ Begin malloc statistics ___\n"); + malloc_printf("Number of CPUs: %u\n", ncpus); + malloc_printf("Number of arenas: %u\n", narenas); + malloc_printf("Cache slots: %u\n", opt_ndelay); + malloc_printf("Chunk size: %zu (2^%zu)\n", chunk_size, + opt_chunk_2pow); + malloc_printf("Quantum size: %zu (2^%zu)\n", quantum, + opt_quantum_2pow); + malloc_printf("Pointer size: %u\n", sizeof(size_t)); + malloc_printf("Number of bins: %u\n", NBINS); + malloc_printf("Maximum bin size: %u\n", bin_maxsize); + malloc_printf("Assertions %s\n", +#ifdef NDEBUG + "disabled" +#else + "enabled" +#endif + ); + malloc_printf("Redzone size: %u\n", +#ifdef MALLOC_REDZONES + MALLOC_RED +#else + 0 +#endif + ); + +#ifdef MALLOC_STATS + { + size_t a, b; + + istats(&a, &b); + malloc_printf("Allocated: %zu, space used: %zu\n", a, + b); + } + + { + arena_stats_t stats_arenas; + arena_t *arena; + unsigned i; + + /* Print chunk stats. */ + { + chunk_stats_t chunks_stats; + + malloc_mutex_lock(&chunks_mtx); + chunks_stats = stats_chunks; + malloc_mutex_unlock(&chunks_mtx); + + malloc_printf("\nchunks:\n"); + malloc_printf(" %13s%13s%13s\n", "nchunks", + "highchunks", "curchunks"); + malloc_printf(" %13llu%13lu%13lu\n", + chunks_stats.nchunks, + chunks_stats.highchunks, + chunks_stats.curchunks); + } + +#ifdef MALLOC_STATS_BASE + /* + * Copy base_arena's stats out, so that they are + * self-consistent, even if the process of printing + * stats is causing additional allocation. + */ + memset(&stats_arenas, 0, sizeof(arena_stats_t)); + malloc_mutex_lock(&base_arena.mtx); + stats_merge(&base_arena, &stats_arenas); + malloc_mutex_unlock(&base_arena.mtx); + + /* Print base_arena stats. */ + malloc_printf("\nbase_arena statistics:\n"); + stats_print(&stats_arenas); +#endif + +#ifdef MALLOC_STATS_ARENAS + /* Print stats for each arena. */ + for (i = 0; i < narenas; i++) { + arena = arenas[i]; + if (arena != NULL) { + malloc_printf( + "\narenas[%u] statistics:\n", i); + malloc_mutex_lock(&arena->mtx); + stats_print(&arena->stats); + malloc_mutex_unlock(&arena->mtx); + } else { + malloc_printf("\narenas[%u] statistics:" + " unused arena\n", i); + } + } +#endif + + /* + * Merge arena stats from arenas (leave out base_arena). + */ + memset(&stats_arenas, 0, sizeof(arena_stats_t)); + for (i = 0; i < narenas; i++) { + arena = arenas[i]; + if (arena != NULL) { + malloc_mutex_lock(&arena->mtx); + stats_merge(arena, &stats_arenas); + malloc_mutex_unlock(&arena->mtx); + } + } + + /* Print arena stats. */ + malloc_printf("\nMerged arena statistics:\n"); + stats_print(&stats_arenas); + } +#endif /* #ifdef MALLOC_STATS */ + malloc_printf("--- End malloc statistics ---\n"); + } +} + +/* + * FreeBSD's pthreads implementation calls malloc(3), so the malloc + * implementation has to take pains to avoid infinite recursion during + * initialization. + * + * atomic_init_start() returns true if it started initializing. In that case, + * the caller must also call atomic_init_finish(), just before returning + * to its caller. This delayed finalization of initialization is critical, + * since otherwise choose_arena() has no way to know whether it's safe + * to call _pthread_self(). + */ +static __inline void +malloc_init(void) +{ + + /* + * We always initialize before threads are created, since any thread + * creation first triggers allocations. + */ + assert(__isthreaded == 0 || malloc_initialized); + + if (malloc_initialized == false) + malloc_init_hard(); +} + +static void +malloc_init_hard(void) +{ + unsigned i, j; + int linklen; + char buf[PATH_MAX + 1]; + const char *opts; + + /* Get number of CPUs. */ + { + int mib[2]; + size_t len; + + mib[0] = CTL_HW; + mib[1] = HW_NCPU; + len = sizeof(ncpus); + if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) { + /* Error. */ + ncpus = 1; + } + } + + /* Get page size. */ + { + long result; + + result = sysconf(_SC_PAGESIZE); + assert(result != -1); + pagesize = (unsigned) result; + } + + for (i = 0; i < 3; i++) { + /* Get runtime configuration. */ + switch (i) { + case 0: + if ((linklen = readlink("/etc/malloc.conf", buf, + sizeof(buf) - 1)) != -1) { + /* + * Use the contents of the "/etc/malloc.conf" + * symbolic link's name. + */ + buf[linklen] = '\0'; + opts = buf; + } else { + /* No configuration specified. */ + buf[0] = '\0'; + opts = buf; + } + break; + case 1: + if (issetugid() == 0 && (opts = + getenv("MALLOC_OPTIONS")) != NULL) { + /* + * Do nothing; opts is already initialized to + * the value of the MALLOC_OPTIONS environment + * variable. + */ + } else { + /* No configuration specified. */ + buf[0] = '\0'; + opts = buf; + } + break; + case 2: + if (_malloc_options != NULL) { + /* + * Use options that were compiled into the program. + */ + opts = _malloc_options; + } else { + /* No configuration specified. */ + buf[0] = '\0'; + opts = buf; + } + break; + default: + /* NOTREACHED */ + assert(false); + } + + for (j = 0; opts[j] != '\0'; j++) { + switch (opts[j]) { + case 'a': + opt_abort = false; + break; + case 'A': + opt_abort = true; + break; + case 'c': + opt_ndelay <<= 1; + if (opt_ndelay == 0) + opt_ndelay = 1; + break; + case 'C': + opt_ndelay >>= 1; + if (opt_ndelay == 0) + opt_ndelay = 1; + break; + case 'j': + opt_junk = false; + break; + case 'J': + opt_junk = true; + break; + case 'k': + if (opt_chunk_2pow > CHUNK_2POW_MIN) + opt_chunk_2pow--; + break; + case 'K': + if (opt_chunk_2pow < CHUNK_2POW_MAX) + opt_chunk_2pow++; + break; + case 'n': + opt_narenas_lshift--; + break; + case 'N': + opt_narenas_lshift++; + break; + case 'p': + opt_print_stats = false; + break; + case 'P': + opt_print_stats = true; + break; + case 'q': + if (opt_quantum_2pow > QUANTUM_2POW_MIN) + opt_quantum_2pow--; + break; + case 'Q': + if ((1 << opt_quantum_2pow) < pagesize) + opt_quantum_2pow++; + break; + case 'u': + opt_utrace = false; + break; + case 'U': + opt_utrace = true; + break; + case 'v': + opt_sysv = false; + break; + case 'V': + opt_sysv = true; + break; + case 'x': + opt_xmalloc = false; + break; + case 'X': + opt_xmalloc = true; + break; + case 'z': + opt_zero = false; + break; + case 'Z': + opt_zero = true; + break; + default: + malloc_printf("%s: (malloc) Unsupported" + " character in malloc options: '%c'\n", + _getprogname(), opts[j]); + } + } + } + + /* Take care to call atexit() only once. */ + if (opt_print_stats) { + /* Print statistics at exit. */ + atexit(malloc_print_stats); + } + + /* Set variables according to the value of opt_quantum_2pow. */ + quantum = (1 << opt_quantum_2pow); + quantum_mask = quantum - 1; + bin_shift = ((QUANTUM_CEILING(sizeof(region_small_sizer_t)) + >> opt_quantum_2pow)); + bin_maxsize = ((NBINS + bin_shift - 1) * quantum); + + /* Set variables according to the value of opt_chunk_2pow. */ + chunk_size = (1 << opt_chunk_2pow); + chunk_size_mask = chunk_size - 1; + + UTRACE(0, 0, 0); + +#ifdef MALLOC_STATS + memset(&stats_chunks, 0, sizeof(chunk_stats_t)); +#endif + + /* Various sanity checks that regard configuration. */ + assert(quantum >= 2 * sizeof(void *)); + assert(quantum <= pagesize); + assert(chunk_size >= pagesize); + assert(quantum * 4 <= chunk_size); + + /* Initialize chunks data. */ + malloc_mutex_init(&chunks_mtx); + RB_INIT(&huge); +#ifdef USE_BRK + brk_base = sbrk(0); + brk_prev = brk_base; + brk_max = (void *)((size_t)brk_base + MAXDSIZ); +#endif +#ifdef MALLOC_STATS + huge_allocated = 0; + huge_total = 0; +#endif + RB_INIT(&old_chunks); + + /* Create base arena. */ + arena_new(&base_arena, false, true); + + if (ncpus > 1) { + /* + * For SMP systems, create twice as many arenas as there are + * CPUs by default. + */ + opt_narenas_lshift++; + } + + /* Determine how many arenas to use. */ + narenas = 1; + if (opt_narenas_lshift > 0) + narenas <<= opt_narenas_lshift; + +#ifdef NO_TLS + if (narenas > 1) { + static const unsigned primes[] = {1, 3, 5, 7, 11, 13, 17, 19, + 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, + 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, + 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, + 223, 227, 229, 233, 239, 241, 251, 257, 263}; + unsigned i, nprimes, parenas; + + /* + * Pick a prime number of hash arenas that is more than narenas + * so that direct hashing of pthread_self() pointers tends to + * spread allocations evenly among the arenas. + */ + assert((narenas & 1) == 0); /* narenas must be even. */ + nprimes = sizeof(primes) / sizeof(unsigned); + parenas = primes[nprimes - 1]; /* In case not enough primes. */ + for (i = 1; i < nprimes; i++) { + if (primes[i] > narenas) { + parenas = primes[i]; + break; + } + } + narenas = parenas; + } +#endif + +#ifndef NO_TLS + next_arena = 0; +#endif + + /* Allocate and initialize arenas. */ + arenas = (arena_t **)ipalloc(&base_arena, CACHELINE, + (sizeof(arena_t *) * narenas) + CACHELINE); + /* OOM here is fatal. */ + assert(arenas != NULL); + /* + * Zero the array. In practice, this should always be pre-zeroed, + * since it was just mmap()ed, but let's be sure. + */ + memset(arenas, 0, sizeof(arena_t *) * narenas); + + /* + * Initialize one arena here. The rest are lazily created in + * arena_choose_hard(). + */ + arenas_extend(0); + /* OOM here is fatal. */ + assert(arenas[0] != NULL); + + malloc_mutex_init(&arenas_mtx); + + malloc_initialized = true; +} + +/* + * End library-internal functions. + */ +/******************************************************************************/ +/* + * Begin malloc(3)-compatible functions. */ void * malloc(size_t size) { + void *ret; + arena_t *arena; - return (pubrealloc(NULL, size, " in malloc():")); + malloc_init(); + + if (size == 0) { + if (opt_sysv == false) + ret = &nil; + else + ret = NULL; + goto RETURN; + } + + arena = choose_arena(); + if (arena != NULL) + ret = imalloc(arena, size); + else + ret = NULL; + + if (ret == NULL) { + if (opt_xmalloc) { + malloc_printf("%s: (malloc) Error in malloc(%zu):" + " out of memory\n", _getprogname(), size); + abort(); + } + errno = ENOMEM; + } else if (opt_zero) + memset(ret, 0, size); + +RETURN: + UTRACE(0, size, ret); + return (ret); } int posix_memalign(void **memptr, size_t alignment, size_t size) { - int err; - void *result; + int ret; + arena_t *arena; + void *result; - /* Make sure that alignment is a large enough power of 2. */ - if (((alignment - 1) & alignment) != 0 || alignment < sizeof(void *)) - return (EINVAL); + malloc_init(); - /* - * (size | alignment) is enough to assure the requested alignment, since - * the allocator always allocates power-of-two blocks. - */ - err = errno; /* Protect errno against changes in pubrealloc(). */ - result = pubrealloc(NULL, (size | alignment), " in posix_memalign()"); - errno = err; + /* Make sure that alignment is a large enough power of 2. */ + if (((alignment - 1) & alignment) != 0 || alignment < sizeof(void *)) { + if (opt_xmalloc) { + malloc_printf("%s: (malloc) Error in" + " posix_memalign(%zu, %zu): invalid alignment\n", + _getprogname(), alignment, size); + abort(); + } + result = NULL; + ret = EINVAL; + goto RETURN; + } - if (result == NULL) - return (ENOMEM); + arena = choose_arena(); + if (arena != NULL) + result = ipalloc(arena, alignment, size); + else + result = NULL; - *memptr = result; - return (0); + if (result == NULL) { + if (opt_xmalloc) { + malloc_printf("%s: (malloc) Error in" + " posix_memalign(%zu, %zu): out of memory\n", + _getprogname(), alignment, size); + abort(); + } + ret = ENOMEM; + goto RETURN; + } else if (opt_zero) + memset(result, 0, size); + + *memptr = result; + ret = 0; + +RETURN: + UTRACE(0, size, result); + return (ret); } void * calloc(size_t num, size_t size) { - void *ret; + void *ret; + arena_t *arena; - if (size != 0 && (num * size) / size != num) { - /* size_t overflow. */ - errno = ENOMEM; - return (NULL); - } + malloc_init(); - ret = pubrealloc(NULL, num * size, " in calloc():"); + if (num * size == 0) { + if (opt_sysv == false) + ret = &nil; + else + ret = NULL; + goto RETURN; + } else if ((num * size) / size != num) { + /* size_t overflow. */ + ret = NULL; + goto RETURN; + } - if (ret != NULL) - memset(ret, 0, num * size); + arena = choose_arena(); + if (arena != NULL) + ret = icalloc(arena, num, size); + else + ret = NULL; - return (ret); +RETURN: + if (ret == NULL) { + if (opt_xmalloc) { + malloc_printf("%s: (malloc) Error in" + " calloc(%zu, %zu): out of memory\n", + _getprogname(), num, size); + abort(); + } + errno = ENOMEM; + } else if (opt_zero) { + /* + * This has the side effect of faulting pages in, even if the + * pages are pre-zeroed by the kernel. + */ + memset(ret, 0, num * size); + } + + UTRACE(0, num * size, ret); + return (ret); +} + +void * +realloc(void *ptr, size_t size) +{ + void *ret; + + if (size != 0) { + arena_t *arena; + + if (ptr != &nil && ptr != NULL) { + assert(malloc_initialized); + + arena = choose_arena(); + if (arena != NULL) + ret = iralloc(arena, ptr, size); + else + ret = NULL; + + if (ret == NULL) { + if (opt_xmalloc) { + malloc_printf("%s: (malloc) Error in" + " ralloc(%p, %zu): out of memory\n", + _getprogname(), ptr, size); + abort(); + } + errno = ENOMEM; + } else if (opt_zero) { + size_t old_size; + + old_size = isalloc(ptr); + + if (old_size < size) { + memset(&((char *)ret)[old_size], 0, + size - old_size); + } + } + } else { + malloc_init(); + + arena = choose_arena(); + if (arena != NULL) + ret = imalloc(arena, size); + else + ret = NULL; + + if (ret == NULL) { + if (opt_xmalloc) { + malloc_printf("%s: (malloc) Error in" + " ralloc(%p, %zu): out of memory\n", + _getprogname(), ptr, size); + abort(); + } + errno = ENOMEM; + } else if (opt_zero) + memset(ret, 0, size); + } + } else { + if (ptr != &nil && ptr != NULL) + idalloc(ptr); + + ret = &nil; + } + + UTRACE(ptr, size, ret); + return (ret); } void free(void *ptr) { - pubrealloc(ptr, 0, " in free():"); -} - -void * -realloc(void *ptr, size_t size) -{ - - return (pubrealloc(ptr, size, " in realloc():")); + UTRACE(ptr, 0, 0); + if (ptr != &nil && ptr != NULL) { + assert(malloc_initialized); + + idalloc(ptr); + } } +/* + * End malloc(3)-compatible functions. + */ +/******************************************************************************/ /* * Begin library-private functions, used by threading libraries for protection * of malloc during fork(). These functions are only called if the program is @@ -1218,13 +4743,42 @@ realloc(void *ptr, size_t size) void _malloc_prefork(void) { + unsigned i; - _spinlock(__malloc_lock); + /* Acquire all mutexes in a safe order. */ + + malloc_mutex_lock(&arenas_mtx); + for (i = 0; i < narenas; i++) { + if (arenas[i] != NULL) + malloc_mutex_lock(&arenas[i]->mtx); + } + malloc_mutex_unlock(&arenas_mtx); + + malloc_mutex_lock(&base_arena.mtx); + + malloc_mutex_lock(&chunks_mtx); } void _malloc_postfork(void) { + unsigned i; - _spinunlock(__malloc_lock); + /* Release all mutexes, now that fork() has completed. */ + + malloc_mutex_unlock(&chunks_mtx); + + malloc_mutex_unlock(&base_arena.mtx); + + malloc_mutex_lock(&arenas_mtx); + for (i = 0; i < narenas; i++) { + if (arenas[i] != NULL) + malloc_mutex_unlock(&arenas[i]->mtx); + } + malloc_mutex_unlock(&arenas_mtx); } + +/* + * End library-private functions. + */ +/******************************************************************************/