Compare commits

..

7 commits
master ... ptc

Author SHA1 Message Date
Greg Burd
53f5df3999 merged commit 831abf2 from illumos-gate
7236 libumem should be able to abort() when an allocation fails
2016-11-30 10:43:02 -05:00
Greg Burd
1b68d2294b Adding missing genasm/ptc platform specific sources. 2015-08-23 14:56:49 -04:00
Greg Burd
f72cf3fe8d Fix the platform ifdef. 2015-08-23 14:56:44 -04:00
Greg Burd
e1cadcd524 Fix compile warning: function '__umem_assert_failed' declared 'noreturn' should not return. 2015-08-22 19:28:00 -04:00
Greg Burd
5e196c5de4 Fix compiler warning: enumeration value 'ITEM_INVALID' not handled in switch. 2015-08-22 19:23:34 -04:00
Greg Burd
075af6a00c Ignore some generated build files. 2015-08-22 19:19:08 -04:00
Greg Burd
089236050e Raw, untested merge of illumos-{joyent,omnios} trying to bring in the per-thread cache (ptc) changes. 2015-08-22 15:40:44 -04:00
33 changed files with 4443 additions and 3428 deletions

View file

@ -38,9 +38,6 @@ libumem_la_SOURCES = init_lib.c \
libumem_malloc_la_SOURCES = malloc.c libumem_malloc_la_SOURCES = malloc.c
libumem_malloc_la_LDFLAGS = -lpthread -R$(libdir) -lumem libumem_malloc_la_LDFLAGS = -lpthread -R$(libdir) -lumem
AM_CFLAGS = -fno-builtin-calloc
ACLOCAL_AMFLAGS = -I m4
man3_MANS = umem_alloc.3 umem_cache_create.3 umem_debug.3 man3_MANS = umem_alloc.3 umem_cache_create.3 umem_debug.3
EXTRA_DIST = COPYRIGHT OPENSOLARIS.LICENSE umem.spec Doxyfile umem_test4 \ EXTRA_DIST = COPYRIGHT OPENSOLARIS.LICENSE umem.spec Doxyfile umem_test4 \
$(man3_MANS) $(man3_MANS)

View file

@ -16,7 +16,7 @@ installation instructions are set forth in the file README.
incomplete. The subversion history is the authoritative documentation of all incomplete. The subversion history is the authoritative documentation of all
recent changes. recent changes.
Report bugs at https://github.com/gburd/libumem/issues Report bugs at https://labs.omniti.com/trac/portableumem/newticket
* Checking Out the Sources * Checking Out the Sources
@ -45,16 +45,6 @@ detailed instructions). Finally, run make. Notice that the first make
of the package should be made in the source directory. Subsequent of the package should be made in the source directory. Subsequent
makes can use build directory different from the source one. makes can use build directory different from the source one.
* Use
To use link with both umem and umem_malloc (e.g. '-lumem -lumem_malloc').
* Testing
There are two basic test files included, test.c and hello.c, with
basic instructions within those files at the end. Basic performance
testing was done using hyperfine.
* Copyright information: * Copyright information:
Please see COPYRIGHT and OPENSOLARIS.LICENSE for the copyright Please see COPYRIGHT and OPENSOLARIS.LICENSE for the copyright

2
TODO
View file

@ -17,3 +17,5 @@ To-do List for the Linux port of umem
* doxygen'ate the headers/code, to produce reference docs. * doxygen'ate the headers/code, to produce reference docs.
* HAVE_DOT in Doxyfile.in should be detected by configure. * HAVE_DOT in Doxyfile.in should be detected by configure.
* option to not use brk() as it's no longer supported on OSX as of 10.10

602
amd64/umem_genasm.c Normal file
View file

@ -0,0 +1,602 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2013 Joyent, Inc. All rights reserved.
*/
/*
* Don't Panic! If you find the blocks of assembly that follow confusing and
* you're questioning why they exist, please go read section 8 of the umem.c big
* theory statement. Next familiarize yourself with the malloc and free
* implementations in libumem's malloc.c.
*
* What follows is the amd64 implementation of the thread caching automatic
* assembly generation. The amd64 calling conventions are documented in the
* 64-bit System V ABI. For our purposes what matters is that our first argument
* will come in rdi. Our functions have to preserve rbp, rbx, and r12->r15. We
* are free to do whatever we want with rax, rcx, rdx, rsi, rdi, and r8->r11.
*
* For both our implementation of malloc and free we only use the registers we
* don't have to preserve.
*
* Malloc register usage:
* o. rdi: Original size to malloc. This never changes and is preserved.
* o. rsi: Adjusted malloc size for malloc_data_tag(s).
* o. rcx: Pointer to the tmem_t in the ulwp_t.
* o. rdx: Pointer to the tmem_t array of roots
* o. r8: Size of the cache
* o. r9: Scratch register
*
* Free register usage:
* o. rdi: Original buffer to free. This never changes and is preserved.
* o. rax: The actual buffer, adjusted for the hidden malloc_data_t(s).
* o. rcx: Pointer to the tmem_t in the ulwp_t.
* o. rdx: Pointer to the tmem_t array of roots
* o. r8: Size of the cache
* o. r9: Scratch register
*
* Once we determine what cache we are using, we increment %rdx to the
* appropriate offset and set %r8 with the size of the cache. This means that
* when we break out to the normal buffer allocation point %rdx contains the
* head of the linked list and %r8 is the amount that we have to adjust the
* thread's cached amount by.
*
 * Each block of assembly has pseudocode that describes its purpose.
*/
#include <atomic.h>
#include <inttypes.h>
#include <sys/types.h>
#include <strings.h>
#include <umem_impl.h>
#include "umem_base.h"
const int umem_genasm_supported = 1;
static uintptr_t umem_genasm_mptr = (uintptr_t)&_malloc;
static size_t umem_genasm_msize = 576;
static uintptr_t umem_genasm_fptr = (uintptr_t)&_free;
static size_t umem_genasm_fsize = 576;
static uintptr_t umem_genasm_omptr = (uintptr_t)umem_malloc;
static uintptr_t umem_genasm_ofptr = (uintptr_t)umem_malloc_free;
#define UMEM_GENASM_MAX64 (UINT32_MAX / sizeof (uintptr_t))
#define PTC_JMPADDR(dest, src) (dest - (src + 4))
#define PTC_ROOT_SIZE sizeof (uintptr_t)
#define MULTINOP 0x0000441f0f
/*
* void *ptcmalloc(size_t orig_size);
*
* size_t size = orig_size + 8;
* if (size > UMEM_SECOND_ALIGN)
* size += 8;
*
* if (size < orig_size)
* goto tomalloc; ! This is overflow
*
* if (size > cache_max)
* goto tomalloc
*
* tmem_t *t = (uintptr_t)curthread() + umem_thr_offset;
* void **roots = t->tm_roots;
*/
#define PTC_MALINIT_JOUT 0x13
#define PTC_MALINIT_MCS 0x1a
#define PTC_MALINIT_JOV 0x20
#define PTC_MALINIT_SOFF 0x30
/*
 * Template for the ptcmalloc prologue. The zeroed 32-bit fields at the
 * PTC_MALINIT_* offsets are filled in by genasm_malinit().
 */
static const uint8_t malinit[] = {
0x48, 0x8d, 0x77, 0x08, /* leaq 0x8(%rdi),%rsi */
0x48, 0x83, 0xfe, 0x10, /* cmpq $0x10, %rsi */
0x76, 0x04, /* jbe +0x4 (skip the next leaq) */
0x48, 0x8d, 0x77, 0x10, /* leaq 0x10(%rdi),%rsi */
0x48, 0x39, 0xfe, /* cmpq %rdi,%rsi */
0x0f, 0x82, 0x00, 0x00, 0x00, 0x00, /* jb +errout (size wrapped) */
0x48, 0x81, 0xfe,
0x00, 0x00, 0x00, 0x00, /* cmpq sizeof ($CACHE), %rsi */
0x0f, 0x87, 0x00, 0x00, 0x00, 0x00, /* ja +errout (too big to cache) */
0x64, 0x48, 0x8b, 0x0c, 0x25,
0x00, 0x00, 0x00, 0x00, /* movq %fs:0x0,%rcx */
0x48, 0x81, 0xc1,
0x00, 0x00, 0x00, 0x00, /* addq $SOFF, %rcx */
0x48, 0x8d, 0x51, 0x08, /* leaq 0x8(%rcx),%rdx */
};
/*
* void ptcfree(void *buf);
*
* if (buf == NULL)
* return;
*
* malloc_data_t *tag = buf;
* tag--;
* int size = tag->malloc_size;
* int tagval = UMEM_MALLOC_DECODE(tag->malloc_tag, size);
* if (tagval == MALLOC_SECOND_MAGIC) {
* tag--;
* } else if (tagval != MALLOC_MAGIC) {
* goto tofree;
* }
*
* if (size > cache_max)
* goto tofree;
*
* tmem_t *t = (uintptr_t)curthread() + umem_thr_offset;
* void **roots = t->tm_roots;
*/
#define PTC_FRINI_JDONE 0x05
#define PTC_FRINI_JFREE 0x25
#define PTC_FRINI_MCS 0x30
#define PTC_FRINI_JOV 0x36
#define PTC_FRINI_SOFF 0x46
/*
 * Template for the ptcfree prologue. The zeroed 32-bit fields at the
 * PTC_FRINI_* offsets are filled in by genasm_frinit().
 */
static const uint8_t freeinit[] = {
0x48, 0x85, 0xff, /* testq %rdi,%rdi */
0x0f, 0x84, 0x00, 0x00, 0x00, 0x00, /* je $JDONE (done) */
0x8b, 0x77, 0xf8, /* movl -0x8(%rdi),%esi */
0x8b, 0x47, 0xfc, /* movl -0x4(%rdi),%eax */
0x01, 0xf0, /* addl %esi,%eax */
0x3d, 0x00, 0x70, 0xba, 0x16, /* cmpl $MALLOC_2_MAGIC, %eax */
0x75, 0x06, /* jne +0x6 (try MALLOC_MAGIC) */
0x48, 0x8d, 0x47, 0xf0, /* leaq -0x10(%rdi),%rax */
0xeb, 0x0f, /* jmp +0xf (to the size check) */
0x3d, 0x00, 0xc0, 0x10, 0x3a, /* cmpl $MALLOC_MAGIC, %eax */
0x0f, 0x85, 0x00, 0x00, 0x00, 0x00, /* jne +JFREE (goto tofree) */
0x48, 0x8d, 0x47, 0xf8, /* leaq -0x8(%rdi),%rax */
0x48, 0x81, 0xfe,
0x00, 0x00, 0x00, 0x00, /* cmpq sizeof ($CACHE), %rsi */
0x0f, 0x87, 0x00, 0x00, 0x00, 0x00, /* ja +errout */
0x64, 0x48, 0x8b, 0x0c, 0x25,
0x00, 0x00, 0x00, 0x00, /* movq %fs:0x0,%rcx */
0x48, 0x81, 0xc1,
0x00, 0x00, 0x00, 0x00, /* addq $SOFF, %rcx */
0x48, 0x8d, 0x51, 0x08, /* leaq 0x8(%rcx),%rdx */
};
/*
* if (size <= $CACHE_SIZE) {
* csize = $CACHE_SIZE;
* } else ... ! goto next cache
*/
#define PTC_INICACHE_CMP 0x03
#define PTC_INICACHE_SIZE 0x0c
#define PTC_INICACHE_JMP 0x11
/*
 * Template for the first cache check. It leaves %rdx untouched, so the first
 * root is used. The zeroed fields are filled in by genasm_firstcache().
 */
static const uint8_t inicache[] = {
0x48, 0x81, 0xfe,
0x00, 0x00, 0x00, 0x00, /* cmpq sizeof ($CACHE), %rsi */
0x77, 0x0c, /* ja +0xc (next cache, just past this template) */
0x49, 0xc7, 0xc0,
0x00, 0x00, 0x00, 0x00, /* movq sizeof ($CACHE), %r8 */
0xe9, 0x00, 0x00, 0x00, 0x00, /* jmp $JMP (allocbuf) */
};
/*
* if (size <= $CACHE_SIZE) {
* csize = $CACHE_SIZE;
* roots += $CACHE_NUM;
* } else ... ! goto next cache
*/
#define PTC_GENCACHE_CMP 0x03
#define PTC_GENCACHE_SIZE 0x0c
#define PTC_GENCACHE_NUM 0x13
#define PTC_GENCACHE_JMP 0x18
/*
 * Template for a middle cache check. The zeroed 32-bit fields at the
 * PTC_GENCACHE_* offsets are filled in by genasm_gencache().
 *
 * The template is 28 bytes long and the short ja ends at offset 9, so the
 * displacement that reaches the start of the next cache check is
 * 28 - 9 = 0x13. The previous value of 0x14 landed one byte into the next
 * template, skipping the REX.W prefix of its cmpq; that only worked because
 * the remaining bytes decode as a same-length cmpl against %esi.
 */
static const uint8_t gencache[] = {
	0x48, 0x81, 0xfe,
	0x00, 0x00, 0x00, 0x00,		/* cmpq sizeof ($CACHE), %rsi */
	0x77, 0x13,			/* ja +0x13 (next cache) */
	0x49, 0xc7, 0xc0,
	0x00, 0x00, 0x00, 0x00,		/* movq sizeof ($CACHE), %r8 */
	0x48, 0x81, 0xc2,
	0x00, 0x00, 0x00, 0x00,		/* addq $8*ii, %rdx */
	0xe9, 0x00, 0x00, 0x00, 0x00	/* jmp +$JMP (allocbuf) */
};
/*
* else if (size <= $CACHE_SIZE) {
* csize = $CACHE_SIZE;
* roots += $CACHE_NUM;
* } else {
* goto tofunc; ! goto tomalloc if ptcmalloc.
* } ! goto tofree if ptcfree.
*/
#define PTC_FINCACHE_CMP 0x03
#define PTC_FINCACHE_JMP 0x08
#define PTC_FINCACHE_SIZE 0x0c
#define PTC_FINCACHE_NUM 0x13
/*
 * Template for the last cache check. It falls through into the epilogue that
 * follows it, so it has no trailing jmp. The zeroed fields, including the
 * one-byte ja displacement, are filled in by genasm_lastcache().
 */
static const uint8_t fincache[] = {
0x48, 0x81, 0xfe,
0x00, 0x00, 0x00, 0x00, /* cmpq sizeof ($CACHE), %rsi */
0x77, 0x00, /* ja +JMP (to the real malloc or free) */
0x49, 0xc7, 0xc0,
0x00, 0x00, 0x00, 0x00, /* movq sizeof ($CACHE), %r8 */
0x48, 0x81, 0xc2,
0x00, 0x00, 0x00, 0x00, /* addq $8*ii, %rdx */
};
/*
 * if (*root == NULL)
 *	goto tomalloc;
 *
 * malloc_data_t *ret = *root;
 * *root = *(void **)ret;
 * t->tm_size -= csize;
 * ret->malloc_size = size;
 *
 * if (size > UMEM_SECOND_ALIGN) {
 *	ret->malloc_data = UMEM_MALLOC_ENCODE(MALLOC_SECOND_MAGIC, size);
 *	ret += 2;
 * } else {
 *	ret->malloc_data = UMEM_MALLOC_ENCODE(MALLOC_MAGIC, size);
 *	ret += 1;
 * }
 *
 * return ((void *)ret);
 * tomalloc:
 * return (malloc(orig_size));
 */
#define PTC_MALFINI_ALLABEL 0x00
#define PTC_MALFINI_JMLABEL 0x40
#define PTC_MALFINI_JMADDR 0x41
/*
 * Template for the ptcmalloc epilogue. The rel32 of the final jmp, at
 * PTC_MALFINI_JMADDR, is filled in by genasm_malfini().
 */
static const uint8_t malfini[] = {
0x48, 0x8b, 0x02, /* movq (%rdx),%rax */
0x48, 0x85, 0xc0, /* testq %rax,%rax */
0x74, 0x38, /* je +0x38 (errout) */
0x4c, 0x8b, 0x08, /* movq (%rax),%r9 */
0x4c, 0x89, 0x0a, /* movq %r9,(%rdx) */
0x4c, 0x29, 0x01, /* subq %r8,(%rcx) */
0x48, 0x83, 0xfe, 0x10, /* cmpq $0x10,%rsi */
0x76, 0x15, /* jbe +0x15 */
0x41, 0xb9, 0x00, 0x70, 0xba, 0x16, /* movl $MALLOC_MAGIC_2, %r9d */
0x89, 0x70, 0x08, /* movl %esi,0x8(%rax) */
0x41, 0x29, 0xf1, /* subl %esi, %r9d */
0x44, 0x89, 0x48, 0x0c, /* movl %r9d, 0xc(%rax) */
0x48, 0x83, 0xc0, 0x10, /* addq $0x10, %rax */
0xc3, /* ret */
0x41, 0xb9, 0x00, 0xc0, 0x10, 0x3a, /* movl $MALLOC_MAGIC, %r9d */
0x89, 0x30, /* movl %esi,(%rax) */
0x41, 0x29, 0xf1, /* subl %esi,%r9d */
0x44, 0x89, 0x48, 0x04, /* movl %r9d,0x4(%rax) */
0x48, 0x83, 0xc0, 0x08, /* addq $0x8,%rax */
0xc3, /* ret */
0xe9, 0x00, 0x00, 0x00, 0x00 /* jmp $MALLOC */
};
/*
* if (t->tm_size + csize > umem_ptc_size)
* goto tofree;
*
* t->tm_size += csize
* *(void **)tag = *root;
* *root = tag;
* return;
* tofree:
* free(buf);
* return;
*/
#define PTC_FRFINI_RBUFLABEL 0x00
#define PTC_FRFINI_CACHEMAX 0x09
#define PTC_FRFINI_DONELABEL 0x1b
#define PTC_FRFINI_JFLABEL 0x1c
#define PTC_FRFINI_JFADDR 0x1d
/*
 * Template for the ptcfree epilogue. The cmpq immediate at
 * PTC_FRFINI_CACHEMAX and the rel32 of the final jmp at PTC_FRFINI_JFADDR are
 * filled in by genasm_frfini().
 */
static const uint8_t freefini[] = {
0x4c, 0x8b, 0x09, /* movq (%rcx),%r9 */
0x4d, 0x01, 0xc1, /* addq %r8, %r9 */
0x49, 0x81, 0xf9,
0x00, 0x00, 0x00, 0x00, /* cmpq $THR_CACHE_MAX, %r9 */
0x77, 0x0d, /* ja +0xd (tofree) */
0x4c, 0x01, 0x01, /* addq %r8,(%rcx) */
0x4c, 0x8b, 0x0a, /* movq (%rdx),%r9 */
0x4c, 0x89, 0x08, /* movq %r9,(%rax) */
0x48, 0x89, 0x02, /* movq %rax,(%rdx) */
0xc3, /* ret */
0xe9, 0x00, 0x00, 0x00, 0x00 /* jmp free */
};
/*
 * Emit the malloc prologue at bp and patch its four immediates.
 *
 *   off   - immediate for "addq $SOFF, %rcx"; genasm_malloc() passes
 *           umem_tmem_off.
 *   ep    - offset, measured from the start of this prologue, of the label
 *           that bails out to the real malloc. Both the overflow (jb) and
 *           the too-large (ja) branches go there.
 *   csize - immediate for the "size > cache_max" comparison; genasm_malloc()
 *           passes the size of the last (largest) cache.
 *
 * Returns the number of bytes emitted.
 */
static int
genasm_malinit(uint8_t *bp, uint32_t off, uint32_t ep, uint32_t csize)
{
	const uint32_t ovfl_rel = PTC_JMPADDR(ep, PTC_MALINIT_JOUT);
	const uint32_t big_rel = PTC_JMPADDR(ep, PTC_MALINIT_JOV);

	bcopy(malinit, bp, sizeof (malinit));

	bcopy(&ovfl_rel, bp + PTC_MALINIT_JOUT, sizeof (ovfl_rel));
	bcopy(&csize, bp + PTC_MALINIT_MCS, sizeof (csize));
	bcopy(&big_rel, bp + PTC_MALINIT_JOV, sizeof (big_rel));
	bcopy(&off, bp + PTC_MALINIT_SOFF, sizeof (off));

	return (sizeof (malinit));
}
/*
 * Emit the free prologue at bp and patch its five immediates.
 *
 *   off - immediate for "addq $SOFF, %rcx".
 *   dp  - offset, from the start of this prologue, of the "done" label taken
 *         when the buffer is NULL.
 *   ep  - offset, from the start of this prologue, of the label that hands
 *         the buffer to the real free. Used when the tag is not recognized
 *         and when the size exceeds mcs.
 *   mcs - immediate for the "size > cache_max" comparison.
 *
 * Returns the number of bytes emitted.
 */
static int
genasm_frinit(uint8_t *bp, uint32_t off, uint32_t dp, uint32_t ep, uint32_t mcs)
{
	const uint32_t done_rel = PTC_JMPADDR(dp, PTC_FRINI_JDONE);
	const uint32_t badtag_rel = PTC_JMPADDR(ep, PTC_FRINI_JFREE);
	const uint32_t big_rel = PTC_JMPADDR(ep, PTC_FRINI_JOV);

	bcopy(freeinit, bp, sizeof (freeinit));

	bcopy(&done_rel, bp + PTC_FRINI_JDONE, sizeof (done_rel));
	bcopy(&badtag_rel, bp + PTC_FRINI_JFREE, sizeof (badtag_rel));
	bcopy(&mcs, bp + PTC_FRINI_MCS, sizeof (mcs));
	bcopy(&big_rel, bp + PTC_FRINI_JOV, sizeof (big_rel));
	bcopy(&off, bp + PTC_FRINI_SOFF, sizeof (off));

	return (sizeof (freeinit));
}
/*
 * Emit the check for the first (smallest) cache at bp. csize is patched into
 * both the comparison and the "movq ..., %r8" that records the cache size.
 * ap is the offset, from the start of this block, of the label that allocates
 * (or frees) the buffer.
 *
 * Returns the number of bytes emitted.
 */
static int
genasm_firstcache(uint8_t *bp, uint32_t csize, uint32_t ap)
{
	uint32_t rel;

	bcopy(inicache, bp, sizeof (inicache));

	bcopy(&csize, bp + PTC_INICACHE_CMP, sizeof (csize));
	bcopy(&csize, bp + PTC_INICACHE_SIZE, sizeof (csize));

	rel = PTC_JMPADDR(ap, PTC_INICACHE_JMP);
	ASSERT(rel != 0);
	bcopy(&rel, bp + PTC_INICACHE_JMP, sizeof (rel));

	return (sizeof (inicache));
}
/*
 * Emit the check for a middle cache at bp. num is the index of the cache; it
 * is scaled by PTC_ROOT_SIZE to form the amount added to the roots pointer.
 * csize is patched into both the comparison and the cache-size load, and ap
 * is the offset, from the start of this block, of the allocate/free label.
 *
 * Returns the number of bytes emitted.
 */
static int
genasm_gencache(uint8_t *bp, int num, uint32_t csize, uint32_t ap)
{
	uint32_t rootoff;
	uint32_t rel;

	ASSERT(UINT32_MAX / PTC_ROOT_SIZE > num);
	ASSERT(num != 0);

	bcopy(gencache, bp, sizeof (gencache));

	bcopy(&csize, bp + PTC_GENCACHE_CMP, sizeof (csize));
	bcopy(&csize, bp + PTC_GENCACHE_SIZE, sizeof (csize));

	rootoff = num * PTC_ROOT_SIZE;
	bcopy(&rootoff, bp + PTC_GENCACHE_NUM, sizeof (rootoff));

	rel = PTC_JMPADDR(ap, PTC_GENCACHE_JMP);
	bcopy(&rel, bp + PTC_GENCACHE_JMP, sizeof (rel));

	return (sizeof (gencache));
}
/*
 * Emit the check for the last cache at bp. Unlike the earlier checks this one
 * uses a one-byte branch displacement, so ep (the offset, from the start of
 * this block, of the label that falls back to the real function) is asserted
 * to fit in a byte. num is the index of the cache and csize its size.
 *
 * Returns the number of bytes emitted.
 */
static int
genasm_lastcache(uint8_t *bp, int num, uint32_t csize, uint32_t ep)
{
	uint32_t rootoff;
	uint8_t rel8;

	ASSERT(ep <= 0xff && ep > 7);
	ASSERT(UINT32_MAX / PTC_ROOT_SIZE > num);

	rootoff = num * PTC_ROOT_SIZE;
	/* The displacement counts from the byte after the rel8 itself. */
	rel8 = ep - PTC_FINCACHE_JMP - 1;

	bcopy(fincache, bp, sizeof (fincache));

	bcopy(&csize, bp + PTC_FINCACHE_CMP, sizeof (csize));
	bcopy(&rel8, bp + PTC_FINCACHE_JMP, sizeof (rel8));
	bcopy(&csize, bp + PTC_FINCACHE_SIZE, sizeof (csize));
	bcopy(&rootoff, bp + PTC_FINCACHE_NUM, sizeof (rootoff));

	return (sizeof (fincache));
}
/*
 * Emit the malloc epilogue at bp. The trailing jmp is pointed at mptr, an
 * absolute address, so its displacement is computed from the absolute address
 * of the rel32 field rather than from a block-relative offset.
 *
 * Returns the number of bytes emitted.
 */
static int
genasm_malfini(uint8_t *bp, uintptr_t mptr)
{
	const uintptr_t field = (uintptr_t)bp + PTC_MALFINI_JMADDR;
	uint32_t rel;

	bcopy(malfini, bp, sizeof (malfini));

	rel = PTC_JMPADDR(mptr, field);
	bcopy(&rel, bp + PTC_MALFINI_JMADDR, sizeof (rel));

	return (sizeof (malfini));
}
/*
 * Emit the free epilogue at bp. maxthr is patched in as the limit that the
 * thread's cached total is compared against, and the trailing jmp is pointed
 * at the absolute address fptr.
 *
 * Returns the number of bytes emitted.
 */
static int
genasm_frfini(uint8_t *bp, uint32_t maxthr, uintptr_t fptr)
{
	const uintptr_t field = (uintptr_t)bp + PTC_FRFINI_JFADDR;
	uint32_t rel;

	bcopy(freefini, bp, sizeof (freefini));

	bcopy(&maxthr, bp + PTC_FRFINI_CACHEMAX, sizeof (maxthr));

	rel = PTC_JMPADDR(fptr, field);
	bcopy(&rel, bp + PTC_FRFINI_JFADDR, sizeof (rel));

	return (sizeof (freefini));
}
/*
 * Lay out the generated malloc in the len bytes at base:
 *
 *   o the malloc prologue
 *   o the first-cache check			(only when nents > 1)
 *   o one generic check per middle cache	(nents - 2 of them)
 *   o the last-cache check
 *   o the malloc epilogue
 *
 * With a single cache only the last-cache check is emitted; with two caches
 * there are no middle checks.
 *
 * The label offsets handed to each emitter are relative to the start of the
 * piece being emitted, so they are reduced by the size of every piece already
 * written. umem_alloc_sizes holds the nents cache sizes.
 *
 * Returns 0 on success, or 1 if the code does not fit in len bytes.
 */
static int
genasm_malloc(void *base, size_t len, int nents, int *umem_alloc_sizes)
{
	uint8_t *cur = base;
	size_t total;
	uint32_t allocoff, erroff;
	int emitted, idx;

	total = sizeof (malinit) + sizeof (malfini) + sizeof (fincache);
	if (nents >= 2)
		total += sizeof (inicache) + sizeof (gencache) * (nents - 2);

	if (total > len)
		return (1);

	erroff = total - sizeof (malfini) + PTC_MALFINI_JMLABEL;
	allocoff = total - sizeof (malfini) + PTC_MALFINI_ALLABEL;

	emitted = genasm_malinit(cur, umem_tmem_off, erroff,
	    umem_alloc_sizes[nents-1]);
	cur += emitted;
	allocoff -= emitted;
	erroff -= emitted;

	if (nents > 1) {
		emitted = genasm_firstcache(cur, umem_alloc_sizes[0],
		    allocoff);
		cur += emitted;
		allocoff -= emitted;
		erroff -= emitted;
	}

	idx = 1;
	while (idx < nents - 1) {
		emitted = genasm_gencache(cur, idx, umem_alloc_sizes[idx],
		    allocoff);
		cur += emitted;
		allocoff -= emitted;
		erroff -= emitted;
		idx++;
	}

	cur += genasm_lastcache(cur, nents - 1, umem_alloc_sizes[nents - 1],
	    erroff);
	cur += genasm_malfini(cur, umem_genasm_omptr);

	ASSERT(((uintptr_t)cur - total) == (uintptr_t)base);

	return (0);
}
/*
 * Lay out the generated free in the len bytes at base: the free prologue, the
 * same sequence of cache checks that genasm_malloc() emits, and the free
 * epilogue. nents is assumed to have been validated by the caller.
 *
 * The offset of the "done" label is only needed by the prologue, so it is
 * never rebased; the other two label offsets are reduced by the size of each
 * piece already written.
 *
 * Returns 0 on success, or 1 if the code does not fit in len bytes.
 */
static int
genasm_free(void *base, size_t len, int nents, int *umem_alloc_sizes)
{
	uint8_t *cur = base;
	size_t total;
	uint32_t rbufoff, retoff, erroff;
	int emitted, idx;

	total = sizeof (freeinit) + sizeof (freefini) + sizeof (fincache);
	if (nents >= 2)
		total += sizeof (inicache) + sizeof (gencache) * (nents - 2);

	if (total > len)
		return (1);

	erroff = total - (sizeof (freefini) - PTC_FRFINI_JFLABEL);
	rbufoff = total - (sizeof (freefini) - PTC_FRFINI_RBUFLABEL);
	retoff = total - (sizeof (freefini) - PTC_FRFINI_DONELABEL);

	emitted = genasm_frinit(cur, umem_tmem_off, retoff, erroff,
	    umem_alloc_sizes[nents - 1]);
	cur += emitted;
	erroff -= emitted;
	rbufoff -= emitted;

	if (nents > 1) {
		emitted = genasm_firstcache(cur, umem_alloc_sizes[0], rbufoff);
		cur += emitted;
		erroff -= emitted;
		rbufoff -= emitted;
	}

	idx = 1;
	while (idx < nents - 1) {
		emitted = genasm_gencache(cur, idx, umem_alloc_sizes[idx],
		    rbufoff);
		cur += emitted;
		rbufoff -= emitted;
		erroff -= emitted;
		idx++;
	}

	cur += genasm_lastcache(cur, nents - 1, umem_alloc_sizes[nents - 1],
	    erroff);
	cur += genasm_frfini(cur, umem_ptc_size, umem_genasm_ofptr);

	ASSERT(((uintptr_t)cur - total) == (uintptr_t)base);

	return (0);
}
/*
 * Generate the per-thread-cache malloc and free fast paths in the space that
 * follows the _malloc and _free entry points, then switch them on.
 *
 * cp holds the cache sizes, caches the corresponding umem caches, and nc the
 * number of them. Returns 0 on success, including the case where there is
 * nothing to do, and 1 if the entry points are unusable or the generated code
 * does not fit.
 */
/*ARGSUSED*/
int
umem_genasm(int *cp, umem_cache_t **caches, int nc)
{
int nents, i;
uint8_t *mptr;
uint8_t *fptr;
uint64_t v, *vptr;
/* Generated code is written 5 bytes past each entry point. */
mptr = (void *)((uintptr_t)umem_genasm_mptr + 5);
fptr = (void *)((uintptr_t)umem_genasm_fptr + 5);
if (umem_genasm_mptr == 0 || umem_genasm_msize == 0 ||
umem_genasm_fptr == 0 || umem_genasm_fsize == 0)
return (1);
/*
 * The total number of caches that we can service is the minimum of:
 * o the amount supported by libc
 * o the total number of umem caches
 * o the root offset is patched in as a 32-bit immediate, so it's
 *   UINT32_MAX / sizeof (uintptr_t). For 64-bit, this is
 *   UINT32_MAX >> 3, a lot.
 */
nents = _tmem_get_nentries();
if (UMEM_GENASM_MAX64 < nents)
nents = UMEM_GENASM_MAX64;
if (nc < nents)
nents = nc;
/* Based on our constraints, this is not an error */
if (nents == 0 || umem_ptc_size == 0)
return (0);
/* mptr and fptr already account for the jump at each entry point */
if (genasm_malloc(mptr, umem_genasm_msize, nents, cp) != 0)
return (1);
if (genasm_free(fptr, umem_genasm_fsize, nents, cp) != 0)
return (1);
/*
 * nop out the jump with a multibyte nop: the low five bytes of the
 * 8-byte word at each entry point become MULTINOP and the upper three
 * bytes are preserved, so execution falls through into the generated
 * code. The word is replaced with a single atomic swap.
 */
vptr = (void *)umem_genasm_mptr;
v = MULTINOP;
v |= *vptr & (0xffffffULL << 40);
(void) atomic_swap_64(vptr, v);
vptr = (void *)umem_genasm_fptr;
v = MULTINOP;
v |= *vptr & (0xffffffULL << 40);
(void) atomic_swap_64(vptr, v);
/* Mark every cache that the generated code services. */
for (i = 0; i < nents; i++)
caches[i]->cache_flags |= UMF_PTC;
return (0);
}

View file

@ -1,5 +1,6 @@
AC_INIT([umem], [1.0.2], [], [umem]) AC_INIT([umem], [1.0.2], [], [umem])
AM_INIT_AUTOMAKE([dist-bzip2]) AM_INIT_AUTOMAKE([dist-bzip2])
AC_CONFIG_MACRO_DIRS([m4]) AC_CONFIG_MACRO_DIRS([m4])
AC_PROG_CC AC_PROG_CC

View file

@ -24,7 +24,7 @@
* Use is subject to license terms. * Use is subject to license terms.
* *
* Portions Copyright 2012 Joyent, Inc. All rights reserved. * Portions Copyright 2012 Joyent, Inc. All rights reserved.
* * Portions Copyright 2015 by Delphix. All rights reserved.
* Portions Copyright 2006-2008 Message Systems, Inc. All rights reserved. * Portions Copyright 2006-2008 Message Systems, Inc. All rights reserved.
*/ */
@ -181,7 +181,10 @@ static umem_env_item_t umem_options_items[] = {
}, },
#endif #endif
#endif #endif
{ "perthread_cache", "Evolving", ITEM_SIZE,
"Size (in bytes) of per-thread allocation cache",
NULL, 0, NULL, &umem_ptc_size
},
{ NULL, "-- end of UMEM_OPTIONS --", ITEM_INVALID } { NULL, "-- end of UMEM_OPTIONS --", ITEM_INVALID }
}; };
@ -250,6 +253,10 @@ static umem_env_item_t umem_debug_items[] = {
"Enables writing all logged messages to stderr", "Enables writing all logged messages to stderr",
&umem_output, 2 &umem_output, 2
}, },
{ "checknull", "Private", ITEM_FLAG,
"Abort if an allocation would return null",
&umem_flags, UMF_CHECKNULL
},
{ NULL, "-- end of UMEM_DEBUG --", ITEM_INVALID } { NULL, "-- end of UMEM_DEBUG --", ITEM_INVALID }
}; };
@ -558,6 +565,9 @@ process_item(const umem_env_item_t *item, const char *item_arg)
case ITEM_SIZE: case ITEM_SIZE:
arg_required = 1; arg_required = 1;
break; break;
case ITEM_INVALID:
default:
break;
} }
switch (item->item_type) { switch (item->item_type) {

17
hello.c
View file

@ -1,17 +0,0 @@
#include <stdio.h>
#include <stdlib.h>
/*
 * Allocate a page-sized buffer, format a greeting into it, and print both the
 * text and the buffer's address. Exits non-zero if the allocation fails.
 */
int main(void) {
	char *ptr = malloc(4096);

	if (ptr == NULL) {
		perror("malloc");
		return 1;
	}

	snprintf(ptr, 4096, "%s", "Hello, World!");
	/* %p requires a void * argument. */
	printf("%s 0x%p\n", ptr, (void *)ptr);

	free(ptr);
	return 0;
}
/*
* Local variables:
* tab-width:4
* compile-command: "gcc -fpic -Wall -Werror -Ofast -march=native -mtune=native hello.c -o hello"
* End:
*/

115
i386/asm_subr.s Normal file
View file

@ -0,0 +1,115 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/asm_linkage.h>
/* Four one-byte nop instructions. */
#define NOP4 \
nop; \
nop; \
nop; \
nop;
/* Sixteen nops, built from four NOP4. */
#define NOP16 \
NOP4 \
NOP4 \
NOP4 \
NOP4
/* Sixty-four nops, built from four NOP16. */
#define NOP64 \
NOP16 \
NOP16 \
NOP16 \
NOP16
/* 256 nops, built from four NOP64. */
#define NOP256 \
NOP64 \
NOP64 \
NOP64 \
NOP64
#if defined(lint)
/* C stand-in for getfp(), compiled only when lint is defined. */
void *
getfp(void)
{
return (NULL);
}
#ifndef UMEM_STANDALONE
/* C stand-in for _breakpoint(), compiled only when lint is defined. */
void
_breakpoint(void)
{
return;
}
#endif
#else /* lint */
/* getfp(): return the current value of the frame pointer register. */
#if defined(__amd64)
ENTRY(getfp)
movq %rbp, %rax
ret
SET_SIZE(getfp)
#else /* __i386 */
ENTRY(getfp)
movl %ebp, %eax
ret
SET_SIZE(getfp)
#endif
#ifndef UMEM_STANDALONE
/* _breakpoint(): raise a breakpoint trap (int $3) and return. */
ENTRY(_breakpoint)
int $3
ret
SET_SIZE(_breakpoint)
#endif
/*
 * _malloc: jump straight to umem_malloc. The jump is followed by 512 nops
 * (576 on amd64), which reserve room inside the function; umem_genasm()
 * writes its generated malloc there, starting 5 bytes past the entry point,
 * and then replaces the jump with a nop.
 */
ENTRY(_malloc)
jmp umem_malloc;
NOP256
NOP256
#if defined(__amd64)
NOP64
#endif
SET_SIZE(_malloc)
/*
 * _free: jump straight to umem_malloc_free. The jump is followed by 512 nops
 * (576 on amd64), which reserve room inside the function; umem_genasm()
 * writes its generated free there, starting 5 bytes past the entry point,
 * and then replaces the jump with a nop.
 */
ENTRY(_free)
jmp umem_malloc_free;
NOP256
NOP256
#if defined(__amd64)
NOP64
#endif
SET_SIZE(_free)
ANSI_PRAGMA_WEAK2(malloc,_malloc,function)
ANSI_PRAGMA_WEAK2(free,_free,function)
#endif /* lint */

595
i386/umem_genasm.c Normal file
View file

@ -0,0 +1,595 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2014 Joyent, Inc. All rights reserved.
*/
/*
* Don't Panic! If you find the blocks of assembly that follow confusing and
* you're questioning why they exist, please go read section 8 of the umem.c big
* theory statement. Next familiarize yourself with the malloc and free
* implementations in libumem's malloc.c.
*
* What follows is the i386 implementation of the thread caching automatic
* assembly generation. With i386 a function only has three registers it's
* allowed to change without restoring them: eax, ecx, and edx. All others have
* to be preserved. Since the set of registers we have available is so small, we
* have to make use of esi, ebx, and edi and save their original values to the
* stack.
*
* Malloc register usage:
* o. esi: Size of the malloc (passed into us and modified)
* o. edi: Size of the cache
* o. eax: Buffer to return
* o. ebx: Scratch space and temporary values
* o. ecx: Pointer to the tmem_t in the ulwp_t.
* o. edx: Pointer to the tmem_t array of roots
*
* Free register usage:
* o. esi: Size of the malloc (passed into us and modified)
* o. edi: Size of the cache
* o. eax: Buffer to free
* o. ebx: Scratch space and temporary values
* o. ecx: Pointer to the tmem_t in the ulwp_t.
* o. edx: Pointer to the tmem_t array of roots
*
* Once we determine what cache we are using, we increment %edx to the
* appropriate offset and set %edi with the size of the cache. This means that
* when we break out to the normal buffer allocation point %edx contains the
* head of the linked list and %edi is the amount that we have to adjust the
* total amount cached by the thread.
*
 * Each block of assembly has pseudocode that describes its purpose.
*/
#include <inttypes.h>
#include <strings.h>
#include <umem_impl.h>
#include "umem_base.h"
#include <atomic.h>
const int umem_genasm_supported = 1;
static uintptr_t umem_genasm_mptr = (uintptr_t)&_malloc;
static size_t umem_genasm_msize = 512;
static uintptr_t umem_genasm_fptr = (uintptr_t)&_free;
static size_t umem_genasm_fsize = 512;
static uintptr_t umem_genasm_omptr = (uintptr_t)umem_malloc;
static uintptr_t umem_genasm_ofptr = (uintptr_t)umem_malloc_free;
/*
* The maximum number of caches we can support. We use a single byte addl so
* this is 255 (UINT8_MAX) / sizeof (uintptr_t). In this case 63
*/
#define UMEM_GENASM_MAX32 63
#define PTC_JMPADDR(dest, src) (dest - (src + 4))
#define PTC_ROOT_SIZE sizeof (uintptr_t)
#define MULTINOP 0x0000441f0f
/*
* void *ptcmalloc(size_t orig_size);
*
* size_t size = orig_size + 8;
*
* if (size < orig_size)
* goto tomalloc; ! This is overflow
*
* if (size > cache_size)
* goto tomalloc;
*
* tmem_t *t = (uintptr_t)curthread() + umem_thr_offset;
* void **roots = t->tm_roots;
*/
#define PTC_MALINIT_JOUT 0x0e
#define PTC_MALINIT_MCS 0x14
#define PTC_MALINIT_JOV 0x1a
#define PTC_MALINIT_SOFF 0x27
/*
 * Template for the i386 ptcmalloc prologue. It builds a frame and saves
 * %edi, %esi and %ebx before using them. The zeroed 32-bit fields sit at the
 * PTC_MALINIT_* offsets.
 */
static const uint8_t malinit[] = {
0x55, /* pushl %ebp */
0x89, 0xe5, /* movl %esp, %ebp */
0x57, /* pushl %edi */
0x56, /* pushl %esi */
0x53, /* pushl %ebx */
0x8b, 0x75, 0x08, /* movl 0x8(%ebp), %esi */
0x83, 0xc6, 0x08, /* addl $0x8,%esi */
0x0f, 0x82, 0x00, 0x00, 0x00, 0x00, /* jc +$JMP (errout) */
0x81, 0xfe, 0x00, 0x00, 0x00, 0x00, /* cmpl sizeof ($C0), %esi */
0x0f, 0x87, 0x00, 0x00, 0x00, 0x00, /* ja +$JMP (errout) */
0x65, 0x8b, 0x0d, 0x00, 0x00, 0x00, 0x00, /* movl %gs:0x0,%ecx */
0x81, 0xc1, 0x00, 0x00, 0x00, 0x00, /* addl $OFF, %ecx */
0x8d, 0x51, 0x04 /* leal 0x4(%ecx), %edx */
};
/*
* void ptcfree(void *buf);
*
* if (buf == NULL)
* return;
*
* malloc_data_t *tag = buf;
* tag--;
* int size = tag->malloc_size;
* int tagtval = UMEM_MALLOC_DECODE(tag->malloc_tag, size);
*
* if (tagval != MALLOC_MAGIC)
* goto tofree;
*
* if (size > cache_max)
* goto tofree;
*
* tmem_t *t = (uintptr_t)curthread() + umem_thr_offset;
* void **roots = t->tm_roots;
*/
#define PTC_FRINI_JDONE 0x0d
#define PTC_FRINI_JFREE 0x23
#define PTC_FRINI_MCS 0x29
#define PTC_FRINI_JOV 0x2f
#define PTC_FRINI_SOFF 0x3c
/*
 * Template for the i386 ptcfree prologue. It builds a frame and saves
 * %edi, %esi and %ebx before using them. The zeroed 32-bit fields sit at the
 * PTC_FRINI_* offsets.
 */
static const uint8_t freeinit[] = {
0x55, /* pushl %ebp */
0x89, 0xe5, /* movl %esp, %ebp */
0x57, /* pushl %edi */
0x56, /* pushl %esi */
0x53, /* pushl %ebx */
0x8b, 0x45, 0x08, /* movl 0x8(%ebp), %eax */
0x85, 0xc0, /* testl %eax, %eax */
0x0f, 0x84, 0x00, 0x00, 0x00, 0x00, /* je $JDONE (done) */
0x83, 0xe8, 0x08, /* subl $0x8,%eax */
0x8b, 0x30, /* movl (%eax),%esi */
0x8b, 0x50, 0x04, /* movl 0x4(%eax),%edx */
0x01, 0xf2, /* addl %esi,%edx */
0x81, 0xfa, 0x00, 0xc0, 0x10, 0x3a, /* cmpl MAGIC32, %edx */
0x0f, 0x85, 0x00, 0x00, 0x00, 0x00, /* jne +JFREE (goto tofree) */
0x81, 0xfe, 0x00, 0x00, 0x00, 0x00, /* cmpl sizeof ($C0), %esi */
0x0f, 0x87, 0x00, 0x00, 0x00, 0x00, /* ja +$JMP (errout) */
0x65, 0x8b, 0x0d, 0x00, 0x0, 0x00, 0x00, /* movl %gs:0x0,%ecx */
0x81, 0xc1, 0x00, 0x00, 0x00, 0x00, /* addl $0xOFF, %ecx */
0x8d, 0x51, 0x04 /* leal 0x4(%ecx),%edx */
};
/*
* if (size <= $CACHE_SIZE) {
* csize = $CACHE_SIZE;
* } else ... ! goto next cache
*/
#define PTC_INICACHE_CMP 0x02
#define PTC_INICACHE_SIZE 0x09
#define PTC_INICACHE_JMP 0x0e
/*
 * Template for the i386 first cache check. It leaves %edx untouched. The
 * 32-bit fields at the PTC_INICACHE_* offsets are placeholders.
 */
static const uint8_t inicache[] = {
0x81, 0xfe, 0xff, 0x00, 0x00, 0x00, /* cmpl sizeof ($C0), %esi */
0x77, 0x0a, /* ja +0xa (next cache, just past this template) */
0xbf, 0xff, 0x00, 0x00, 0x00, /* movl sizeof ($C0), %edi */
0xe9, 0x00, 0x00, 0x00, 0x00 /* jmp +$JMP (allocbuf) */
};
/*
* if (size <= $CACHE_SIZE) {
* csize = $CACHE_SIZE;
* roots += $CACHE_NUM;
* } else ... ! goto next cache
*/
#define PTC_GENCACHE_CMP 0x02
#define PTC_GENCACHE_NUM 0x0a
#define PTC_GENCACHE_SIZE 0x0c
#define PTC_GENCACHE_JMP 0x11
/*
 * Template for an i386 middle cache check. The root offset is a one-byte
 * addl immediate at PTC_GENCACHE_NUM; the other PTC_GENCACHE_* fields are
 * 32 bits wide.
 */
static const uint8_t gencache[] = {
0x81, 0xfe, 0x00, 0x00, 0x00, 0x00, /* cmpl sizeof ($CACHE), %esi */
0x77, 0x0d, /* ja +0xd (next cache) */
0x83, 0xc2, 0x00, /* addl $4*$ii, %edx */
0xbf, 0x00, 0x00, 0x00, 0x00, /* movl sizeof ($CACHE), %edi */
0xe9, 0x00, 0x00, 0x00, 0x00 /* jmp +$JMP (allocbuf) */
};
/*
* else if (size <= $CACHE_SIZE) {
* csize = $CACHE_SIZE;
* roots += $CACHE_NUM;
* } else {
* goto tofunc; ! goto tomalloc if ptcmalloc.
* } ! goto tofree if ptcfree.
*/
#define PTC_FINCACHE_CMP 0x02
#define PTC_FINCACHE_JMP 0x07
#define PTC_FINCACHE_NUM 0x0a
#define PTC_FINCACHE_SIZE 0x0c
/*
 * Template for the i386 last cache check. It has no trailing jmp, so it
 * falls through into whatever is emitted after it. The ja displacement at
 * PTC_FINCACHE_JMP and the addl immediate at PTC_FINCACHE_NUM are one byte
 * each.
 */
static const uint8_t fincache[] = {
0x81, 0xfe, 0xff, 0x00, 0x00, 0x00, /* cmpl sizeof ($CLAST), %esi */
0x77, 0x00, /* ja +$JMP (to errout) */
0x83, 0xc2, 0x00, /* addl $4*($NCACHES-1), %edx */
0xbf, 0x00, 0x00, 0x00, 0x00, /* movl sizeof ($CLAST), %edi */
};
/*
 * if (*root == NULL)
 *	goto tomalloc;
 *
 * malloc_data_t *ret = *root;
 * *root = *(void **)ret;
 * t->tm_size -= csize;
 * ret->malloc_size = size;
 *
 * ret->malloc_data = UMEM_MALLOC_ENCODE(MALLOC_MAGIC, size);
 * ret++;
 *
 * return ((void *)ret);
 * tomalloc:
 * return (malloc(orig_size));
 */
#define PTC_MALFINI_ALLABEL 0x00
#define PTC_MALFINI_JMLABEL 0x20
#define PTC_MALFINI_JMADDR 0x25
/*
 * Template for the i386 ptcmalloc epilogue. Both exits restore %ebx, %esi
 * and %edi and tear down the frame. The rel32 of the final jmp is at
 * PTC_MALFINI_JMADDR.
 */
static const uint8_t malfini[] = {
/* allocbuf: */
0x8b, 0x02, /* movl (%edx), %eax */
0x85, 0xc0, /* testl %eax, %eax */
0x74, 0x1a, /* je +0x1a (errout) */
0x8b, 0x18, /* movl (%eax), %ebx */
0x89, 0x1a, /* movl %ebx, (%edx) */
0x29, 0x39, /* subl %edi, (%ecx) */
0x89, 0x30, /* movl %esi, (%eax) */
0xba, 0x00, 0xc0, 0x10, 0x3a, /* movl $0x3a10c000,%edx */
0x29, 0xf2, /* subl %esi, %edx */
0x89, 0x50, 0x04, /* movl %edx, 0x4(%eax) */
0x83, 0xc0, 0x08, /* addl $0x8, %eax */
0x5b, /* popl %ebx */
0x5e, /* popl %esi */
0x5f, /* popl %edi */
0xc9, /* leave */
0xc3, /* ret */
/* errout: */
0x5b, /* popl %ebx */
0x5e, /* popl %esi */
0x5f, /* popl %edi */
0xc9, /* leave */
0xe9, 0x00, 0x00, 0x00, 0x00 /* jmp $malloc */
};
/*
* Free epilogue: push the buffer onto the per-thread list selected by the
* cache checks unless doing so would reach the per-thread cache limit, in
* which case the buffer is handed to the real free.
*
* if (t->tm_size + csize >= umem_ptc_size)
* goto tofree;
*
* t->tm_size += csize
* *(void **)tag = *root;
* *root = tag;
* return;
* tofree:
* free(buf);
* return;
*
* PTC_FRFINI_RBUFLABEL, PTC_FRFINI_DONELABEL and PTC_FRFINI_JFLABEL are the
* byte offsets of the freebuf, done and tofree labels within freefini[].
* PTC_FRFINI_CACHEMAX is the offset of the cmpl immediate and
* PTC_FRFINI_JFADDR the offset of the rel32 operand of the final jmp; both
* are patched by genasm_frfini().
*/
#define PTC_FRFINI_RBUFLABEL 0x00
#define PTC_FRFINI_CACHEMAX 0x06
#define PTC_FRFINI_DONELABEL 0x14
#define PTC_FRFINI_JFLABEL 0x19
#define PTC_FRFINI_JFADDR 0x1e
static const uint8_t freefini[] = {
/* freebuf: */
0x8b, 0x19, /* movl (%ecx),%ebx */
0x01, 0xfb, /* addl %edi,%ebx */
0x81, 0xfb, 0x00, 0x00, 0x00, 0x00, /* cmpl maxsize, %ebx */
0x73, 0x0d, /* jae +0xd <tofree> */
0x01, 0x39, /* addl %edi,(%ecx) */
0x8b, 0x3a, /* movl (%edx),%edi */
0x89, 0x38, /* movl %edi,(%eax) */
0x89, 0x02, /* movl %eax,(%edx) */
/* done: */
0x5b, /* popl %ebx */
0x5e, /* popl %esi */
0x5f, /* popl %edi */
0xc9, /* leave */
0xc3, /* ret */
/* tofree: */
0x5b, /* popl %ebx */
0x5e, /* popl %esi */
0x5f, /* popl %edi */
0xc9, /* leave */
0xe9, 0x00, 0x00, 0x00, 0x00 /* jmp free */
};
/*
 * Emit the malloc prologue at bp and fill in its operands.
 *
 *   off   - offset from curthread to the root of the tmem structure
 *   ep    - offset, relative to bp, of the error label from which the
 *           generated code gives up and jumps to the real malloc
 *   csize - size of the largest umem cache handled by ptcumem
 *
 * Both jump displacements in the prologue (PTC_MALINIT_JOUT and
 * PTC_MALINIT_JOV) target ep.  Returns the number of bytes emitted.
 */
static int
genasm_malinit(uint8_t *bp, uint32_t off, uint32_t ep, uint32_t csize)
{
	const uint32_t jout = PTC_JMPADDR(ep, PTC_MALINIT_JOUT);
	const uint32_t jov = PTC_JMPADDR(ep, PTC_MALINIT_JOV);

	/* Start from the pristine template, then patch each operand. */
	bcopy(malinit, bp, sizeof (malinit));

	bcopy(&jout, bp + PTC_MALINIT_JOUT, sizeof (jout));
	bcopy(&csize, bp + PTC_MALINIT_MCS, sizeof (csize));
	bcopy(&jov, bp + PTC_MALINIT_JOV, sizeof (jov));
	bcopy(&off, bp + PTC_MALINIT_SOFF, sizeof (off));

	return (sizeof (malinit));
}
/*
 * Emit the free prologue at bp and fill in its operands.
 *
 *   off - offset from curthread to the root of the tmem structure
 *   dp  - offset, relative to bp, of the "done" label (plain return)
 *   ep  - offset, relative to bp, of the label that jumps to the real free
 *   mc  - size of the largest umem cache handled by ptcumem
 *
 * Returns the number of bytes emitted.
 */
static int
genasm_frinit(uint8_t *bp, uint32_t off, uint32_t dp, uint32_t ep, uint32_t mc)
{
	const uint32_t jdone = PTC_JMPADDR(dp, PTC_FRINI_JDONE);
	const uint32_t jfree = PTC_JMPADDR(ep, PTC_FRINI_JFREE);
	const uint32_t jov = PTC_JMPADDR(ep, PTC_FRINI_JOV);

	/* Start from the pristine template, then patch each operand. */
	bcopy(freeinit, bp, sizeof (freeinit));

	bcopy(&jdone, bp + PTC_FRINI_JDONE, sizeof (jdone));
	bcopy(&jfree, bp + PTC_FRINI_JFREE, sizeof (jfree));
	bcopy(&mc, bp + PTC_FRINI_MCS, sizeof (mc));
	bcopy(&jov, bp + PTC_FRINI_JOV, sizeof (jov));
	bcopy(&off, bp + PTC_FRINI_SOFF, sizeof (off));

	return (sizeof (freeinit));
}
/*
 * Emit the check for the first cache, whose buffers are csize bytes.  ap is
 * the offset, relative to bp, of the label at which the generated code tries
 * to take (or return) a buffer; the template's trailing jmp is pointed at it.
 * Returns the number of bytes emitted.
 */
static int
genasm_firstcache(uint8_t *bp, uint32_t csize, uint32_t ap)
{
	const uint32_t rel = PTC_JMPADDR(ap, PTC_INICACHE_JMP);

	bcopy(inicache, bp, sizeof (inicache));

	/* The cache size is both the comparison bound and the csize value. */
	bcopy(&csize, bp + PTC_INICACHE_CMP, sizeof (csize));
	bcopy(&csize, bp + PTC_INICACHE_SIZE, sizeof (csize));

	ASSERT(rel != 0);
	bcopy(&rel, bp + PTC_INICACHE_JMP, sizeof (rel));

	return (sizeof (inicache));
}
/*
 * Emit the check for cache number num (neither the first nor the last),
 * whose buffers are csize bytes.  ap is the offset, relative to bp, of the
 * label at which the generated code tries to take (or return) a buffer.
 * Returns the number of bytes emitted.
 */
static int
genasm_gencache(uint8_t *bp, int num, uint32_t csize, uint32_t ap)
{
	uint32_t addr;
	uint8_t coff;

	/*
	 * The template advances the root pointer with "addl $imm8, %edx"
	 * (83 /0 ib).  The processor sign-extends that immediate, so an offset
	 * above 0x7f would move the pointer backwards instead of forwards.
	 */
	ASSERT(num > 0);
	ASSERT(num * PTC_ROOT_SIZE <= 0x7f);

	bcopy(gencache, bp, sizeof (gencache));

	/* The cache size is both the comparison bound and the csize value. */
	bcopy(&csize, bp + PTC_GENCACHE_CMP, sizeof (csize));
	bcopy(&csize, bp + PTC_GENCACHE_SIZE, sizeof (csize));

	coff = num * PTC_ROOT_SIZE;
	bcopy(&coff, bp + PTC_GENCACHE_NUM, sizeof (coff));

	addr = PTC_JMPADDR(ap, PTC_GENCACHE_JMP);
	bcopy(&addr, bp + PTC_GENCACHE_JMP, sizeof (addr));

	return (sizeof (gencache));
}
/*
 * Emit the check for the last cache (number num, csize-byte buffers).  ep is
 * the offset, relative to bp, of the label taken when the request is too
 * large for any cache.  Unlike the other cache checks this one ends without
 * a jmp: on success execution falls through into whatever is emitted next.
 * Returns the number of bytes emitted.
 */
static int
genasm_lastcache(uint8_t *bp, int num, uint32_t csize, uint32_t ep)
{
	uint8_t roff;
	uint8_t rel8;

	/*
	 * Both one-byte operands patched below are sign-extended by the
	 * processor: the "ja rel8" displacement and the "addl $imm8, %edx"
	 * root offset.  Neither may therefore exceed 0x7f.  The displacement
	 * is measured from the end of the ja, i.e. PTC_FINCACHE_JMP + 1.
	 */
	ASSERT(ep > PTC_FINCACHE_JMP);
	ASSERT(ep - PTC_FINCACHE_JMP - 1 <= 0x7f);
	ASSERT(num >= 0);
	ASSERT(num * PTC_ROOT_SIZE <= 0x7f);

	bcopy(fincache, bp, sizeof (fincache));

	/* The cache size is both the comparison bound and the csize value. */
	bcopy(&csize, bp + PTC_FINCACHE_CMP, sizeof (csize));
	bcopy(&csize, bp + PTC_FINCACHE_SIZE, sizeof (csize));

	roff = num * PTC_ROOT_SIZE;
	bcopy(&roff, bp + PTC_FINCACHE_NUM, sizeof (roff));

	rel8 = ep - PTC_FINCACHE_JMP - 1;
	bcopy(&rel8, bp + PTC_FINCACHE_JMP, sizeof (rel8));

	return (sizeof (fincache));
}
/*
 * Emit the malloc epilogue at bp and point its final jmp at mptr, the
 * address of the function to fall back on.  Because bp is the epilogue's
 * final location, the displacement is computed from the absolute address of
 * the jmp operand.  Returns the number of bytes emitted.
 */
static int
genasm_malfini(uint8_t *bp, uintptr_t mptr)
{
	const uintptr_t operand = (uintptr_t)bp + PTC_MALFINI_JMADDR;
	uint32_t rel;

	bcopy(malfini, bp, sizeof (malfini));

	rel = PTC_JMPADDR(mptr, operand);
	bcopy(&rel, bp + PTC_MALFINI_JMADDR, sizeof (rel));

	return (sizeof (malfini));
}
/*
 * Emit the free epilogue at bp.  maxthr is patched in as the limit the
 * per-thread cache size is compared against, and the final jmp is pointed at
 * fptr, the address of the function to fall back on.  Returns the number of
 * bytes emitted.
 */
static int
genasm_frfini(uint8_t *bp, uint32_t maxthr, uintptr_t fptr)
{
	const uintptr_t operand = (uintptr_t)bp + PTC_FRFINI_JFADDR;
	uint32_t rel;

	bcopy(freefini, bp, sizeof (freefini));
	bcopy(&maxthr, bp + PTC_FRFINI_CACHEMAX, sizeof (maxthr));

	rel = PTC_JMPADDR(fptr, operand);
	bcopy(&rel, bp + PTC_FRFINI_JFADDR, sizeof (rel));

	return (sizeof (freefini));
}
/*
 * The malloc inline assembly is laid out as:
 *
 *  o Malloc prologue
 *  o First-cache check			(only when nents >= 2)
 *  o Generic cache checks, one per cache between the first and the last
 *    (nents - 2 of them, where nents is derived from _tmem_get_nentries())
 *  o Last-cache check
 *  o Malloc epilogue
 *
 * With a single cache only the last-cache check is emitted; with two caches
 * the generic middle checks are simply absent.
 *
 * base/len describe the buffer to fill.  Returns 0 on success and 1 when
 * the generated code would not fit in len bytes.
 */
static int
genasm_malloc(void *base, size_t len, int nents, int *umem_alloc_sizes)
{
	uint8_t *cur;
	size_t need;
	uint32_t alloc_rel, err_rel;
	int idx, wrote;

	need = sizeof (malinit) + sizeof (malfini) + sizeof (fincache);
	if (nents >= 2)
		need += sizeof (inicache) + sizeof (gencache) * (nents - 2);
	if (need > len)
		return (1);

	/*
	 * Both labels live in the epilogue, which is emitted last.  Their
	 * offsets are kept relative to the piece about to be emitted, so they
	 * shrink by the size of every piece written.
	 */
	err_rel = need - sizeof (malfini) + PTC_MALFINI_JMLABEL;
	alloc_rel = need - sizeof (malfini) + PTC_MALFINI_ALLABEL;

	cur = base;
	wrote = genasm_malinit(cur, umem_tmem_off, err_rel,
	    umem_alloc_sizes[nents-1]);
	cur += wrote;
	alloc_rel -= wrote;
	err_rel -= wrote;

	if (nents > 1) {
		wrote = genasm_firstcache(cur, umem_alloc_sizes[0], alloc_rel);
		cur += wrote;
		alloc_rel -= wrote;
		err_rel -= wrote;
	}

	idx = 1;
	while (idx < nents - 1) {
		wrote = genasm_gencache(cur, idx, umem_alloc_sizes[idx],
		    alloc_rel);
		cur += wrote;
		alloc_rel -= wrote;
		err_rel -= wrote;
		idx++;
	}

	cur += genasm_lastcache(cur, nents - 1, umem_alloc_sizes[nents - 1],
	    err_rel);
	cur += genasm_malfini(cur, umem_genasm_omptr);

	/* Everything accounted for in 'need' must have been written. */
	ASSERT(((uintptr_t)cur - need) == (uintptr_t)base);
	return (0);
}
/*
 * Generate the free fast path into base/len.  The layout mirrors
 * genasm_malloc(): free prologue, optional first-cache check, the generic
 * cache checks, the last-cache check and finally the free epilogue.
 *
 * nents is assumed to have been validated by the caller.  Returns 0 on
 * success and 1 when the generated code would not fit in len bytes.
 */
static int
genasm_free(void *base, size_t len, int nents, int *umem_alloc_sizes)
{
	uint8_t *cur;
	size_t need, tail;
	uint32_t rbuf_rel, done_rel, err_rel;
	int idx, wrote;

	need = sizeof (freeinit) + sizeof (freefini) + sizeof (fincache);
	if (nents >= 2)
		need += sizeof (inicache) + sizeof (gencache) * (nents - 2);
	if (need > len)
		return (1);

	/*
	 * All three labels live in the epilogue, which starts 'tail' bytes
	 * into the generated code.
	 */
	tail = need - sizeof (freefini);
	err_rel = tail + PTC_FRFINI_JFLABEL;
	rbuf_rel = tail + PTC_FRFINI_RBUFLABEL;
	done_rel = tail + PTC_FRFINI_DONELABEL;

	cur = base;
	wrote = genasm_frinit(cur, umem_tmem_off, done_rel, err_rel,
	    umem_alloc_sizes[nents - 1]);
	cur += wrote;
	err_rel -= wrote;
	rbuf_rel -= wrote;

	if (nents > 1) {
		wrote = genasm_firstcache(cur, umem_alloc_sizes[0], rbuf_rel);
		cur += wrote;
		err_rel -= wrote;
		rbuf_rel -= wrote;
	}

	idx = 1;
	while (idx < nents - 1) {
		wrote = genasm_gencache(cur, idx, umem_alloc_sizes[idx],
		    rbuf_rel);
		cur += wrote;
		rbuf_rel -= wrote;
		err_rel -= wrote;
		idx++;
	}

	cur += genasm_lastcache(cur, nents - 1, umem_alloc_sizes[nents - 1],
	    err_rel);
	cur += genasm_frfini(cur, umem_ptc_size, umem_genasm_ofptr);

	/* Everything accounted for in 'need' must have been written. */
	ASSERT(((uintptr_t)cur - need) == (uintptr_t)base);
	return (0);
}
/*
 * Generate the per-thread-cache fast paths for malloc and free and switch
 * them on.
 *
 *   alloc_sizes - buffer size of each umem cache
 *   caches      - the caches themselves; the first nents get UMF_PTC set
 *   ncaches     - number of entries in both arrays
 *
 * Returns 0 on success, including the case where per-thread caching is
 * simply not applicable (no entries, or umem_ptc_size of zero), and 1 when
 * the target regions are missing or too small.
 */
int
umem_genasm(int *alloc_sizes, umem_cache_t **caches, int ncaches)
{
	int nents, i, maxroots;
	uint8_t *mptr;
	uint8_t *fptr;
	uint64_t v, *vptr;

	if (umem_genasm_mptr == 0 || umem_genasm_msize == 0 ||
	    umem_genasm_fptr == 0 || umem_genasm_fsize == 0)
		return (1);

	/*
	 * Each region starts with a jump that is left in place until the
	 * code behind it is complete, so generation starts 5 bytes in.
	 * NOTE(review): the length handed to genasm_malloc()/genasm_free()
	 * below is the full region size, not the size minus these 5 bytes --
	 * confirm that umem_genasm_msize/fsize already exclude the jump.
	 */
	mptr = (void *)((uintptr_t)umem_genasm_mptr + 5);
	fptr = (void *)((uintptr_t)umem_genasm_fptr + 5);

	/*
	 * The total number of caches that we can service is the minimum of:
	 *  o the amount supported by libc
	 *  o the total number of umem caches
	 *  o UMEM_GENASM_MAX32
	 *  o the number of roots reachable with the single byte addl.  That
	 *    immediate is sign-extended, so the largest usable root offset
	 *    is 0x7f, not 0xff: 0x7f / PTC_ROOT_SIZE + 1 roots.
	 */
	nents = _tmem_get_nentries();
	if (UMEM_GENASM_MAX32 < nents)
		nents = UMEM_GENASM_MAX32;
	maxroots = (int)(0x7f / PTC_ROOT_SIZE) + 1;
	if (maxroots < nents)
		nents = maxroots;
	if (ncaches < nents)
		nents = ncaches;

	/* Based on our constraints, this is not an error */
	if (nents == 0 || umem_ptc_size == 0)
		return (0);

	/* Take into account the jump */
	if (genasm_malloc(mptr, umem_genasm_msize, nents,
	    alloc_sizes) != 0)
		return (1);

	if (genasm_free(fptr, umem_genasm_fsize, nents,
	    alloc_sizes) != 0)
		return (1);

	/*
	 * nop out the jump with a multibyte nop.  Only the low five bytes of
	 * the 8-byte word are replaced; the mask preserves the three bytes
	 * that follow the jump.
	 */
	vptr = (void *)umem_genasm_mptr;
	v = MULTINOP;
	v |= *vptr & (0xffffffULL << 40);
	(void) atomic_swap_64(vptr, v);

	vptr = (void *)umem_genasm_fptr;
	v = MULTINOP;
	v |= *vptr & (0xffffffULL << 40);
	(void) atomic_swap_64(vptr, v);

	for (i = 0; i < nents; i++)
		caches[i]->cache_flags |= UMF_PTC;

	return (0);
}

View file

@ -34,6 +34,8 @@
void __umem_assert_failed(void) {} void __umem_assert_failed(void) {}
void _atomic_add_64(void) {} void _atomic_add_64(void) {}
void atomic_add_32_nv(void) {}
void atomic_swap_64(void) {}
void _atomic_add_32_nv(void) {} void _atomic_add_32_nv(void) {}
void bcopy(void) {} void bcopy(void) {}
void bzero(void) {} void bzero(void) {}

1
m4/libtool.m4 vendored
View file

@ -1 +0,0 @@
/usr/share/aclocal/libtool.m4

1
m4/ltoptions.m4 vendored
View file

@ -1 +0,0 @@
/usr/share/aclocal/ltoptions.m4

1
m4/ltsugar.m4 vendored
View file

@ -1 +0,0 @@
/usr/share/aclocal/ltsugar.m4

1
m4/ltversion.m4 vendored
View file

@ -1 +0,0 @@
/usr/share/aclocal/ltversion.m4

1
m4/lt~obsolete.m4 vendored
View file

@ -1 +0,0 @@
/usr/share/aclocal/lt~obsolete.m4

View file

@ -59,6 +59,15 @@ typedef struct malloc_data {
uint32_t malloc_stat; /* = UMEM_MALLOC_ENCODE(state, malloc_size) */ uint32_t malloc_stat; /* = UMEM_MALLOC_ENCODE(state, malloc_size) */
} malloc_data_t; } malloc_data_t;
/*
* Because we do not support ptcumem on non-x86 today, we have to create these
* weak aliases.
*/
#if !defined(__amd64__) && !defined(__x86_64__)
#pragma weak malloc = umem_malloc
#pragma weak free = umem_malloc_free
#endif /* !_x86 */
void * void *
malloc(size_t size_arg) malloc(size_t size_arg)
{ {

92
sparc/asm_subr.s Normal file
View file

@ -0,0 +1,92 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/asm_linkage.h>
#if defined(lint)
/*
* C-language stand-ins for the assembly routines in this file. They are only
* compiled when lint is defined, giving lint prototypes to check against;
* the bodies do nothing useful.
*/
void *
getfp(void)
{
return (NULL);
}
void
flush_windows(void)
{
}
#ifndef UMEM_STANDALONE
/* _breakpoint is only provided outside the standalone build. */
void
_breakpoint(void)
{
return;
}
#endif
#else /* lint */
/*
* getfp(): return the current value of %fp. retl is a delayed control
* transfer, so the mov that places %fp in the return register %o0 executes
* in its delay slot.
*/
ENTRY(getfp)
retl
mov %fp, %o0
SET_SIZE(getfp)
#ifdef UMEM_STANDALONE
#ifdef __sparcv9
/*
* Standalone (sparcv9) flush_windows(): executes the flushw instruction in
* the delay slot of the return.
*
* The caller doesn't need the top window to be flushed, so this
* is sufficient.
*/
ENTRY(flush_windows)
retl
flushw
SET_SIZE(flush_windows)
#else /* !__sparcv9 */
#error "This file does not provide a pre-v9 standalone flush_windows"
#endif /* __sparcv9 */
#else /* !UMEM_STANDALONE */
/*
* Non-standalone flush_windows(): raises software trap 0x3 in the delay slot
* of the return.
* NOTE(review): trap 0x3 is presumably the kernel's flush-windows service --
* confirm against the platform trap table.
*/
ENTRY(flush_windows)
retl
ta 0x3
SET_SIZE(flush_windows)
#endif /* UMEM_STANDALONE */
#ifndef UMEM_STANDALONE
/*
* _breakpoint(): raises software trap 0x1 in the delay slot of the return.
* NOTE(review): trap 0x1 is presumably the debugger breakpoint trap --
* confirm against the platform trap table.
*/
ENTRY(_breakpoint)
retl
ta 0x1
SET_SIZE(_breakpoint)
#endif
#endif /* lint */

43
sparc/umem_genasm.c Normal file
View file

@ -0,0 +1,43 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2014 Joyent, Inc. All rights reserved.
*/
/*
* Don't Panic! If you wonder why this seemingly empty file exists, it's because
* there is no sparc implementation for ptcumem. Go read libumem's big theory
* statement in lib/libumem/common/umem.c, particularly section eight.
*/
#include <inttypes.h>
#include <strings.h>
#include <umem_impl.h>
#include "umem_base.h"
const int umem_genasm_supported = 0;
/*
* sparc has no ptcumem code generator (umem_genasm_supported is 0), so this
* stub ignores its arguments and always returns 1.
*/
/*ARGSUSED*/
int
umem_genasm(int *alloc_sizes, umem_cache_t **caches, int ncaches)
{
return (1);
}

View file

@ -21,11 +21,13 @@
*/ */
/* /*
* Copyright 2014 Garrett D'Amore <garrett@damore.org>
* Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms. * Use is subject to license terms.
*/ */
/*
#pragma ident "@(#)stub_stand.c 1.3 05/06/08 SMI" * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
/* /*
* Stubs for the standalone to reduce the dependence on external libraries * Stubs for the standalone to reduce the dependence on external libraries
@ -68,6 +70,13 @@ _cond_broadcast(cond_t *cvp)
return (0); return (0);
} }
/*
* Standalone stub: ignores both arguments, leaves *oldstate untouched and
* always reports success (0).
*/
/*ARGSUSED*/
int
pthread_setcancelstate(int state, int *oldstate)
{
return (0);
}
thread_t thread_t
_thr_self(void) _thr_self(void)
{ {
@ -124,3 +133,36 @@ issetugid(void)
{ {
return (1); return (1);
} }
/*
* Standalone stub: always reports zero per-thread cache entries.
*/
int
_tmem_get_nentries(void)
{
return (0);
}
/*
* Standalone stub: always returns 0 as the tmem base.
*/
uintptr_t
_tmem_get_base(void)
{
return (0);
}
/*
* Standalone stub: the cleanup callback is ignored and never invoked by this
* function.
*/
/*ARGSUSED*/
void
_tmem_set_cleanup(void (*f)(int, void *))
{
}
/*
 * Minimal isspace() for the standalone build: returns 1 for space,
 * horizontal tab, newline, carriage return, form feed and vertical tab, and
 * 0 for everything else.
 */
int
isspace(int c)
{
	if (c == ' ' || c == '\t' || c == '\n')
		return (1);
	if (c == '\r' || c == '\f' || c == '\v')
		return (1);
	return (0);
}

521
test.c
View file

@ -1,521 +0,0 @@
/* malloc-test.c
* by Wolfram Gloger 1995, 1996
*
* This program is provided `as is', there is no warranty.
* https://raw.githubusercontent.com/emeryberger/Malloc-Implementations/master/allocators/CAMA/malloc-test.c
*/
#if !defined(__STDC__)
#define __STDC__ 1
#endif
#include <stdlib.h>
#include <stdio.h>
#if !defined(_WIN32)
#include <unistd.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/resource.h>
#endif
#ifndef MEMORY
#define MEMORY 4000000l
#endif
#ifndef BINS_MAX
#define BINS_MAX 32768
#endif
#define SBINS_MAX 1024
#define SIZE 4024
#define I_MAX 5000
#ifndef I_AVERAGE
#define I_AVERAGE 200
#endif
#define ACTIONS_MAX 50
#ifndef SBRK_AVG
#define SBRK_AVG 0
#endif
#ifndef MMAP_THRESH
#define MMAP_THRESH 0
#endif
#ifndef TEST
#define TEST 4 /* minimal testing */
#endif
#ifndef TEST_INC
#define TEST_INC 2047
#endif
#if defined(__i386__) || defined(__sparc__) || defined(mips) || defined(_WIN32)
#define PAGE_SIZE 4096
#elif defined(__alpha__)
#define PAGE_SIZE 8192
#elif defined(__SVR4)
#define PAGE_SIZE 8192
#else
#define PAGE_SIZE 4096 /* default */
#endif
#define RANDOM(s) (lran2(0) % (s))
/* All probabilities are parts in 1024. */
#ifndef PROB_MEMALIGN
#define PROB_MEMALIGN 0
#endif
#ifndef PROB_REALLOC
#define PROB_REALLOC 48
#endif
#ifndef PROB_CALLOC
#define PROB_CALLOC 0
#endif
struct bin {
unsigned char *ptr;
unsigned long size;
} m[BINS_MAX], sm[SBINS_MAX];
unsigned long size = SIZE, bins=0, sbins=0;
unsigned long total_size=0, total_size_max=0;
unsigned char *base_ptr;
unsigned long base_save;
/*
 * Portable pseudo-random generator: a linear congruential sequence whose
 * outputs are shuffled through a 97-entry table.  A non-zero seed (or the
 * very first call) re-initialises the state; lran2(0) draws the next value.
 */
long
#if __STDC__
lran2(long seed)
#else
lran2(seed) long seed;
#endif
#define LRAN2_MAX 714025l /* constants for portable */
#define IA 1366l /* random number generator */
#define IC 150889l /* (see Numerical Recipes p. 211) */
{
	static int unseeded = 1;
	static long lcg, pick, shuffle[97];
	long result;
	int slot;

	if (seed != 0 || unseeded) {
		unseeded = 0;
		lcg = (IC - seed) % LRAN2_MAX;
		if (lcg < 0)
			lcg = -lcg;
		/* Fill the shuffle table with consecutive LCG outputs. */
		slot = 0;
		while (slot < 97) {
			lcg = (IA * lcg + IC) % LRAN2_MAX;
			shuffle[slot] = lcg;
			slot++;
		}
		lcg = (IA * lcg + IC) % LRAN2_MAX;
		pick = lcg;
	}

	/* The previous output selects which table entry is returned next. */
	slot = pick % 97;
	result = shuffle[slot];
	pick = result;

	/* Refill the slot just consumed. */
	lcg = (IA * lcg + IC) % LRAN2_MAX;
	shuffle[slot] = lcg;

	return result;
}
#undef IA
#undef IC
/*
 * Stamp a buffer with a pattern derived from its own address and size so
 * that mem_check() can later detect corruption.  Buffers larger than one
 * unsigned long get (address ^ size) in their first word; after that one
 * byte every TEST_INC bytes, plus the final byte, is written.
 */
void
#if __STDC__
mem_init(unsigned char *ptr, unsigned long size)
#else
mem_init(ptr, size) unsigned char *ptr; unsigned long size;
#endif
{
	const unsigned long addr = (unsigned long)ptr;
	unsigned long off, mix;

	if (size == 0)
		return;

	off = 0;
	if (size > sizeof(unsigned long)) {
		/* Try the complete initial word. */
		*(unsigned long *)ptr = addr ^ size;
		off = TEST_INC;
	}

	while (off < size) {
		mix = addr ^ off;
		ptr[off] = ((mix ^ (mix >> 8)) & 0xFF);
		off += TEST_INC;
	}

	mix = addr ^ (size - 1);
	ptr[size - 1] = ((mix ^ (mix >> 8)) & 0xFF);
}
/*
 * Verify the pattern written by mem_init().  Returns 0 when the buffer is
 * intact (or empty), 1 when the leading word is wrong, 2 when one of the
 * sampled bytes is wrong and 3 when the final byte is wrong.
 */
int
#if __STDC__
mem_check(unsigned char *ptr, unsigned long size)
#else
mem_check(ptr, size) unsigned char *ptr; unsigned long size;
#endif
{
	const unsigned long addr = (unsigned long)ptr;
	unsigned long off, mix;

	if (size == 0)
		return 0;

	off = 0;
	if (size > sizeof(unsigned long)) {
		if (*(unsigned long *)ptr != (addr ^ size)) {
			printf ("failed size check: expected %lx, found %lx!\n",
			    ((unsigned long) ptr ^ size), *(unsigned long *) ptr);
			return 1;
		}
		off = TEST_INC;
	}

	while (off < size) {
		mix = addr ^ off;
		if (ptr[off] != ((mix ^ (mix >> 8)) & 0xFF))
			return 2;
		off += TEST_INC;
	}

	mix = addr ^ (size - 1);
	if (ptr[size - 1] != ((mix ^ (mix >> 8)) & 0xFF)) {
		printf ("failed last byte check: expected %lx, found %x!\n",
		    ((unsigned long) ((mix ^ (mix >> 8)) & 0xFF)), ptr[size - 1]);
		return 3;
	}
	return 0;
}
/*
 * Pick a random allocation size.  When max is smaller than a page the
 * result is uniform in [1, max].  Otherwise half of the draws are small
 * values near a power of two, a few are values near a multiple of the page
 * size, and the rest are uniform in [1, max].
 */
long
#if __STDC__
random_size(long max)
#else
random_size(max) long max;
#endif
{
	const long max_pages = max / PAGE_SIZE;
	long roll, jitter;

	if (max_pages <= 0)
		return RANDOM(max) + 1;

	roll = RANDOM(1024);
	jitter = (roll & 7) * 4;

	/* small value near power of two */
	if (roll < 512)
		return (1L << (roll >> 6)) + jitter;

	/* value near a multiple of the page size */
	if (roll < 512 + 20)
		return (RANDOM(max_pages) + 1) * PAGE_SIZE + jitter - 16;

	return RANDOM(max) + 1;
}
/*
* (Re)allocate the buffer held by bin m with a fresh random size.
*
* The bin's current contents are verified first (TEST > 0). A random draw
* out of 1024 then selects memalign, realloc, calloc or plain malloc
* according to the PROB_* weights. The global size accounting is updated
* and the new buffer is stamped with mem_init(). The program exits on
* detected corruption or allocation failure.
*/
void
#if __STDC__
bin_alloc(struct bin *m)
#else
bin_alloc(m) struct bin *m;
#endif
{
long r, key;
unsigned long sz;
#if TEST > 0
if(mem_check(m->ptr, m->size)) {
printf("bin_alloc: memory corrupt at %p, size=%lu!\n", m->ptr, m->size);
exit(1);
}
#endif
total_size -= m->size;
r = RANDOM(1024);
if(r < PROB_MEMALIGN) {
#if !defined(_WIN32)
if(m->size > 0) free(m->ptr);
m->size = random_size(size);
#if PROB_MEMALIGN
m->ptr = (unsigned char *)memalign(4 << RANDOM(8), m->size);
#endif
#endif
} else if(r < (PROB_MEMALIGN + PROB_REALLOC)) {
if(m->size == 0) {
#ifndef __sparc__
m->ptr = NULL;
#else
/* SunOS4 does not realloc() a NULL pointer */
m->ptr = (unsigned char *)malloc(1);
#endif
}
#if TEST > 2
/* Fill the old contents with a keyed pattern to verify after realloc. */
key = RANDOM(256);
sz = m->size;
for(r=0; r<sz; r++) m->ptr[r] = (r ^ key) & 0xFF;
#endif
m->size = random_size(size);
/*printf("realloc %d\n", (int)m->size);*/
m->ptr = (unsigned char *)realloc(m->ptr, m->size);
#if TEST > 2
/*
* Only the bytes common to the old and new sizes must have survived.
* NOTE(review): m->ptr is dereferenced here before the NULL check
* further down, so a failed realloc() would crash instead of reporting
* "out of memory".
*/
if(m->size < sz) sz = m->size;
for(r=0; r<sz; r++)
if(m->ptr[r] != ((r ^ key) & 0xFF)) {
printf("realloc bug !\n");
exit(1);
}
#endif
} else if(r < (PROB_MEMALIGN + PROB_REALLOC + PROB_CALLOC)) {
if(m->size > 0) free(m->ptr);
m->size = random_size(size);
m->ptr = (unsigned char *)calloc(m->size, 1);
#if TEST > 2
/* NOTE(review): same ordering issue as above if calloc() fails. */
for(r=0; r<m->size; r++)
if(m->ptr[r] != '\0') {
printf("calloc bug !\n");
exit(1);
}
#endif
} else { /* normal malloc call */
if(m->size > 0) free(m->ptr);
m->size = random_size(size);
m->ptr = (unsigned char *)malloc(m->size);
}
if(!m->ptr) {
printf("out of memory!\n");
exit(1);
}
total_size += m->size;
if(total_size > total_size_max) total_size_max = total_size;
#if TEST > 0
mem_init(m->ptr, m->size);
#endif
/* Track the lowest address ever handed out. */
if(m->ptr < base_ptr) {
#ifdef VERBOSE
printf("hmmm, allocating below brk...\n");
#endif
base_ptr = m->ptr;
}
}
/*
 * Release the buffer held by bin m, if any.  With TEST > 0 the buffer's
 * pattern is verified first and the program exits on corruption.  The bin
 * is left empty (size 0) and the global size accounting is updated.
 */
void
#if __STDC__
bin_free(struct bin *m)
#else
bin_free(m) struct bin *m;
#endif
{
	if (m->size != 0) {
#if TEST > 0
		if (mem_check(m->ptr, m->size) != 0) {
			printf("bin_free: memory corrupt!\n");
			exit(1);
		}
#endif
		total_size -= m->size;
		free(m->ptr);
		m->size = 0;
	}
}
/*
 * Verify the pattern of every malloc bin (m[]) and every sbrk bin (sm[]).
 * On the first mismatch a diagnostic naming the bin, its address and its
 * size is printed and the program exits with status 1.
 */
void
bin_test(void)
{
	unsigned long b;
	int v;

	for (b = 0; b < bins; b++) {
		v = mem_check(m[b].ptr, m[b].size);
		if (v != 0) {
			/*
			 * %p prints the address.  The former %hhn did not
			 * print anything: it stored the output length
			 * through the buffer pointer.
			 */
			printf("bin_test: memory corrupt! m[%lu].ptr = %p, "
			    "m[%lu].size = %lu\n",
			    b, (void *)m[b].ptr, b, m[b].size);
			printf ("error = %d\n", v);
			exit(1);
		}
	}

	for (b = 0; b < sbins; b++) {
		if (mem_check(sm[b].ptr, sm[b].size)) {
			printf("bin_test: memory corrupt! sm[%lu].ptr = %p, "
			    "sm[%lu].size = %lu\n",
			    b, (void *)sm[b].ptr, b, sm[b].size);
			exit(1);
		}
	}
}
/*
 * Print the user, system and combined CPU time consumed by this process so
 * far, as reported by getrusage().  Prints nothing on _WIN32.
 */
void
print_times()
{
#if !defined(_WIN32)
	struct rusage usage;
	long sec, usec;

	getrusage(RUSAGE_SELF, &usage);

	printf(" u=%ld.%06ldsec",
	    (long)usage.ru_utime.tv_sec, (long)usage.ru_utime.tv_usec);
	printf(" s=%ld.%06ldsec",
	    (long)usage.ru_stime.tv_sec, (long)usage.ru_stime.tv_usec);

	sec = (long)usage.ru_utime.tv_sec + (long)usage.ru_stime.tv_sec;
	usec = (long)usage.ru_utime.tv_usec + (long)usage.ru_stime.tv_usec;
	/* Carry a whole second out of the microsecond sum. */
	if (usec >= 1000000) {
		sec++;
		usec -= 1000000;
	}
	printf(" t=%ld.%06ldsec", sec, usec);
#endif
}
/*
 * Allocator stress test driver.
 *
 * usage: test [max-actions [max-size]]
 *
 * Alternates bursts of random frees and random allocations over a table of
 * bins until at least max-actions actions have been performed, sampling
 * the break and the allocated total along the way, then prints a summary.
 * Returns 0 on completion and 1 when the size argument is not a positive
 * integer.
 */
int
#if __STDC__
main(int argc, char *argv[])
#else
main(argc, argv) int argc; char *argv[];
#endif
{
	int i, j, next_i, count, max=I_MAX, actions;
	unsigned int b;
	long sbrk_max, sum;
	double sbrk_used_sum, total_size_sum;

	if(argc > 1) max = atoi(argv[1]);
	if(argc > 2) {
		long req = atol(argv[2]);

		/* A zero size would divide by zero when sizing the bins. */
		if(req <= 0) {
			fprintf(stderr, "%s: size must be a positive integer\n",
			    argv[0]);
			return 1;
		}
		size = (unsigned long)req;
	}
	lran2((long)max ^ size);
	bins = (MEMORY/size)*4;
	if(bins > BINS_MAX) bins = BINS_MAX;
	/* Sizes above MEMORY yield no bins; RANDOM(bins) needs at least one. */
	if(bins == 0) bins = 1;
#if 0 // FIX ME? Disable sbrk...
	base_ptr = (unsigned char *)sbrk(0);
	sum = (long)base_ptr % PAGE_SIZE;
	if(sum > 0) {
		if((char *)sbrk((long)PAGE_SIZE - sum) == (char *)-1) exit(1);
		base_ptr += (long)PAGE_SIZE - sum;
		/*printf("base_ptr = %lx\n", (long)base_ptr);*/
	}
	/* attempt to fill up the region below the initial brk */
	void* dummy = 0;
	for(i=0; i<10000; i++) {
		dummy = malloc(1);
		if(dummy >= (void*)base_ptr) break;
	}
	free(dummy);
	base_save = ((unsigned long)base_ptr >> 24) << 24;
#endif
#if MMAP_THRESH > 0
	if(!mallopt(-3, MMAP_THRESH)) printf("mallopt failed!\n");
	if(!mallopt(-4, 200)) printf("mallopt failed!\n");
#endif
#ifdef VERBOSE
	printf("# mmap_thresh=%d\n", MMAP_THRESH);
	printf("# bins=%lu max=%d size=%lu\n", bins, max, size);
	printf("# base=%lx\n", base_save);
#endif
	/* Start with roughly half of the bins populated. */
	for(b=0; b<bins; b++) {
		if(RANDOM(2) == 0) bin_alloc(&m[b]);
		else m[b].size = 0;
	}
	sbrk_max = 0;
	sbrk_used_sum = total_size_sum = 0.0;
	for(i=next_i=count=0; i<=max;) {
#if TEST > 1
		bin_test();
#endif
#ifdef MSTATS
		malloc_stats();
#endif
		/* A burst of frees ... */
		actions = RANDOM(ACTIONS_MAX);
		for(j=0; j<actions; j++) {
			b = RANDOM(bins);
			bin_free(&m[b]);
#if TEST > 3
			bin_test();
#endif
		}
		i += actions;
#ifdef AFTER_FREE
		AFTER_FREE;
#endif
#if SBRK_AVG > 0
		if(sbins<SBINS_MAX && RANDOM(SBRK_AVG)==0) {
			/* throw in an explicit sbrk call */
			sm[sbins].size = RANDOM(10000)+1;
			sm[sbins].ptr = sbrk(sm[sbins].size);
			if(sbins>0 && sm[sbins].ptr==(sm[sbins-1].ptr+sm[sbins-1].size)) {
				sm[sbins-1].size += sm[sbins].size;
				sbins--;
			}
#ifdef VERBOSE
			printf("sbrk #%lu %p %lu\n", sbins, (void *)sm[sbins].ptr,
			    sm[sbins].size);
#endif
#if TEST > 0
			mem_init(sm[sbins].ptr, sm[sbins].size);
#endif
			sbins++;
		}
#endif
		/* ... followed by a burst of allocations. */
		actions = RANDOM(ACTIONS_MAX);
		for(j=0; j<actions; j++) {
			b = RANDOM(bins);
			bin_alloc(&m[b]);
#if TEST > 3
			bin_test();
#endif
		}
		i += actions;
		if(i >= next_i) { /* gather statistics */
			count++;
#if !defined(_WIN32)
			sum = (long)sbrk(0);
#else
			sum = 0;
#endif
			if(sum > sbrk_max) sbrk_max = sum;
			sbrk_used_sum += sum;
			total_size_sum += (double)total_size;
#ifdef VERBOSE
			printf("%8d %7lu\n", i, total_size);
#endif
			next_i += I_AVERAGE;
		}
	}
	/* Correct sbrk values. */
	sbrk_max -= (long)base_ptr;
	sbrk_used_sum -= (double)count*(long)base_ptr;
#ifdef VERBOSE
	printf("# initial brk: %lx\n", (unsigned long)base_ptr);
	printf("# max. sbrk()'ed memory: %ld bytes\n", sbrk_max);
	printf("# avg. sbrk()'ed memory: %ld bytes\n",
	    (long)(sbrk_used_sum/count));
	printf("# current size allocated: %lu bytes\n", total_size);
	printf("# maximum size allocated: %lu bytes\n", total_size_max);
	printf("# average size allocated: %.1f bytes\n", total_size_sum/count);
	printf("# current heap waste: %.2f%%\n",
	    (1.0 - (double)total_size_max/sbrk_max)*100.0);
	printf("# average heap waste: %.2f%%\n",
	    (1.0 - (double)total_size_sum/sbrk_used_sum)*100.0);
	printf("# total sbrk calls performed: %lu\n", sbins);
#else
	printf("size=%7lu waste=%7.3f%%", size,
	    /* (1.0 - (double)total_size_max/sbrk_max)*100.0, */
	    (1.0 - (double)total_size_sum/sbrk_used_sum)*100.0);
	print_times();
	printf("\n");
#endif
	return 0;
}
/* testing:
* gcc -Wall -Werror -fpic -march=native -mtune=native -Ofast test.c -o test
* gcc -Wall -Werror -fpic -march=native -mtune=native -Ofast test.c -o test -lumem -lumem_malloc
*
* https://github.com/sharkdp/hyperfine
*
$ ldd test
linux-vdso.so.1 (0x00007ffc607de000)
libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007fb619a4d000)
/lib64/ld-linux-x86-64.so.2 (0x00007fb619ce9000)
$ ldd test_umem
linux-vdso.so.1 (0x00007ffd1ff59000)
libumem_malloc.so.0 => /usr/local/lib/libumem_malloc.so.0 (0x00007fe885b18000)
libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007fe885926000)
libpthread.so.0 => /lib/x86_64-linux-gnu/libpthread.so.0 (0x00007fe885903000)
libumem.so.0 => /usr/local/lib/libumem.so.0 (0x00007fe88585b000)
/lib64/ld-linux-x86-64.so.2 (0x00007fe885bc7000)
libdl.so.2 => /lib/x86_64-linux-gnu/libdl.so.2 (0x00007fe885855000)
$ hyperfine --warmup 3 --min-runs 10 ./test
Benchmark #1: ./test
Time (mean ± σ): 82.1 ms ± 1.1 ms [User: 81.0 ms, System: 0.9 ms]
Range (min max): 79.3 ms 84.4 ms 35 runs
$ hyperfine --warmup 3 --min-runs 10 ./test_umem
Benchmark #1: ./test_umem
Time (mean ± σ): 85.7 ms ± 1.5 ms [User: 83.2 ms, System: 2.5 ms]
Range (min max): 81.8 ms 89.2 ms 34 runs
*/
/*
* Local variables:
* tab-width:4
* compile-command: "gcc -fpic -Wall -Werror -Ofast -march=native -mtune=native test.c -o test"
* End:
*/

280
umem.c
View file

@ -28,7 +28,10 @@
* Portions Copyright 2006-2008 Message Systems, Inc. All rights reserved. * Portions Copyright 2006-2008 Message Systems, Inc. All rights reserved.
*/ */
/* #pragma ident "@(#)umem.c 1.11 05/06/08 SMI" */ /*
* Copyright (c) 2014 Joyent, Inc. All rights reserved.
* Copyright (c) 2015 by Delphix. All rights reserved.
*/
/*! /*!
* \mainpage Main Page * \mainpage Main Page
@ -78,7 +81,7 @@
* *
* 1. Overview * 1. Overview
* ----------- * -----------
* umem is very close to kmem in implementation. There are four major * umem is very close to kmem in implementation. There are seven major
* areas of divergence: * areas of divergence:
* *
* * Initialization * * Initialization
@ -91,6 +94,10 @@
* *
* * lock ordering * * lock ordering
* *
* * changing UMEM_MAXBUF
*
* * Per-thread caching for malloc/free
*
* 2. Initialization * 2. Initialization
* ----------------- * -----------------
* kmem is initialized early on in boot, and knows that no one will call * kmem is initialized early on in boot, and knows that no one will call
@ -402,6 +409,232 @@
* *
* The second place to update, which is not required, is the umem_alloc_sizes. * The second place to update, which is not required, is the umem_alloc_sizes.
* These determine the default cache sizes that we're going to support. * These determine the default cache sizes that we're going to support.
*
* 8. Per-thread caching for malloc/free
* -------------------------------------
*
* "Time is an illusion. Lunchtime doubly so." -- Douglas Adams
*
* Time may be an illusion, but CPU cycles aren't. While libumem is designed
* to be a highly scalable allocator, that scalability comes with a fixed cycle
* penalty even in the absence of contention: libumem must acquire (and release)
* a per-CPU lock for each allocation. When contention is low and malloc(3C)
* frequency is high, this overhead can dominate execution time. To alleviate
* this, we allow for per-thread caching, a lock-free means of caching recent
* deallocations on a per-thread basis for use in satisfying subsequent calls
* to malloc(3C).
*
* In addition to improving performance, we also want to:
* * Minimize fragmentation
* * Not add additional memory overhead (no larger malloc tags)
*
* In the ulwp_t of each thread there is a private data structure called a
* tmem_t that looks like:
*
* typedef struct {
* size_t tm_size;
* void *tm_roots[NTMEMBASE]; (Currently 16)
* } tmem_t;
*
* Each of the roots is treated as the head of a linked list. Each entry in the
* list can be thought of as a void ** which points to the next entry, until one
* of them points to NULL. If the head points to NULL, the list is empty.
*
* Each head corresponds to a umem_cache. Currently there is a linear mapping
* where the first root corresponds to the first cache, second root to the
* second cache, etc. This works because every allocation that malloc makes to
* umem_alloc that can be satisfied by a umem_cache will actually return a
* number of bytes equal to the size of that cache. Because of this property and
* a one to one mapping between caches and roots we can guarantee that every
* entry in a given root's list will be able to satisfy the same requests as the
* corresponding cache.
*
* The choice of sixteen roots is based on where we believe we get the biggest
* bang for our buck. The per-thread caches will cache up to 256 byte and 448
* byte allocations on ILP32 and LP64 respectively. Generally applications plan
* more carefully how they do larger allocations than smaller ones. Therefore
* sixteen roots is a reasonable compromise between the amount of additional
* overhead per thread, and the likelihood of a program to benefit from it.
*
* The maximum amount of memory that can be cached in each thread is determined
* by the perthread_cache UMEM_OPTION. It corresponds to the umem_ptc_size
* value. The default value for this is currently 1 MB. Once umem_init() has
* finished this cannot be directly tuned without directly modifying the
* instruction text. If, upon calling free(3C), the amount cached would exceed
* this maximum, we instead actually return the buffer to the umem_cache instead
* of holding onto it in the thread.
*
* When a thread calls malloc(3C) it first determines which umem_cache it
* would be serviced by. If the allocation is not covered by ptcumem it goes to
* the normal malloc instead. Next, it checks if the tmem_root's list is empty
* or not. If it is empty, we instead go and allocate the memory from
* umem_alloc. If it is not empty, we remove the head of the list, set the
* appropriate malloc tags, and return that buffer.
*
* When a thread calls free(3C) it first looks at the malloc tag. If the tag is
* invalid or the allocation exceeds the largest cache in ptcumem, it sends it
* off to the original free() to handle and clean up appropriately. Next, it
* checks if the allocation size is covered by one of the per-thread roots and
* if it isn't, it passes it off to the original free() to be released. Finally,
* before it inserts this buffer as the head, it checks if adding this buffer
* would put the thread over its maximum cache size. If it would, it frees the
* buffer back to the umem_cache. Otherwise it increments the thread's total
* cached amount and makes the buffer the new head of the appropriate tm_root.
*
* When a thread exits, all of the buffers that it has in its per-thread cache
* will be passed to umem_free() and returned to the appropriate umem_cache.
*
* 8.1 Handling addition and removal of umem_caches
* ------------------------------------------------
*
* The set of umem_caches that are used to back calls to umem_alloc() and
* ultimately malloc() are determined at program execution time. The default set
* of caches is defined below in umem_alloc_sizes[]. Various umem_options exist
* that modify the set of caches: size_add, size_clear, and size_remove. Because
* the set of caches can only be determined once umem_init() has been called and
* we have the additional goals of minimizing additional fragmentation and
* metadata space overhead in the malloc tags, this forces our hand to go down a
* slightly different path: the one trod by fasttrap and trapstat.
*
* During umem_init we're going to dynamically construct a new version of
* malloc(3C) and free(3C) that utilizes the known cache sizes and then ensure
* that ptcmalloc and ptcfree replace malloc and free as entries in the plt. If
* ptcmalloc and ptcfree cannot handle a request, they simply jump to the
* original libumem implementations.
*
* After creating all of the umem_caches, but before making them visible,
* umem_cache_init checks that umem_genasm_supported is non-zero. This value is
* set by each architecture in $ARCH/umem_genasm.c to indicate whether or not
* they support this. If the value is zero, then this process is skipped.
* Similarly, if the cache size has been tuned to zero by UMEM_OPTIONS, then
* this is also skipped.
*
* In umem_genasm.c, each architecture's implementation implements a single
* function called umem_genasm() that is responsible for generating the
* appropriate versions of ptcmalloc() and ptcfree(), placing them in the
* appropriate memory location, and finally doing the switch from malloc() and
* free() to ptcmalloc() and ptcfree(). Once the change has been made, there is
* no way to switch back, short of restarting the program or modifying program
* text with mdb.
*
* 8.2 Modifying the Procedure Linkage Table (PLT)
* -----------------------------------------------
*
* The last piece of this puzzle is how we actually jam ptcmalloc() into the
* PLT. To handle this, we have defined two functions, _malloc and _free and
* used a special mapfile directive to place them into a readable,
* writeable, and executable segment. Next we use a standard #pragma weak for
* malloc and free and direct them to those symbols. By default, those symbols
* have text defined as nops for our generated functions and when they're
* invoked, they jump to the default malloc and free functions.
*
* When umem_genasm() is called, it goes through and generates new malloc() and
* free() functions in the text provided for by _malloc and _free just after the
* jump. Once both have been successfully generated, umem_genasm() nops over the
* original jump so that we now call into the genasm versions of these
* functions.
*
* 8.3 umem_genasm()
* -----------------
*
* umem_genasm() is currently implemented for i386 and amd64. This section
* describes the theory behind the construction. For specific byte code to
* assembly instructions and niceish C and asm versions of ptcmalloc and
* ptcfree, see the individual umem_genasm.c files. The layout consists of the
* following sections:
*
* o. function-specific prologue
* o. function-generic cache-selecting elements
* o. function-specific epilogue
*
* There are three different generic cache elements that exist:
*
* o. the last or only cache
* o. the intermediary caches if more than two
* o. the first one if more than one cache
*
* The malloc and free prologues and epilogues mimic the necessary portions of
* libumem's malloc and free. This includes things like checking for size
* overflow, setting and verifying the malloc tags.
*
* It is an important constraint that these functions do not make use of the
* call instruction. The only jmp outside of the individual functions is to the
* original libumem malloc and free respectively. Because doing things like
* setting errno or raising an internal umem error on improper malloc tags would
* require using calls into the PLT, whenever we encounter one of those cases we
* just jump to the original malloc and free functions reusing the same stack
* frame.
*
* Each of the above sections, the three caches, and the malloc and free
* prologue and epilogue are implemented as blocks of machine code with the
* corresponding assembly in comments. There are known offsets into each block
* that corresponds to locations of data and addresses that we only know at run
* time. These blocks are copied as necessary and the blanks filled in
* appropriately.
*
* As mentioned in section 8.2, the trampoline library uses specifically named
* variables to communicate the buffers and size to use. These variables are:
*
* o. umem_genasm_mptr: The buffer for ptcmalloc
* o. umem_genasm_msize: The size in bytes of the above buffer
* o. umem_genasm_fptr: The buffer for ptcfree
* o. umem_genasm_fsize: The size in bytes of the above buffer
*
* Finally, to enable the generated assembly we need to remove the previous jump
* to the actual malloc that exists at the start of these buffers. On x86, this
* is a five byte region. We could zero out the jump offset to be a jmp +0, but
* using nops can be faster. We specifically use a single five byte nop on x86
* as it is faster. When porting ptcumem to other architectures, the various
* opcode changes and options should be analyzed.
*
* 8.4 Interface with libc.so
* --------------------------
*
* The tmem_t structure as described in the beginning of section 8, is part of a
* private interface with libc. There are three functions that exist to cover
* this. They are not documented in man pages or header files. They are in the
* SUNWprivate part of libc's mapfile.
*
* o. _tmem_get_base(void)
*
* Returns the offset from the ulwp_t (curthread) to the tmem_t structure.
* This is a constant for all threads and is effectively a way to do
* ::offsetof ulwp_t ul_tmem without having to know the specifics of the
* structure outside of libc.
*
* o. _tmem_get_nentries(void)
*
* Returns the number of roots that exist in the tmem_t. This is one part
* of the cap on the number of umem_caches that we can back with tmem.
*
* o. _tmem_set_cleanup(void (*)(void *, int))
*
* This sets a clean up handler that gets called back when a thread exits.
* There is one call per buffer, the void * is a pointer to the buffer on
* the list, the int is the index into the roots array for this buffer.
*
* 8.5 Tuning and disabling per-thread caching
* -------------------------------------------
*
* There is only one tunable for per-thread caching: the amount of memory each
* thread should be able to cache. This is specified via the perthread_cache
* UMEM_OPTION option. No attempt is made to sanity check the specified
* value; the limit is simply the maximum value of a size_t.
*
* If the perthread_cache UMEM_OPTION is set to zero, nomagazines was requested,
* or UMEM_DEBUG has been turned on then we will never call into umem_genasm;
* however, the trampoline audit library and jump will still be in place.
*
* 8.6 Observing efficacy of per-thread caching
* --------------------------------------------
*
* To understand the efficacy of per-thread caching, use the ::umastat dcmd
* to see the percentage of capacity consumed on a per-thread basis, the
* degree to which each umem cache contributes to per-thread cache consumption,
* and the number of buffers in per-thread caches on a per-umem cache basis.
* If more detail is required, the specific buffers in a per-thread cache can
* be iterated over with the umem_ptc_* walkers. (These walkers allow an
* optional ulwp_t to be specified to iterate only over a particular thread's
* cache.)
*/ */
#include "config.h" #include "config.h"
@ -524,8 +757,10 @@ size_t umem_lite_minsize = 0; /* minimum buffer size for UMF_LITE */
size_t umem_lite_maxalign = 1024; /* maximum buffer alignment for UMF_LITE */ size_t umem_lite_maxalign = 1024; /* maximum buffer alignment for UMF_LITE */
size_t umem_maxverify; /* maximum bytes to inspect in debug routines */ size_t umem_maxverify; /* maximum bytes to inspect in debug routines */
size_t umem_minfirewall; /* hardware-enforced redzone threshold */ size_t umem_minfirewall; /* hardware-enforced redzone threshold */
size_t umem_ptc_size = 1048576; /* size of per-thread cache (in bytes) */
uint_t umem_flags = 0; uint_t umem_flags = 0;
uintptr_t umem_tmem_off;
mutex_t umem_init_lock = DEFAULTMUTEX; /* locks initialization */ mutex_t umem_init_lock = DEFAULTMUTEX; /* locks initialization */
cond_t umem_init_cv = DEFAULTCV; /* initialization CV */ cond_t umem_init_cv = DEFAULTCV; /* initialization CV */
@ -533,6 +768,8 @@ thread_t umem_init_thr; /* thread initializing */
int umem_init_env_ready; /* environ pre-initted */ int umem_init_env_ready; /* environ pre-initted */
int umem_ready = UMEM_READY_STARTUP; int umem_ready = UMEM_READY_STARTUP;
int umem_ptc_enabled; /* per-thread caching enabled */
static umem_nofail_callback_t *nofail_callback; static umem_nofail_callback_t *nofail_callback;
static mutex_t umem_nofail_exit_lock = DEFAULTMUTEX; static mutex_t umem_nofail_exit_lock = DEFAULTMUTEX;
static thread_t umem_nofail_exit_thr; static thread_t umem_nofail_exit_thr;
@ -568,8 +805,6 @@ extern thread_t _thr_self(void);
#ifndef CPUHINT #ifndef CPUHINT
# define CPUHINT() ((int)(_thr_self())) # define CPUHINT() ((int)(_thr_self()))
#endif #endif
#define CPUHINT_MAX() INT_MAX #define CPUHINT_MAX() INT_MAX
#define CPU(mask) (umem_cpus + (CPUHINT() & (mask))) #define CPU(mask) (umem_cpus + (CPUHINT() & (mask)))
@ -1062,6 +1297,9 @@ umem_alloc_retry(umem_cache_t *cp, int umflag)
* Initialization failed. Do normal failure processing. * Initialization failed. Do normal failure processing.
*/ */
} }
if (umem_flags & UMF_CHECKNULL) {
umem_err_recoverable("umem: out of heap space");
}
if (umflag & UMEM_NOFAIL) { if (umflag & UMEM_NOFAIL) {
int def_result = UMEM_CALLBACK_EXIT(255); int def_result = UMEM_CALLBACK_EXIT(255);
int result = def_result; int result = def_result;
@ -2917,6 +3155,24 @@ umem_alloc_sizes_remove(size_t size)
umem_alloc_sizes[i] = 0; umem_alloc_sizes[i] = 0;
} }
/*
 * Thread-exit callback handed to libc via _tmem_set_cleanup(): return one
 * buffer from the terminating thread's per-thread cache to its umem cache.
 *
 * 'buf' is the cached buffer and 'entry' is the index of the tmem root it was
 * on.  That index is used directly to index umem_alloc_sizes[]; the resulting
 * size is then mapped through umem_alloc_table[] to find the cache that the
 * buffer is freed back to.
 */
static void
umem_cache_tmem_cleanup(void *buf, int entry)
{
	/* Widen to size_t before subtracting, exactly as the table lookup expects. */
	const size_t bufsize = umem_alloc_sizes[entry];
	umem_cache_t *const cache =
	    umem_alloc_table[(bufsize - 1) >> UMEM_ALIGN_SHIFT];

	_umem_cache_free(cache, buf);
}
static int static int
umem_cache_init(void) umem_cache_init(void)
{ {
@ -3032,6 +3288,16 @@ umem_cache_init(void)
umem_alloc_caches[i] = cp; umem_alloc_caches[i] = cp;
} }
umem_tmem_off = _tmem_get_base();
_tmem_set_cleanup(umem_cache_tmem_cleanup);
if (umem_genasm_supported && !(umem_flags & UMF_DEBUG) &&
!(umem_flags & UMF_NOMAGAZINE) &&
umem_ptc_size > 0) {
umem_ptc_enabled = umem_genasm(umem_alloc_sizes,
umem_alloc_caches, i) == 0 ? 1 : 0;
}
/* /*
* Initialization cannot fail at this point. Make the caches * Initialization cannot fail at this point. Make the caches
* visible to umem_alloc() and friends. * visible to umem_alloc() and friends.
@ -3058,7 +3324,12 @@ umem_cache_init(void)
* umem_startup() is called early on, and must be called explicitly if we're * umem_startup() is called early on, and must be called explicitly if we're
* the standalone version. * the standalone version.
*/ */
#ifdef UMEM_STANDALONE
void void
#else
#pragma init(umem_startup)
static void
#endif
umem_startup(caddr_t start, size_t len, size_t pagesize, caddr_t minstack, umem_startup(caddr_t start, size_t len, size_t pagesize, caddr_t minstack,
caddr_t maxstack) caddr_t maxstack)
{ {
@ -3365,4 +3636,3 @@ __umem_init (void)
umem_startup(NULL, 0, 0, NULL, NULL); umem_startup(NULL, 0, 0, NULL, NULL);
} }
#endif #endif

View file

@ -1,26 +1,13 @@
'\" te '\" te
.\" CDDL HEADER START .\" Copyright (c) 2008 Sun Microsystems, Inc. All Rights Reserved.
.\" .\" Copyright (c) 2012 Joyent, Inc. All Rights Reserved.
.\" The contents of this file are subject to the terms of the .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" Common Development and Distribution License (the "License"). .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" You may not use this file except in compliance with the License. .\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
.\" .TH UMEM_ALLOC 3MALLOC "Mar 24, 2008"
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
.\" or http://www.opensolaris.org/os/licensing.
.\" See the License for the specific language governing permissions
.\" and limitations under the License.
.\"
.\" When distributing Covered Code, include this CDDL HEADER in each
.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
.\"
.\" CDDL HEADER END
.\" Copyright (c) 2002, Sun Microsystems, Inc. All Rights Reserved.
.TH umem_alloc 3MALLOC "26 Aug 2002" "SunOS 5.11" "Memory Allocation Library Functions"
.SH NAME .SH NAME
umem_alloc, umem_zalloc, umem_free, umem_nofail_callback \- fast, scalable memory allocation umem_alloc, umem_zalloc, umem_free, umem_nofail_callback \- fast, scalable
memory allocation
.SH SYNOPSIS .SH SYNOPSIS
.LP .LP
.nf .nf
@ -29,162 +16,151 @@ cc [ \fIflag \&.\|.\|.\fR ] \fIfile\fR\&.\|.\|. \fB-lumem\fR [ \fIlibrary \&.\|.
\fBvoid *\fR\fBumem_alloc\fR(\fBsize_t\fR \fIsize\fR, \fBint\fR \fIflags\fR); \fBvoid *\fR\fBumem_alloc\fR(\fBsize_t\fR \fIsize\fR, \fBint\fR \fIflags\fR);
.fi .fi
.LP .LP
.nf .nf
\fBvoid *\fR\fBumem_zalloc\fR(\fBsize_t\fR \fIsize\fR, \fBint\fR \fIflags\fR); \fBvoid *\fR\fBumem_zalloc\fR(\fBsize_t\fR \fIsize\fR, \fBint\fR \fIflags\fR);
.fi .fi
.LP .LP
.nf .nf
\fBvoid\fR \fBumem_free\fR(\fBvoid *\fR\fIbuf\fR, \fBsize_t\fR \fIsize\fR); \fBvoid\fR \fBumem_free\fR(\fBvoid *\fR\fIbuf\fR, \fBsize_t\fR \fIsize\fR);
.fi .fi
.LP .LP
.nf .nf
\fBvoid\fR \fBumem_nofail_callback\fR(\fB(int (*\fR\fIcallback\fR)(void)); \fBvoid\fR \fBumem_nofail_callback\fR(\fB(int (*\fR\fIcallback\fR)(void));
.fi .fi
.LP .LP
.nf .nf
\fBvoid *\fR\fBmalloc\fR(\fBsize_t\fR \fIsize\fR); \fBvoid *\fR\fBmalloc\fR(\fBsize_t\fR \fIsize\fR);
.fi .fi
.LP .LP
.nf .nf
\fBvoid *\fR\fBcalloc\fR(\fBsize_t\fR \fInelem\fR, \fBsize_t\fR \fIelsize\fR); \fBvoid *\fR\fBcalloc\fR(\fBsize_t\fR \fInelem\fR, \fBsize_t\fR \fIelsize\fR);
.fi .fi
.LP .LP
.nf .nf
\fBvoid\fR \fBfree\fR(\fBvoid *\fR\fIptr\fR); \fBvoid\fR \fBfree\fR(\fBvoid *\fR\fIptr\fR);
.fi .fi
.LP .LP
.nf .nf
\fBvoid *\fR\fBmemalign\fR(\fBsize_t\fR \fIalignment\fR, \fBsize_t\fR \fIsize\fR); \fBvoid *\fR\fBmemalign\fR(\fBsize_t\fR \fIalignment\fR, \fBsize_t\fR \fIsize\fR);
.fi .fi
.LP .LP
.nf .nf
\fBvoid *\fR\fBrealloc\fR(\fBvoid *\fR\fIptr\fR, \fBsize_t\fR \fIsize\fR); \fBvoid *\fR\fBrealloc\fR(\fBvoid *\fR\fIptr\fR, \fBsize_t\fR \fIsize\fR);
.fi .fi
.LP .LP
.nf .nf
\fBvoid *\fR\fBvalloc\fR(\fBsize_t\fR \fIsize\fR); \fBvoid *\fR\fBvalloc\fR(\fBsize_t\fR \fIsize\fR);
.fi .fi
.SH DESCRIPTION .SH DESCRIPTION
.LP
The \fBumem_alloc()\fR function returns a pointer to a block of \fIsize\fR bytes suitably aligned for any variable type. The initial contents of memory allocated using \fBumem_alloc()\fR is undefined. The \fIflags\fR argument determines
the behavior of \fBumem_alloc()\fR if it is unable to fulfill the request. The \fIflags\fR argument can take the following values:
.sp .sp
.LP
The \fBumem_alloc()\fR function returns a pointer to a block of \fIsize\fR
bytes suitably aligned for any variable type. The initial contents of memory
allocated using \fBumem_alloc()\fR is undefined. The \fIflags\fR argument
determines the behavior of \fBumem_alloc()\fR if it is unable to fulfill the
request. The \fIflags\fR argument can take the following values:
.sp .sp
.ne 2 .ne 2
.mk
.na .na
\fB\fBUMEM_DEFAULT\fR\fR \fB\fBUMEM_DEFAULT\fR\fR
.ad .ad
.RS 14n .RS 16n
.rt
Return \fINULL\fR on failure. Return \fINULL\fR on failure.
.sp
.RE .RE
.sp .sp
.ne 2 .ne 2
.mk
.na .na
\fB\fBUMEM_NOFAIL\fR\fR \fB\fBUMEM_NOFAIL\fR\fR
.ad .ad
.RS 14n .RS 16n
.rt Call an optional \fIcallback\fR (set with \fBumem_nofail_callback()\fR) on
Call an optional \fIcallback\fR (set with \fBumem_nofail_callback()\fR) on failure. The \fIcallback\fR takes no arguments and can finish by: failure. The \fIcallback\fR takes no arguments and can finish by:
.sp
.sp
.RS +4 .RS +4
.TP .TP
.ie t \(bu .ie t \(bu
.el o .el o
returning \fBUMEM_CALLBACK_RETRY\fR, in which case the allocation will be retried. If the allocation fails, the callback will be invoked again. returning \fBUMEM_CALLBACK_RETRY\fR, in which case the allocation will be
.sp retried. If the allocation fails, the callback will be invoked again.
.RE .RE
.sp
.RS +4 .RS +4
.TP .TP
.ie t \(bu .ie t \(bu
.el o .el o
returning \fBUMEM_CALLBACK_EXIT\fR(\fIstatus\fR), in which case returning \fBUMEM_CALLBACK_EXIT\fR(\fIstatus\fR), in which case \fBexit\fR(2)
\fBexit\fR(2) is invoked with \fIstatus\fR is invoked with \fIstatus\fR as its argument. The \fBexit()\fR function is
as its argument. The \fBexit()\fR function is called only once. If multiple threads return from the \fBUMEM_NOFAIL\fR callback with \fBUMEM_CALLBACK_EXIT\fR(\fIstatus\fR), one will call \fBexit()\fR while the other blocks until \fBexit()\fR terminates the program. called only once. If multiple threads return from the \fBUMEM_NOFAIL\fR
.sp callback with \fBUMEM_CALLBACK_EXIT\fR(\fIstatus\fR), one will call
\fBexit()\fR while the other blocks until \fBexit()\fR terminates the program.
.RE .RE
.sp
.RS +4 .RS +4
.TP .TP
.ie t \(bu .ie t \(bu
.el o .el o
invoking a context-changing function ( invoking a context-changing function (\fBsetcontext\fR(2)) or a non-local jump
\fBsetcontext\fR(2)) or a non-local jump ( (\fBlongjmp\fR(3C) or \fBsiglongjmp\fR(3C), or ending the current thread of
\fBlongjmp\fR(3C) or control (\fBthr_exit\fR(3C) or \fBpthread_exit\fR(3C). The application is
\fBsiglongjmp\fR(3C), or ending the current thread of control ( responsible for any necessary cleanup. The state of \fBlibumem\fR remains
\fBthr_exit\fR(3C) or consistent.
\fBpthread_exit\fR(3C). The application is responsible for any necessary cleanup. The state of \fBlibumem\fR remains consistent. .RE
If no callback has been set or the callback has been set to \fINULL\fR,
\fBumem_alloc\fR(..., \fBUMEM_NOFAIL\fR) behaves as though the callback
returned \fBUMEM_CALLBACK_EXIT\fR(255).
.sp .sp
The \fBlibumem\fR library can call callbacks from any place that a
\fBUMEM_NOFAIL\fR allocation is issued. In multithreaded applications,
callbacks are expected to perform their own concurrency management.
.RE .RE
If no callback has been set or the callback has been set to \fINULL\fR, \fBumem_alloc\fR(..., \fBUMEM_NOFAIL\fR) behaves as though the callback returned \fBUMEM_CALLBACK_EXIT\fR(255).
.sp .sp
.sp
The \fBlibumem\fR library can call callbacks from any place that a \fBUMEM_NOFAIL\fR allocation is issued. In multithreaded applications, callbacks are expected to perform their own concurrency management.
.sp
.RE
.LP .LP
The function call \fBumem_alloc\fR(0, \fIflag\fR) always returns \fINULL\fR. The function call \fBumem_free\fR(\fINULL\fR, 0) is allowed. The function call \fBumem_alloc\fR(0, \fIflag\fR) always returns \fINULL\fR.
The function call \fBumem_free\fR(\fINULL\fR, 0) is allowed.
.sp .sp
.LP .LP
The \fBumem_zalloc()\fR function has the same semantics as \fBumem_alloc()\fR, but the block of memory is initialized to zeros before it is returned. The \fBumem_zalloc()\fR function has the same semantics as \fBumem_alloc()\fR,
but the block of memory is initialized to zeros before it is returned.
.sp .sp
.LP .LP
The \fBumem_free()\fR function frees blocks previously allocated using \fBumem_alloc()\fR and \fBumem_zalloc()\fR. The buffer address and size must exactly match the original allocation. Memory must not be returned piecemeal. The \fBumem_free()\fR function frees blocks previously allocated using
\fBumem_alloc()\fR and \fBumem_zalloc()\fR. The buffer address and size must
exactly match the original allocation. Memory must not be returned piecemeal.
.sp .sp
.LP .LP
The \fBumem_nofail_callback()\fR function sets the process-wide UMEM_NOFAIL callback. See the description of UMEM_NOFAIL for more information. The \fBumem_nofail_callback()\fR function sets the process-wide UMEM_NOFAIL
callback. See the description of UMEM_NOFAIL for more information.
.sp .sp
.LP .LP
The \fBmalloc()\fR, \fBcalloc()\fR, \fBfree()\fR, \fBmemalign()\fR, \fBrealloc()\fR, and \fBvalloc()\fR functions are are as described in The \fBmalloc()\fR, \fBcalloc()\fR, \fBfree()\fR, \fBmemalign()\fR,
\fBmalloc\fR(3C). The \fBlibumem\fR library provides these functions for backwards-compatibility with the standard functions. \fBrealloc()\fR, and \fBvalloc()\fR functions are as described in
.sp \fBmalloc\fR(3C). The \fBlibumem\fR library provides these functions for
backwards-compatibility with the standard functions.
.SH ENVIRONMENT VARIABLES .SH ENVIRONMENT VARIABLES
.sp
.LP .LP
See See \fBumem_debug\fR(3MALLOC) for environment variables that effect the
\fBumem_debug\fR(3MALLOC) for environment variables that effect the debugging features of the \fBlibumem\fR library. debugging features of the \fBlibumem\fR library.
.sp
.sp .sp
.ne 2 .ne 2
.mk
.na .na
\fBUMEM_OPTIONS\fR \fB\fBUMEM_OPTIONS\fR\fR
.ad .ad
.RS 14n .RS 16n
.rt Contains a list of comma-separated options. Unrecognized options are ignored.
Contains a list of comma-separated options. Unrecognized options are ignored. The options that are supported are: The options that are supported are:
.sp
.sp .sp
.ne 2 .ne 2
.mk
.na .na
\fB\fBbackend\fR=\fBsbrk\fR\fR \fB\fBbackend\fR=\fBsbrk\fR\fR
.ad .ad
@ -192,14 +168,49 @@ Contains a list of comma-separated options. Unrecognized options are ignored. T
.na .na
\fB\fBbackend\fR=\fBmmap\fR\fR \fB\fBbackend\fR=\fBmmap\fR\fR
.ad .ad
.RS 14n .RS 16n
.rt Set the underlying function used to allocate memory. This option can be set to
Set the underlying function used to allocate memory. This option can be set to \fBsbrk\fR (the default) for an \fBsbrk\fR (the default) for an \fBsbrk\fR(2)-based source or \fBmmap\fR for an
\fBsbrk\fR(2)-based source or \fBmmap\fR for an \fBmmap\fR(2)-based source. If set to a value that is not supported, \fBsbrk\fR
\fBmmap\fR(2)-based will be used.
source. If set to a value that is not supported, \fBsbrk\fR will be used. .RE
.sp .sp
.ne 2
.na
\fB\fBperthread_cache\fR=\fBsize\fR\fR
.ad
.RS 16n
libumem allows for each thread to cache recently freed small allocations for
future allocations. The size argument, which accepts k, m, g, and t suffixes,
denotes the maximum amount of memory each thread can use for this purpose. The
default amount used is 1 MB. Any buffers in the per-thread cache are freed when
the thread exits. The efficacy of the per-thread cache can be determined with
the \fB::umastat\fR \fBmdb\fR(1) \fIdcmd\fR debugger command.
.RE
.ne 2
.na
\fB\fBallocator\fR=\fBbest\fR\fR
.ad
.br
.na
\fB\fBallocator\fR=\fBfirst\fR\fR
.ad
.br
.na
\fB\fBallocator\fR=\fBinstant\fR\fR
.ad
.br
.na
\fB\fBallocator\fR=\fBnext\fR\fR
.ad
.RS 16n
Set the underlying allocation strategy. The \fBbest\fR fit strategy tells
libumem to use the smallest free segment possible. The \fBinstant\fR fit
strategy approximates the best fit strategy in constant cpu time. The
\fBfirst\fR fit strategy takes the first free segment that can honor the
allocation. The \fBnext\fR fit strategy uses the next free segment after the
previously allocated one.
.RE .RE
.RE .RE
@ -207,8 +218,7 @@ source. If set to a value that is not supported, \fBsbrk\fR will be used.
.SH EXAMPLES .SH EXAMPLES
.LP .LP
\fBExample 1 \fRUsing the \fBumem_alloc()\fR function. \fBExample 1 \fRUsing the \fBumem_alloc()\fR function.
.sp
.LP
.in +2 .in +2
.nf .nf
#include <stdio.h> #include <stdio.h>
@ -226,10 +236,10 @@ umem_free(buf, 1024);
\&... \&...
.fi .fi
.in -2 .in -2
.LP
\fBExample 2 \fRUsing the \fBumem_zalloc()\fR function
.LP .LP
\fBExample 2 \fRUsing the \fBumem_zalloc()\fR function
.sp
.in +2 .in +2
.nf .nf
#include <stdio.h> #include <stdio.h>
@ -247,10 +257,10 @@ umem_free(buf, 1024);
\&... \&...
.fi .fi
.in -2 .in -2
.LP
\fBExample 3 \fRUsing UMEM_NOFAIL
.LP .LP
\fBExample 3 \fRUsing UMEM_NOFAIL
.sp
.in +2 .in +2
.nf .nf
#include <stdlib.h> #include <stdlib.h>
@ -258,9 +268,9 @@ umem_free(buf, 1024);
#include <umem.h> #include <umem.h>
/* /*
* Note that the allocation code below does not have to * Note that the allocation code below does not have to
* check for umem_alloc() returning NULL * check for umem_alloc() returning NULL
*/ */
int int
my_failure_handler(void) my_failure_handler(void)
{ {
@ -281,10 +291,10 @@ for (i = 0; i < 100; i++)
\&... \&...
.fi .fi
.in -2 .in -2
.LP
\fBExample 4 \fRUsing UMEM_NOFAIL in a multithreaded application
.LP .LP
\fBExample 4 \fRUsing UMEM_NOFAIL in a multithreaded application
.sp
.in +2 .in +2
.nf .nf
#define _REENTRANT #define _REENTRANT
@ -340,186 +350,123 @@ if (status == NULL) {
.in -2 .in -2
.SH ATTRIBUTES .SH ATTRIBUTES
.LP
See
\fBattributes\fR(5) for descriptions of the following attributes:
.sp .sp
.LP .LP
See \fBattributes\fR(5) for descriptions of the following attributes:
.sp
.sp .sp
.TS .TS
tab() box; box;
cw(2.75i) |cw(2.75i) c | c
lw(2.75i) |lw(2.75i) l | l .
. ATTRIBUTE TYPE ATTRIBUTE VALUE
ATTRIBUTE TYPEATTRIBUTE VALUE
_ _
Interface StabilitySee below. Interface Stability Committed
_ _
MT-LevelMT-Safe MT-Level MT-Safe
_
Standard See below.
.TE .TE
.LP
The \fBmalloc()\fR, \fBcalloc()\fR, \fBfree()\fR, \fBrealloc()\fR, and \fBvalloc()\fR functions are Standard. The \fBmemalign()\fR function is Stable. The \fBumem_alloc()\fR, \fBumem_zalloc()\fR, \fBumem_free()\fR, and \fBumem_nofail_callback()\fR functions are Evolving.
.sp .sp
.SH SEE ALSO
.LP .LP
For \fBmalloc()\fR, \fBcalloc()\fR, \fBfree()\fR, \fBrealloc()\fR, and
\fBexit\fR(2), \fBvalloc()\fR, see \fBstandards\fR(5).
\fBmmap\fR(2), .SH SEE ALSO
\fBsbrk\fR(2), .sp
\fBbsdmalloc\fR(3MALLOC), .LP
\fBlibumem\fR(3LIB), \fBexit\fR(2), \fBmmap\fR(2), \fBsbrk\fR(2), \fBbsdmalloc\fR(3MALLOC),
\fBlongjmp\fR(3C), \fBlibumem\fR(3LIB), \fBlongjmp\fR(3C), \fBmalloc\fR(3C),
\fBmalloc\fR(3C), \fBmalloc\fR(3MALLOC), \fBmapmalloc\fR(3MALLOC), \fBpthread_exit\fR(3C),
\fBmalloc\fR(3MALLOC), \fBthr_exit\fR(3C), \fBumem_cache_create\fR(3MALLOC),
\fBmapmalloc\fR(3MALLOC), \fBumem_debug\fR(3MALLOC), \fBwatchmalloc\fR(3MALLOC), \fBattributes\fR(5),
\fBpthread_exit\fR(3C),
\fBthr_exit\fR(3C),
\fBumem_cache_create\fR(3MALLOC),
\fBumem_debug\fR(3MALLOC),
\fBwatchmalloc\fR(3MALLOC),
\fBattributes\fR(5),
\fBstandards\fR(5) \fBstandards\fR(5)
.sp .sp
.LP .LP
\fISolaris Modular Debugger Guide\fR
.sp
.SH WARNINGS .SH WARNINGS
.sp
.LP .LP
Any of the following can cause undefined results: Any of the following can cause undefined results:
.sp
.sp
.RS +4 .RS +4
.TP .TP
.ie t \(bu .ie t \(bu
.el o .el o
Passing a pointer returned from \fBumem_alloc()\fR or \fBumem_zalloc()\fR to \fBfree()\fR or \fBrealloc()\fR. Passing a pointer returned from \fBumem_alloc()\fR or \fBumem_zalloc()\fR to
.sp \fBfree()\fR or \fBrealloc()\fR.
.RE .RE
.sp
.RS +4 .RS +4
.TP .TP
.ie t \(bu .ie t \(bu
.el o .el o
Passing a pointer returned from \fBmalloc()\fR, \fBcalloc()\fR, \fBvalloc()\fR, \fBmemalign()\fR, or \fBrealloc()\fR to \fBumem_free()\fR. Passing a pointer returned from \fBmalloc()\fR, \fBcalloc()\fR, \fBvalloc()\fR,
.sp \fBmemalign()\fR, or \fBrealloc()\fR to \fBumem_free()\fR.
.RE .RE
.sp
.RS +4 .RS +4
.TP .TP
.ie t \(bu .ie t \(bu
.el o .el o
Writing past the end of a buffer allocated using \fBumem_alloc()\fR or \fBumem_zalloc()\fR Writing past the end of a buffer allocated using \fBumem_alloc()\fR or
.sp \fBumem_zalloc()\fR
.RE .RE
.sp
.RS +4 .RS +4
.TP .TP
.ie t \(bu .ie t \(bu
.el o .el o
Performing \fBUMEM_NOFAIL\fR allocations from an Performing \fBUMEM_NOFAIL\fR allocations from an \fBatexit\fR(3C) handler.
\fBatexit\fR(3C) handler.
.sp
.RE .RE
.sp
.LP .LP
If the \fBUMEM_NOFAIL\fR callback performs \fBUMEM_NOFAIL\fR allocations, infinite recursion can occur. If the \fBUMEM_NOFAIL\fR callback performs \fBUMEM_NOFAIL\fR allocations,
.sp infinite recursion can occur.
.SH NOTES .SH NOTES
.sp
.LP .LP
The following list compares the features of the The following list compares the features of the \fBmalloc\fR(3C),
\fBmalloc\fR(3C), \fBbsdmalloc\fR(3MALLOC), \fBmalloc\fR(3MALLOC), \fBmtmalloc\fR(3MALLOC), and
\fBbsdmalloc\fR(3MALLOC), the \fBlibumem\fR functions.
\fBmalloc\fR(3MALLOC),
\fBmtmalloc\fR(3MALLOC), and the \fBlibumem\fR
functions.
.sp
.sp
.RS +4 .RS +4
.TP .TP
.ie t \(bu .ie t \(bu
.el o .el o
The The \fBmalloc\fR(3C), \fBbsdmalloc\fR(3MALLOC), and \fBmalloc\fR(3MALLOC)
\fBmalloc\fR(3C), functions have no support for concurrency. The \fBlibumem\fR and
\fBbsdmalloc\fR(3MALLOC), and \fBmtmalloc\fR(3MALLOC) functions support concurrent allocations.
\fBmalloc\fR(3MALLOC) functions have no support for concurrency. The \fBlibumem\fR and
\fBmtmalloc\fR(3MALLOC)
functions support concurrent allocations.
.sp
.RE .RE
.sp
.RS +4 .RS +4
.TP .TP
.ie t \(bu .ie t \(bu
.el o .el o
The The \fBbsdmalloc\fR(3MALLOC) functions afford better performance but are
\fBbsdmalloc\fR(3MALLOC) functions afford better performance but are space-inefficient. space-inefficient.
.sp
.RE .RE
.sp
.RS +4 .RS +4
.TP .TP
.ie t \(bu .ie t \(bu
.el o .el o
The The \fBmalloc\fR(3MALLOC) functions are space-efficient but have slower
\fBmalloc\fR(3MALLOC) functions are space-efficient but have slower performance. performance.
.sp
.RE .RE
.sp
.RS +4 .RS +4
.TP .TP
.ie t \(bu .ie t \(bu
.el o .el o
The standard, fully SCD-compliant The standard, fully SCD-compliant \fBmalloc\fR(3C) functions are a trade-off
\fBmalloc\fR(3C) functions are a trade-off between performance and space-efficiency. between performance and space-efficiency.
.sp
.RE .RE
.sp
.RS +4 .RS +4
.TP .TP
.ie t \(bu .ie t \(bu
.el o .el o
The The \fBmtmalloc\fR(3MALLOC) functions provide fast, concurrent \fBmalloc()\fR
\fBmtmalloc\fR(3MALLOC) functions provide fast, concurrent \fBmalloc()\fR implementations that are not space-efficient. implementations that are not space-efficient.
.sp
.RE .RE
.sp
.RS +4 .RS +4
.TP .TP
.ie t \(bu .ie t \(bu
.el o .el o
The \fBlibumem\fR functions provide a fast, concurrent allocation implementation that in most cases is more space-efficient than The \fBlibumem\fR functions provide a fast, concurrent allocation
implementation that in most cases is more space-efficient than
\fBmtmalloc\fR(3MALLOC). \fBmtmalloc\fR(3MALLOC).
.sp
.RE .RE

View file

@ -23,6 +23,9 @@
* Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms. * Use is subject to license terms.
*/ */
/*
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#ifndef _UMEM_BASE_H #ifndef _UMEM_BASE_H
#define _UMEM_BASE_H #define _UMEM_BASE_H
@ -76,6 +79,8 @@ extern volatile uint32_t umem_reaping;
#define UMEM_REAP_ADDING 0x00000001 /* umem_reap() is active */ #define UMEM_REAP_ADDING 0x00000001 /* umem_reap() is active */
#define UMEM_REAP_ACTIVE 0x00000002 /* update thread is reaping */ #define UMEM_REAP_ACTIVE 0x00000002 /* update thread is reaping */
extern uintptr_t umem_tmem_off;
/* /*
* umem.c: tunables * umem.c: tunables
*/ */
@ -98,6 +103,7 @@ extern size_t umem_lite_minsize;
extern size_t umem_lite_maxalign; extern size_t umem_lite_maxalign;
extern size_t umem_maxverify; extern size_t umem_maxverify;
extern size_t umem_minfirewall; extern size_t umem_minfirewall;
extern size_t umem_ptc_size;
extern uint32_t umem_flags; extern uint32_t umem_flags;
@ -140,6 +146,20 @@ extern int umem_create_update_thread(void);
void umem_setup_envvars(int); void umem_setup_envvars(int);
void umem_process_envvars(void); void umem_process_envvars(void);
/*
* umem_genasm.c: private interfaces
*/
extern const int umem_genasm_supported;
extern int umem_genasm(int *, umem_cache_t **, int);
/*
* malloc.c: traditional malloc/free interface for genasm
*/
extern void *umem_malloc(size_t);
extern void umem_malloc_free(void *);
extern void *_malloc(size_t);
extern void _free(void *);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View file

@ -1,26 +1,12 @@
'\" te '\" te
.\" CDDL HEADER START .\" Copyright (c) 2009, Sun Microsystems, Inc. All Rights Reserved.
.\" .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" The contents of this file are subject to the terms of the .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" Common Development and Distribution License (the "License"). .\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
.\" You may not use this file except in compliance with the License. .TH UMEM_CACHE_CREATE 3MALLOC "Mar 24, 2008"
.\"
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
.\" or http://www.opensolaris.org/os/licensing.
.\" See the License for the specific language governing permissions
.\" and limitations under the License.
.\"
.\" When distributing Covered Code, include this CDDL HEADER in each
.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
.\"
.\" CDDL HEADER END
.\" Copyright (c) 2003, Sun Microsystems, Inc. All Rights Reserved.
.TH umem_cache_create 3MALLOC "4 Nov 2003" "SunOS 5.11" "Memory Allocation Library Functions"
.SH NAME .SH NAME
umem_cache_create, umem_cache_destroy, umem_cache_alloc, umem_cache_free \- allocation cache manipulation umem_cache_create, umem_cache_destroy, umem_cache_alloc, umem_cache_free \-
allocation cache manipulation
.SH SYNOPSIS .SH SYNOPSIS
.LP .LP
.nf .nf
@ -32,163 +18,133 @@ cc [ \fIflag \&.\|.\|.\fR ] \fIfile\fR\&.\|.\|. \fB-lumem\fR [ \fIlibrary \&.\|.
\fBumem_destructor_t *\fR\fIdestructor\fR, \fBumem_reclaim_t *\fR\fIreclaim\fR, \fBumem_destructor_t *\fR\fIdestructor\fR, \fBumem_reclaim_t *\fR\fIreclaim\fR,
\fBvoid *\fR\fIcallback_data\fR, \fBvmem_t *\fR\fIsource\fR, \fBint\fR \fIcflags\fR); \fBvoid *\fR\fIcallback_data\fR, \fBvmem_t *\fR\fIsource\fR, \fBint\fR \fIcflags\fR);
.fi .fi
.LP .LP
.nf .nf
\fBvoid\fR \fBumem_cache_destroy\fR(\fBumem_cache_t *\fR\fIcache\fR); \fBvoid\fR \fBumem_cache_destroy\fR(\fBumem_cache_t *\fR\fIcache\fR);
.fi .fi
.LP .LP
.nf .nf
\fBvoid *\fR\fBumem_cache_alloc\fR(\fBumem_cache_t *\fR\fIcache\fR, \fBint\fR \fIflags\fR); \fBvoid *\fR\fBumem_cache_alloc\fR(\fBumem_cache_t *\fR\fIcache\fR, \fBint\fR \fIflags\fR);
.fi .fi
.LP .LP
.nf .nf
\fBvoid\fR \fBumem_cache_free\fR(\fBumem_cache_t *\fR\fIcache\fR, \fBvoid *\fR\fIbuffer\fR); \fBvoid\fR \fBumem_cache_free\fR(\fBumem_cache_t *\fR\fIcache\fR, \fBvoid *\fR\fIbuffer\fR);
.fi .fi
.SH DESCRIPTION .SH DESCRIPTION
.LP
These functions create, destroy, and use an "object cache". An object cache is a collection of buffers of a single size, with optional content caching enabled by the use of callbacks (see \fBCache Callbacks\fR). Object caches are MT-Safe. Multiple allocations and freeing of
memory from different threads can proceed simultaneously. Object caches are faster and use less space per buffer than
\fBmalloc\fR(3MALLOC) and
\fBumem_alloc\fR(3MALLOC). For more information about object caching, see "The Slab Allocator: An Object-Caching Kernel Memory Allocator" and "Magazines
and vmem: Extending the Slab Allocator to Many CPUs and Arbitrary Resources".
.sp .sp
.LP .LP
The \fBumem_cache_create()\fR function creates object caches. Once a cache has been created, objects can be requested from and returned to the cache using \fBumem_cache_alloc()\fR and \fBumem_cache_free()\fR, respectively. A cache with no outstanding These functions create, destroy, and use an "object cache". An object cache is
buffers can be destroyed with \fBumem_cache_destroy()\fR. a collection of buffers of a single size, with optional content caching enabled
by the use of callbacks (see \fBCache Callbacks\fR). Object caches are
MT-Safe. Multiple allocations and freeing of memory from different threads can
proceed simultaneously. Object caches are faster and use less space per buffer
than \fBmalloc\fR(3MALLOC) and \fBumem_alloc\fR(3MALLOC). For more information
about object caching, see "The Slab Allocator: An Object-Caching Kernel Memory
Allocator" and "Magazines and vmem: Extending the Slab Allocator to Many CPUs
and Arbitrary Resources".
.sp .sp
.SS Creating and Destroying Caches
.LP .LP
The \fBumem_cache_create()\fR function creates a cache of objects and takes as arguments the following: The \fBumem_cache_create()\fR function creates object caches. Once a cache has
been created, objects can be requested from and returned to the cache using
\fBumem_cache_alloc()\fR and \fBumem_cache_free()\fR, respectively. A cache
with no outstanding buffers can be destroyed with \fBumem_cache_destroy()\fR.
.SS "Creating and Destroying Caches"
.sp .sp
.LP
The \fBumem_cache_create()\fR function creates a cache of objects and takes as
arguments the following:
.sp .sp
.ne 2 .ne 2
.mk
.na .na
\fB\fIdebug_name\fR\fR \fB\fIdebug_name\fR\fR
.ad .ad
.RS 15n .RS 17n
.rt
A human-readable name for debugging purposes. A human-readable name for debugging purposes.
.sp
.RE .RE
.sp .sp
.ne 2 .ne 2
.mk
.na .na
\fB\fIbufsize\fR\fR \fB\fIbufsize\fR\fR
.ad .ad
.RS 15n .RS 17n
.rt
The size, in bytes, of the buffers in this cache. The size, in bytes, of the buffers in this cache.
.sp
.RE .RE
.sp .sp
.ne 2 .ne 2
.mk
.na .na
\fB\fIalign\fR\fR \fB\fIalign\fR\fR
.ad .ad
.RS 15n .RS 17n
.rt The minimum alignment required for buffers in this cache. This parameter must
The minimum alignment required for buffers in this cache. This parameter must be a power of 2. If 0, it is replaced with the minimum required alignment for the current architecture. be a power of 2. If 0, it is replaced with the minimum required alignment for
.sp the current architecture.
.RE .RE
.sp .sp
.ne 2 .ne 2
.mk
.na .na
\fB\fIconstructor\fR\fR \fB\fIconstructor\fR\fR
.ad .ad
.RS 15n .RS 17n
.rt
The callback to construct an object. The callback to construct an object.
.sp
.RE .RE
.sp .sp
.ne 2 .ne 2
.mk
.na .na
\fB\fIdestructor\fR\fR \fB\fIdestructor\fR\fR
.ad .ad
.RS 15n .RS 17n
.rt
The callback to destroy an object. The callback to destroy an object.
.sp
.RE .RE
.sp .sp
.ne 2 .ne 2
.mk
.na .na
\fB\fIreclaim\fR\fR \fB\fIreclaim\fR\fR
.ad .ad
.RS 15n .RS 17n
.rt
The callback to reclaim objects. The callback to reclaim objects.
.sp
.RE .RE
.sp .sp
.ne 2 .ne 2
.mk
.na .na
\fB\fIcallback_data\fR\fR \fB\fIcallback_data\fR\fR
.ad .ad
.RS 15n .RS 17n
.rt
An opaque pointer passed to the callbacks. An opaque pointer passed to the callbacks.
.sp
.RE .RE
.sp .sp
.ne 2 .ne 2
.mk
.na .na
\fB\fIsource\fR\fR \fB\fIsource\fR\fR
.ad .ad
.RS 15n .RS 17n
.rt
This parameter must be \fINULL\fR. This parameter must be \fINULL\fR.
.sp
.RE .RE
.sp .sp
.ne 2 .ne 2
.mk
.na .na
\fB\fIcflags\fR\fR \fB\fIcflags\fR\fR
.ad .ad
.RS 15n .RS 17n
.rt This parameter must be either 0 or \fBUMC_NODEBUG\fR. If \fBUMC_NODEBUG\fR, all
This parameter must be either 0 or \fBUMC_NODEBUG\fR. If \fBUMC_NODEBUG\fR, all debugging features are disabled for this cache. See debugging features are disabled for this cache. See \fBumem_debug\fR(3MALLOC).
\fBumem_debug\fR(3MALLOC).
.sp
.RE .RE
.sp
.LP .LP
Each cache can have up to three associated callbacks: Each cache can have up to three associated callbacks:
.sp .sp
.LP
.in +2 .in +2
.nf .nf
int constructor(void *buffer, void *callback_data, int flags); int constructor(void *buffer, void *callback_data, int flags);
@ -197,230 +153,196 @@ void reclaim(void *callback_data);
.fi .fi
.in -2 .in -2
.LP
The \fIcallback_data\fR argument is always equal to the value passed to \fBumem_cache_create()\fR, thereby allowing a client to use the same callback functions for multiple caches, but with customized behavior.
.sp .sp
.LP .LP
The reclaim callback is called when the umem function is requesting more memory from the operating system. This callback can be used by clients who retain objects longer than they are strictly needed (for example, caching non-active state). A typical reclaim callback might return to the The \fIcallback_data\fR argument is always equal to the value passed to
cache ten per cent of the unneeded buffers. \fBumem_cache_create()\fR, thereby allowing a client to use the same callback
functions for multiple caches, but with customized behavior.
.sp .sp
.LP .LP
The constructor and destructor callbacks enable the management of buffers with the constructed state. The constructor takes as arguments a buffer with undefined contents, some callback data, and the flags to use for any allocations. This callback should transform the buffer into the constructed The reclaim callback is called when the umem function is requesting more memory
from the operating system. This callback can be used by clients who retain
objects longer than they are strictly needed (for example, caching non-active
state). A typical reclaim callback might return to the cache ten per cent of
the unneeded buffers.
.sp
.LP
The constructor and destructor callbacks enable the management of buffers with
the constructed state. The constructor takes as arguments a buffer with
undefined contents, some callback data, and the flags to use for any
allocations. This callback should transform the buffer into the constructed
state. state.
.sp .sp
.LP .LP
The destructor callback takes as an argument a constructed object and prepares it for return to the general pool of memory. The destructor should undo any state that the constructor created. For debugging, the destructor can also check that the buffer is in the constructed state, to catch The destructor callback takes as an argument a constructed object and prepares
incorrectly freed buffers. See it for return to the general pool of memory. The destructor should undo any
\fBumem_debug\fR(3MALLOC) for further information on debugging support. state that the constructor created. For debugging, the destructor can also
check that the buffer is in the constructed state, to catch incorrectly freed
buffers. See \fBumem_debug\fR(3MALLOC) for further information on debugging
support.
.sp .sp
.LP .LP
The \fBumem_cache_destroy()\fR function destroys an object cache. If the cache has any outstanding allocations, the behavior is undefined. The \fBumem_cache_destroy()\fR function destroys an object cache. If the cache
has any outstanding allocations, the behavior is undefined.
.SS "Allocating Objects"
.sp .sp
.SS Allocating Objects
.LP .LP
The \fBumem_cache_alloc()\fR function takes as arguments: The \fBumem_cache_alloc()\fR function takes as arguments:
.sp
.sp .sp
.ne 2 .ne 2
.mk
.na .na
\fB\fIcache\fR\fR \fB\fIcache\fR\fR
.ad .ad
.RS 7n .RS 9n
.rt
a cache pointer a cache pointer
.sp
.RE .RE
.sp .sp
.ne 2 .ne 2
.mk
.na .na
\fB\fIflags\fR\fR \fB\fIflags\fR\fR
.ad .ad
.RS 7n .RS 9n
.rt flags that determine the behavior if \fBumem_cache_alloc()\fR is unable to
flags that determine the behavior if \fBumem_cache_alloc()\fR is unable to fulfill the allocation request fulfill the allocation request
.sp
.RE .RE
.LP
If successful, \fBumem_cache_alloc()\fR returns a pointer to the beginning of an object of \fIbufsize\fR length.
.sp .sp
.LP
If successful, \fBumem_cache_alloc()\fR returns a pointer to the beginning of
an object of \fIbufsize\fR length.
.sp
.LP .LP
There are three cases to consider: There are three cases to consider:
.sp
.sp
.RS +4 .RS +4
.TP .TP
.ie t \(bu .ie t \(bu
.el o .el o
A new buffer needed to be allocated. If the cache was created with a constructor, it is applied to the buffer and the resulting object is returned. A new buffer needed to be allocated. If the cache was created with a
.sp constructor, it is applied to the buffer and the resulting object is returned.
.RE .RE
.sp
.RS +4 .RS +4
.TP .TP
.ie t \(bu .ie t \(bu
.el o .el o
The object cache was able to use a previously freed buffer. If the cache was created with a constructor, the object is returned unchanged from when it was freed. The object cache was able to use a previously freed buffer. If the cache was
.sp created with a constructor, the object is returned unchanged from when it was
freed.
.RE .RE
.sp
.RS +4 .RS +4
.TP .TP
.ie t \(bu .ie t \(bu
.el o .el o
The allocation of a new buffer failed. The \fIflags\fR argument determines the behavior: The allocation of a new buffer failed. The \fIflags\fR argument determines the
.sp behavior:
.RS
.sp .sp
.ne 2 .ne 2
.mk
.na .na
\fB\fBUMEM_DEFAULT\fR\fR \fB\fBUMEM_DEFAULT\fR\fR
.ad .ad
.RS 14n .RS 16n
.rt The \fBumem_cache_alloc()\fR function returns \fINULL\fR if the allocation
The \fBumem_cache_alloc()\fR function returns \fINULL\fR if the allocation fails. fails.
.sp
.RE .RE
.sp .sp
.ne 2 .ne 2
.mk
.na .na
\fB\fBUMEM_NOFAIL\fR\fR \fB\fBUMEM_NOFAIL\fR\fR
.ad .ad
.RS 14n .RS 16n
.rt The \fBumem_cache_alloc()\fR function cannot return \fINULL\fR. A callback is
The \fBumem_cache_alloc()\fR function cannot return \fINULL\fR. A callback is used to determine what action occurs. See used to determine what action occurs. See \fBumem_alloc\fR(3MALLOC) for more
\fBumem_alloc\fR(3MALLOC) for more information. information.
.RE
.RE
.RE
.SS "Freeing Objects"
.sp .sp
.RE
.RE
.SS Freeing Objects
.LP .LP
The \fBumem_cache_free()\fR function takes as arguments: The \fBumem_cache_free()\fR function takes as arguments:
.sp
.sp .sp
.ne 2 .ne 2
.mk
.na .na
\fB\fIcache\fR\fR \fB\fIcache\fR\fR
.ad .ad
.RS 7n .RS 9n
.rt
a cache pointer a cache pointer
.sp
.RE .RE
.sp .sp
.ne 2 .ne 2
.mk
.na .na
\fB\fIbuf\fR\fR \fB\fIbuf\fR\fR
.ad .ad
.RS 7n .RS 9n
.rt a pointer previously returned from \fBumem_cache_alloc()\fR. This argument must
a pointer previously returned from \fBumem_cache_alloc()\fR. This argument must not be \fINULL\fR. not be \fINULL\fR.
.sp
.RE .RE
.LP
If the cache was created with a constructor callback, the object must be returned to the constructed state before it is freed.
.sp .sp
.LP .LP
Undefined behavior results if an object is freed multiple times, if an object is modified after it is freed, or if an object is freed to a cache other than the one from which it was allocated. If the cache was created with a constructor callback, the object must be
returned to the constructed state before it is freed.
.sp .sp
.SS Caches with Constructors
.LP .LP
When a constructor callback is in use, there is essentially a contract between the cache and its clients. The cache guarantees that all objects returned from \fBumem_cache_alloc()\fR will be in the constructed state, and the client guarantees that it will return the object Undefined behavior results if an object is freed multiple times, if an object
to the constructed state before handing it to \fBumem_cache_free()\fR. is modified after it is freed, or if an object is freed to a cache other than
the one from which it was allocated.
.SS "Caches with Constructors"
.sp .sp
.LP
When a constructor callback is in use, there is essentially a contract between
the cache and its clients. The cache guarantees that all objects returned from
\fBumem_cache_alloc()\fR will be in the constructed state, and the client
guarantees that it will return the object to the constructed state before
handing it to \fBumem_cache_free()\fR.
.SH RETURN VALUES .SH RETURN VALUES
.sp
.LP .LP
Upon failure, the \fBumem_cache_create()\fR function returns a null pointer. Upon failure, the \fBumem_cache_create()\fR function returns a null pointer.
.sp
.SH ERRORS .SH ERRORS
.sp
.LP .LP
The \fBumem_cache_create()\fR function will fail if: The \fBumem_cache_create()\fR function will fail if:
.sp
.sp .sp
.ne 2 .ne 2
.mk
.na .na
\fB\fBEAGAIN\fR\fR \fB\fBEAGAIN\fR\fR
.ad .ad
.RS 8n .RS 10n
.rt
There is not enough memory available to allocate the cache data structure. There is not enough memory available to allocate the cache data structure.
.sp
.RE .RE
.sp .sp
.ne 2 .ne 2
.mk
.na .na
\fB\fBEINVAL\fR\fR \fB\fBEINVAL\fR\fR
.ad .ad
.RS 8n .RS 10n
.rt The \fIdebug_name\fR argument is \fINULL\fR, the \fIalign\fR argument is not a
The \fIdebug_name\fR argument is \fINULL\fR, the \fIalign\fR argument is not a power of two or is larger than the system pagesize, or the \fIbufsize\fR argument is 0. power of two or is larger than the system pagesize, or the \fIbufsize\fR
.sp argument is 0.
.RE .RE
.sp .sp
.ne 2 .ne 2
.mk
.na .na
\fB\fBENOMEM\fR\fR \fB\fBENOMEM\fR\fR
.ad .ad
.RS 8n .RS 10n
.rt The \fBlibumem\fR library could not be initialized, or the \fIbufsize\fR
The \fBlibumem\fR library could not be initialized, or the \fIbufsize\fR argument is too large and its use would cause integer overflow to occur. argument is too large and its use would cause integer overflow to occur.
.sp
.RE .RE
.SH EXAMPLES .SH EXAMPLES
.LP .LP
\fBExample 1 \fRUse a fixed-size structure with no constructor callback. \fBExample 1 \fRUse a fixed-size structure with no constructor callback.
.sp
.LP
.in +2 .in +2
.nf .nf
#include <umem.h> #include <umem.h>
@ -430,9 +352,9 @@ typedef struct my_obj {
} my_obj_t; } my_obj_t;
/* /*
* my_objs can be freed at any time. The contents of * my_objs can be freed at any time. The contents of
* my_data1 are undefined at allocation time. * my_data1 are undefined at allocation time.
*/ */
umem_cache_t *my_obj_cache; umem_cache_t *my_obj_cache;
@ -448,10 +370,10 @@ umem_cache_free(my_obj_cache, cur);
\&... \&...
.fi .fi
.in -2 .in -2
.LP
\fBExample 2 \fRUse an object with a mutex.
.LP .LP
\fBExample 2 \fRUse an object with a mutex.
.sp
.in +2 .in +2
.nf .nf
#define _REENTRANT #define _REENTRANT
@ -464,8 +386,8 @@ typedef struct my_obj {
} my_obj_t; } my_obj_t;
/* /*
* my_objs can only be freed when my_mutex is unlocked. * my_objs can only be freed when my_mutex is unlocked.
*/ */
int int
my_obj_constructor(void *buf, void *ignored, int flags) my_obj_constructor(void *buf, void *ignored, int flags)
{ {
@ -498,10 +420,10 @@ umem_cache_free(my_obj_cache, cur);
\&... \&...
.fi .fi
.in -2 .in -2
.LP
\fBExample 3 \fRUse a more complex object with a mutex.
.LP .LP
\fBExample 3 \fRUse a more complex object with a mutex.
.sp
.in +2 .in +2
.nf .nf
#define _REENTRANT #define _REENTRANT
@ -517,10 +439,10 @@ typedef struct my_obj {
} my_obj_t; } my_obj_t;
/* /*
* my_objs can only be freed when my_barlist == NULL, * my_objs can only be freed when my_barlist == NULL,
* my_refcount == 0, there are no waiters on my_cv, and * my_refcount == 0, there are no waiters on my_cv, and
* my_mutex is unlocked. * my_mutex is unlocked.
*/ */
int int
my_obj_constructor(void *buf, void *ignored, int flags) my_obj_constructor(void *buf, void *ignored, int flags)
@ -561,10 +483,10 @@ umem_cache_free(my_obj_cache, cur);
\&... \&...
.fi .fi
.in -2 .in -2
.LP
\fBExample 4 \fRUse objects with a subordinate buffer while reusing callbacks.
.LP .LP
\fBExample 4 \fRUse objects with a subordinate buffer while reusing callbacks.
.sp
.in +2 .in +2
.nf .nf
#include <assert.h> #include <assert.h>
@ -576,8 +498,8 @@ typedef struct my_obj {
} my_obj_t; } my_obj_t;
/* /*
* my_size and the my_buffer pointer should never be changed * my_size and the my_buffer pointer should never be changed
*/ */
int int
my_obj_constructor(void *buf, void *arg, int flags) my_obj_constructor(void *buf, void *arg, int flags)
@ -631,185 +553,130 @@ umem_cache_free(my_obj_8k_cache, my_obj_8k);
.fi .fi
.in -2 .in -2
.LP
See the \fBEXAMPLES\fR section of
\fBumem_alloc\fR(3MALLOC) for examples involving the \fBUMEM_NOFAIL\fR flag.
.sp .sp
.LP
See the \fBEXAMPLES\fR section of \fBumem_alloc\fR(3MALLOC) for examples
involving the \fBUMEM_NOFAIL\fR flag.
.SH ATTRIBUTES .SH ATTRIBUTES
.LP
See
\fBattributes\fR(5) for descriptions of the following attributes:
.sp .sp
.LP .LP
See \fBattributes\fR(5) for descriptions of the following attributes:
.sp
.sp .sp
.TS .TS
tab() box; box;
cw(2.75i) |cw(2.75i) c | c
lw(2.75i) |lw(2.75i) l | l .
. ATTRIBUTE TYPE ATTRIBUTE VALUE
ATTRIBUTE TYPEATTRIBUTE VALUE
_ _
Interface StabilityEvolving Interface Stability Committed
_ _
MT-LevelMT-Safe MT-Level MT-Safe
.TE .TE
.SH SEE ALSO .SH SEE ALSO
.LP
\fBsetcontext\fR(2),
\fBatexit\fR(3C),
\fBlibumem\fR(3LIB),
\fBlongjmp\fR(3C),
\fBswapcontext\fR(3C),
\fBthr_exit\fR(3C),
\fBumem_alloc\fR(3MALLOC),
\fBumem_debug\fR(3MALLOC),
\fBattributes\fR(5)
.sp .sp
.LP .LP
Bonwick, Jeff, "The Slab Allocator: An Object-Caching Kernel Memory Allocator", Proceedings of the Summer 1994 Usenix Conference. \fBsetcontext\fR(2), \fBatexit\fR(3C), \fBlibumem\fR(3LIB), \fBlongjmp\fR(3C),
\fBswapcontext\fR(3C), \fBthr_exit\fR(3C), \fBumem_alloc\fR(3MALLOC),
\fBumem_debug\fR(3MALLOC), \fBattributes\fR(5)
.sp .sp
.LP .LP
Bonwick, Jeff and Jonathan Adams, "Magazines and vmem: Extending the Slab Allocator to Many CPUs and Arbitrary Resources", Proceedings of the Summer 2001 Usenix Conference. Bonwick, Jeff, "The Slab Allocator: An Object-Caching Kernel Memory Allocator",
Proceedings of the Summer 1994 Usenix Conference.
.sp .sp
.LP
Bonwick, Jeff and Jonathan Adams, "Magazines and vmem: Extending the Slab
Allocator to Many CPUs and Arbitrary Resources", Proceedings of the Summer 2001
Usenix Conference.
.SH WARNINGS .SH WARNINGS
.sp
.LP .LP
Any of the following can cause undefined results: Any of the following can cause undefined results:
.sp
.sp
.RS +4 .RS +4
.TP .TP
.ie t \(bu .ie t \(bu
.el o .el o
Destroying a cache that has outstanding allocated buffers. Destroying a cache that has outstanding allocated buffers.
.sp
.RE .RE
.sp
.RS +4 .RS +4
.TP .TP
.ie t \(bu .ie t \(bu
.el o .el o
Using a cache after it has been destroyed. Using a cache after it has been destroyed.
.sp
.RE .RE
.sp
.RS +4 .RS +4
.TP .TP
.ie t \(bu .ie t \(bu
.el o .el o
Calling \fBumem_cache_free()\fR on the same buffer multiple times. Calling \fBumem_cache_free()\fR on the same buffer multiple times.
.sp
.RE .RE
.sp
.RS +4 .RS +4
.TP .TP
.ie t \(bu .ie t \(bu
.el o .el o
Passing a \fINULL\fR pointer to \fBumem_cache_free()\fR. Passing a \fINULL\fR pointer to \fBumem_cache_free()\fR.
.sp
.RE .RE
.sp
.RS +4 .RS +4
.TP .TP
.ie t \(bu .ie t \(bu
.el o .el o
Writing past the end of a buffer. Writing past the end of a buffer.
.sp
.RE .RE
.sp
.RS +4 .RS +4
.TP .TP
.ie t \(bu .ie t \(bu
.el o .el o
Reading from or writing to a buffer after it has been freed. Reading from or writing to a buffer after it has been freed.
.sp
.RE .RE
.sp
.RS +4 .RS +4
.TP .TP
.ie t \(bu .ie t \(bu
.el o .el o
Performing \fBUMEM_NOFAIL\fR allocations from an Performing \fBUMEM_NOFAIL\fR allocations from an \fBatexit\fR(3C) handler.
\fBatexit\fR(3C) handler.
.sp
.RE .RE
.sp
.LP .LP
Per-cache callbacks can be called from a variety of contexts. The use of functions that modify the active context, such as Per-cache callbacks can be called from a variety of contexts. The use of
\fBsetcontext\fR(2), functions that modify the active context, such as \fBsetcontext\fR(2),
\fBswapcontext\fR(3C), and \fBswapcontext\fR(3C), and \fBthr_exit\fR(3C), or functions that are unsafe for
\fBthr_exit\fR(3C), use in multithreaded applications, such as \fBlongjmp\fR(3C) and
or functions that are unsafe for use in multithreaded applications, such as
\fBlongjmp\fR(3C) and
\fBsiglongjmp\fR(3C), result in undefined behavior. \fBsiglongjmp\fR(3C), result in undefined behavior.
.sp .sp
.LP .LP
A constructor callback that performs allocations must pass its \fIflags\fR argument unchanged to A constructor callback that performs allocations must pass its \fIflags\fR
\fBumem_alloc\fR(3MALLOC) and \fBumem_cache_alloc()\fR. Any allocations made with a different flags argument result in undefined behavior. The constructor must correctly handle the failure of any allocations it makes. argument unchanged to \fBumem_alloc\fR(3MALLOC) and \fBumem_cache_alloc()\fR.
.sp Any allocations made with a different flags argument result in undefined
behavior. The constructor must correctly handle the failure of any allocations
it makes.
.SH NOTES .SH NOTES
.sp
.LP .LP
Object caches make the following guarantees about objects: Object caches make the following guarantees about objects:
.sp
.sp
.RS +4 .RS +4
.TP .TP
.ie t \(bu .ie t \(bu
.el o .el o
If the cache has a constructor callback, it is applied to every object before it is returned from \fBumem_cache_alloc()\fR for the first time. If the cache has a constructor callback, it is applied to every object before
.sp it is returned from \fBumem_cache_alloc()\fR for the first time.
.RE .RE
.sp
.RS +4 .RS +4
.TP .TP
.ie t \(bu .ie t \(bu
.el o .el o
If the cache has a constructor callback, an object passed to \fBumem_cache_free()\fR and later returned from \fBumem_cache_alloc()\fR is not modified between the two events. If the cache has a constructor callback, an object passed to
.sp \fBumem_cache_free()\fR and later returned from \fBumem_cache_alloc()\fR is not
modified between the two events.
.RE .RE
.sp
.RS +4 .RS +4
.TP .TP
.ie t \(bu .ie t \(bu
.el o .el o
If the cache has a destructor, it is applied to all objects before their underlying storage is returned. If the cache has a destructor, it is applied to all objects before their
.sp underlying storage is returned.
.RE .RE
.sp
.LP .LP
No other guarantees are made. In particular, even if there are buffers recently freed to the cache, \fBumem_cache_alloc()\fR can fail. No other guarantees are made. In particular, even if there are buffers recently
.sp freed to the cache, \fBumem_cache_alloc()\fR can fail.

View file

@ -1,24 +1,9 @@
'\" te '\" te
.\" CDDL HEADER START
.\"
.\" The contents of this file are subject to the terms of the
.\" Common Development and Distribution License (the "License").
.\" You may not use this file except in compliance with the License.
.\"
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
.\" or http://www.opensolaris.org/os/licensing.
.\" See the License for the specific language governing permissions
.\" and limitations under the License.
.\"
.\" When distributing Covered Code, include this CDDL HEADER in each
.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
.\"
.\" CDDL HEADER END
.\" Copyright (c) 2002, Sun Microsystems, Inc. All Rights Reserved. .\" Copyright (c) 2002, Sun Microsystems, Inc. All Rights Reserved.
.TH umem_debug 3MALLOC "26 July 2002" "SunOS 5.11" "Memory Allocation Library Functions" .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
.TH UMEM_DEBUG 3MALLOC "Jul 26, 2002"
.SH NAME .SH NAME
umem_debug \- debugging features of the umem library umem_debug \- debugging features of the umem library
.SH SYNOPSIS .SH SYNOPSIS
@ -29,247 +14,198 @@ umem_debug \- debugging features of the umem library
.fi .fi
.SH DESCRIPTION .SH DESCRIPTION
.LP
The \fBlibumem\fR library provides debugging features that detect memory leaks, buffer overruns, multiple frees, use of uninitialized data, use of freed data, and many other common programming errors. The activation of the run-time debugging features is controlled by environment variables.
.sp .sp
.LP .LP
When the library detects an error, it writes a description of the error to an internal buffer that is readable with the \fB::umem_status\fR The \fBlibumem\fR library provides debugging features that detect memory leaks,
\fBmdb\fR(1) \fIdcmd\fR and then calls buffer overruns, multiple frees, use of uninitialized data, use of freed data,
\fBabort\fR(3C). and many other common programming errors. The activation of the run-time
debugging features is controlled by environment variables.
.sp .sp
.LP
When the library detects an error, it writes a description of the error to an
internal buffer that is readable with the \fB::umem_status\fR \fBmdb\fR(1)
\fIdcmd\fR and then calls \fBabort\fR(3C).
.SH ENVIRONMENT VARIABLES .SH ENVIRONMENT VARIABLES
.sp .sp
.ne 2 .ne 2
.mk
.na .na
\fBUMEM_DEBUG\fR \fB\fBUMEM_DEBUG\fR\fR
.ad .ad
.RS 14n .RS 16n
.rt This variable contains a list of comma-separated options. Unrecognized options
This variable contains a list of comma-separated options. Unrecognized options are ignored. Possible options include: are ignored. Possible options include:
.sp
.sp .sp
.ne 2 .ne 2
.mk
.na .na
\fB\fBaudit\fR[=\fIframes\fR]\fR \fB\fBaudit\fR[=\fIframes\fR]\fR
.ad .ad
.RS 18n .RS 20n
.rt This option enables the recording of auditing information, including thread ID,
This option enables the recording of auditing information, including thread ID, high-resolution time stamp, and stack trace for the last action (allocation or free) on every allocation. If transaction logging high-resolution time stamp, and stack trace for the last action (allocation or
(see UMEM_LOGGING) is enabled, this auditing information is also logged. free) on every allocation. If transaction logging (see \fBUMEM_LOGGING\fR) is
enabled, this auditing information is also logged.
.sp .sp
The \fIframes\fR parameter sets the number of stack frames recorded in the
auditing structure. The upper bound for frames is implementation-defined. If a
larger value is requested, the upper bound is used instead.
.sp .sp
The \fIframes\fR parameter sets the number of stack frames recorded in the auditing structure. The upper bound for frames is implementation-defined. If a larger value is requested, the upper bound is used instead. If \fIframes\fR is not specified or is not an integer, the default value of 15
.sp is used.
.sp
If \fIframes\fR is not specified or is not an integer, the default value of 15 is used.
.sp
.sp .sp
This option also enables the \fBguards\fR option. This option also enables the \fBguards\fR option.
.sp
.RE .RE
.sp .sp
.ne 2 .ne 2
.mk
.na .na
\fB\fBcontents\fR[=\fIcount\fR]\fR \fB\fBcontents\fR[=\fIcount\fR]\fR
.ad .ad
.RS 18n .RS 20n
.rt If auditing and contents logging (see \fBUMEM_LOGGING\fR) are enabled, the
If auditing and contents logging (see UMEM_LOGGING) are enabled, the first \fIcount\fR bytes of each buffer are logged when they are freed. If a buffer is shorter than \fIcount\fR bytes, it is logged in its entirety. first \fIcount\fR bytes of each buffer are logged when they are freed. If a
buffer is shorter than \fIcount\fR bytes, it is logged in its entirety.
.sp .sp
If \fIcount\fR is not specified or is not an integer, the default value of 256
.sp is used.
If \fIcount\fR is not specified or is not an integer, the default value of 256 is used.
.sp
.RE .RE
.sp .sp
.ne 2 .ne 2
.mk
.na .na
\fB\fBdefault\fR\fR \fB\fBdefault\fR\fR
.ad .ad
.RS 18n .RS 20n
.rt
This option is equivalent to \fBaudit\fR,\fBcontents\fR,\fBguards\fR. This option is equivalent to \fBaudit\fR,\fBcontents\fR,\fBguards\fR.
.sp
.RE .RE
.sp .sp
.ne 2 .ne 2
.mk
.na .na
\fB\fBguards\fR\fR \fB\fBguards\fR\fR
.ad .ad
.RS 18n .RS 20n
.rt This option enables filling allocated and freed buffers with special patterns
This option enables filling allocated and freed buffers with special patterns to help detect the use of uninitialized data and previously freed buffers. It also enables an 8-byte redzone after each buffer that contains \fB0xfeedfacefeedfaceULL\fR. to help detect the use of uninitialized data and previously freed buffers. It
also enables an 8-byte redzone after each buffer that contains
\fB0xfeedfacefeedfaceULL\fR.
.sp .sp
When an object is freed, it is filled with \fB0xdeadbeef\fR. When an object is
.sp allocated, the \fB0xdeadbeef\fR pattern is verified and replaced with
When an object is freed, it is filled with \fB0xdeadbeef\fR. When an object is allocated, the \fB0xdeadbeef\fR pattern is verified and replaced with \fB0xbaddcafe\fR. The redzone is checked every time a buffer is allocated or freed. \fB0xbaddcafe\fR. The redzone is checked every time a buffer is allocated or
.sp freed.
.sp .sp
For caches with either constructors or destructors, or both, For caches with either constructors or destructors, or both,
\fBumem_cache_alloc\fR(3MALLOC) and \fBumem_cache_alloc\fR(3MALLOC) and \fBumem_cache_free\fR(3MALLOC) apply the
\fBumem_cache_free\fR(3MALLOC) apply the cache's constructor and destructor, respectively, instead of caching constructed objects. The presence of cache's constructor and destructor, respectively, instead of caching
\fBassert\fR(3C)s constructed objects. The presence of \fBassert\fR(3C)s in the destructor
in the destructor verifying that the buffer is in the constructed state can be used to detect any objects returned in an improper state. See verifying that the buffer is in the constructed state can be used to detect any
\fBumem_cache_create\fR(3MALLOC) for objects returned in an improper state. See \fBumem_cache_create\fR(3MALLOC)
details. for details.
.sp
.RE .RE
.sp .sp
.ne 2 .ne 2
.mk
.na .na
\fB\fBverbose\fR\fR \fB\fBverbose\fR\fR
.ad .ad
.RS 18n .RS 20n
.rt The library writes error descriptions to standard error before aborting. These
The library writes error descriptions to standard error before aborting. These messages are not localized. messages are not localized.
.sp
.RE .RE
.RE .RE
.sp .sp
.ne 2 .ne 2
.mk
.na .na
\fBUMEM_LOGGING\fR \fB\fBUMEM_LOGGING\fR\fR
.ad .ad
.RS 14n .RS 16n
.rt To be enabled, this variable should be set to a comma-separated list of
To be enabled, this variable should be set to a comma-separated list of in-memory logs. The logs available are: in-memory logs. The logs available are:
.sp
.sp .sp
.ne 2 .ne 2
.mk
.na .na
\fB\fBtransaction\fR[=\fIsize\fR]\fR \fB\fBtransaction\fR[=\fIsize\fR]\fR
.ad .ad
.RS 20n .RS 22n
.rt If the \fBaudit\fR debugging option is set (see \fBUMEM_DEBUG\fR), the audit
If the \fBaudit\fR debugging option is set (see \fBUMEM_DEBUG\fR), the audit structures from previous transactions are entered into this log. structures from previous transactions are entered into this log.
.sp
.RE .RE
.sp .sp
.ne 2 .ne 2
.mk
.na .na
\fB\fBcontents\fR[=\fIsize\fR]\fR \fB\fBcontents\fR[=\fIsize\fR]\fR
.ad .ad
.RS 20n .RS 22n
.rt If the \fBaudit\fR debugging option is set, the contents of objects are
If the \fBaudit\fR debugging option is set, the contents of objects are recorded in this log as they are freed. recorded in this log as they are freed.
.sp .sp
If the "contents" debugging option was not set, 256 bytes of each freed buffer
.sp are saved.
If the "contents" debugging option was not set, 256 bytes of each freed buffer are saved.
.sp
.RE .RE
.sp .sp
.ne 2 .ne 2
.mk
.na .na
\fB\fBfail\fR[=\fIsize\fR]\fR \fB\fBfail\fR[=\fIsize\fR]\fR
.ad .ad
.RS 20n .RS 22n
.rt
Records are entered into this log for every failed allocation. Records are entered into this log for every failed allocation.
.sp
.RE .RE
For any of these options, if \fIsize\fR is not specified, the default value of 64k is used. The \fIsize\fR parameter must be an integer that can be qualified with K, M, G, or T to specify kilobytes, megabytes, gigabytes, or terabytes, respectively. For any of these options, if \fIsize\fR is not specified, the default value of
64k is used. The \fIsize\fR parameter must be an integer that can be qualified
with K, M, G, or T to specify kilobytes, megabytes, gigabytes, or terabytes,
respectively.
.sp .sp
Logs that are not listed or that have either a size of 0 or an invalid size are
disabled.
.sp .sp
Logs that are not listed or that have either a size of 0 or an invalid size are disabled. The log is disabled if during initialization the requested amount of storage
.sp cannot be allocated.
.sp
The log is disabled if during initialization the requested amount of storage cannot be allocated.
.sp
.RE .RE
.SH ATTRIBUTES .SH ATTRIBUTES
.LP
See
\fBattributes\fR(5) for descriptions of the following attributes:
.sp .sp
.LP .LP
See \fBattributes\fR(5) for descriptions of the following attributes:
.sp
.sp .sp
.TS .TS
tab() box; box;
cw(2.75i) |cw(2.75i) c | c
lw(2.75i) |lw(2.75i) l | l .
. ATTRIBUTE TYPE ATTRIBUTE VALUE
ATTRIBUTE TYPE ATTRIBUTE VALUE
_ _
Interface Stability Unstable Interface Stability Unstable
_ _
MT-Level MT-Safe MT-Level MT-Safe
.TE .TE
.SH SEE ALSO .SH SEE ALSO
.LP
\fBmdb\fR(1),
\fBabort\fR(3C),
\fBsignal\fR(3C),
\fBumem_cache_create\fR(3MALLOC),
\fBattributes\fR(5)
.sp .sp
.LP .LP
\fBmdb\fR(1), \fBabort\fR(3C), \fBsignal\fR(3C),
\fBumem_cache_create\fR(3MALLOC), \fBattributes\fR(5)
.sp .sp
.LP
\fISolaris Modular Debugger Guide\fR
.SH WARNINGS .SH WARNINGS
.LP
When \fBlibumem\fR aborts the process using
\fBabort\fR(3C), any existing signal handler for \fBSIGABRT\fR is called. If the signal handler performs allocations, undefined
behavior can result.
.sp .sp
.LP
When \fBlibumem\fR aborts the process using \fBabort\fR(3C), any existing
signal handler for \fBSIGABRT\fR is called. If the signal handler performs
allocations, undefined behavior can result.
.SH NOTES .SH NOTES
.LP
Some of the debugging features work only for allocations smaller than 16 kilobytes in size. Allocations larger than 16 kilobytes could have reduced support.
.sp .sp
.LP .LP
Activating any of the library's debugging features could significantly increase the library's memory footprint and decrease its performance. Some of the debugging features work only for allocations smaller than 16
kilobytes in size. Allocations larger than 16 kilobytes could have reduced
support.
.sp .sp
.LP
Activating any of the library's debugging features could significantly increase
the library's memory footprint and decrease its performance.

View file

@ -28,6 +28,11 @@
* Portions Copyright 2006-2008 Message Systems, Inc. All rights reserved. * Portions Copyright 2006-2008 Message Systems, Inc. All rights reserved.
*/ */
/*
* Copyright (c) 2012 Joyent, Inc. All rights reserved.
* Copyright (c) 2015 by Delphix. All rights reserved.
*/
#ifndef _UMEM_IMPL_H #ifndef _UMEM_IMPL_H
#define _UMEM_IMPL_H #define _UMEM_IMPL_H
@ -45,7 +50,7 @@
#include <sys/vmem.h> #include <sys/vmem.h>
#ifdef HAVE_THREAD_H #ifdef HAVE_THREAD_H
# include <thread.h> #include <thread.h>
#else #else
# include "sol_compat.h" # include "sol_compat.h"
#endif #endif
@ -78,6 +83,9 @@ extern "C" {
#define UMF_HASH 0x00000200 /* cache has hash table */ #define UMF_HASH 0x00000200 /* cache has hash table */
#define UMF_RANDOMIZE 0x00000400 /* randomize other umem_flags */ #define UMF_RANDOMIZE 0x00000400 /* randomize other umem_flags */
#define UMF_PTC 0x00000800 /* cache has per-thread caching */
#define UMF_CHECKNULL 0x00001000 /* heap exhaustion checking */
#define UMF_BUFTAG (UMF_DEADBEEF | UMF_REDZONE) #define UMF_BUFTAG (UMF_DEADBEEF | UMF_REDZONE)
#define UMF_TOUCH (UMF_BUFTAG | UMF_LITE | UMF_CONTENTS) #define UMF_TOUCH (UMF_BUFTAG | UMF_LITE | UMF_CONTENTS)
@ -418,6 +426,13 @@ extern void umem_startup(caddr_t, size_t, size_t, caddr_t, caddr_t);
extern int umem_add(caddr_t, size_t); extern int umem_add(caddr_t, size_t);
#endif #endif
/*
* Private interface with libc for tcumem.
*/
extern uintptr_t _tmem_get_base(void);
extern int _tmem_get_nentries(void);
extern void _tmem_set_cleanup(void(*)(void *, int));
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View file

@ -4,8 +4,7 @@
#include "umem.h" #include "umem.h"
#define UMEM_STANDALONE 1 extern void umem_startup(caddr_t, size_t, size_t, caddr_t, caddr_t);
#include "umem_impl.h"
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
@ -23,3 +22,4 @@ int main(int argc, char *argv[])
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }

View file

@ -3,6 +3,8 @@
#include "umem.h" #include "umem.h"
extern void umem_startup(caddr_t, size_t, size_t, caddr_t, caddr_t);
static const char *TESTSTRINGS[] = { static const char *TESTSTRINGS[] = {
"fred", "fred",
"fredfredfred", "fredfredfred",

3
vmem.c
View file

@ -22,8 +22,7 @@
/* /*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms. * Use is subject to license terms.
* * Copyright 2012 Joyent, Inc. All rights reserved.
* Portions Copyright 2012 Joyent, Inc. All rights reserved.
*/ */
/* #pragma ident "@(#)vmem.c 1.10 05/06/08 SMI" */ /* #pragma ident "@(#)vmem.c 1.10 05/06/08 SMI" */

View file

@ -23,7 +23,7 @@
* Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms. * Use is subject to license terms.
* *
* Portions Copyright 2012 Joyent, Inc. All rights reserved. * Copyright 2012 Joyent, Inc. All rights reserved.
*/ */
/* #pragma ident "@(#)vmem_base.c 1.6 05/06/08 SMI" */ /* #pragma ident "@(#)vmem_base.c 1.6 05/06/08 SMI" */

View file

@ -2,8 +2,8 @@
* CDDL HEADER START * CDDL HEADER START
* *
* The contents of this file are subject to the terms of the * The contents of this file are subject to the terms of the
* Common Development and Distribution License, (the "License"). * Common Development and Distribution License (the "License").
You may not use this file except in compliance with the License. * You may not use this file except in compliance with the License.
* *
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing. * or http://www.opensolaris.org/os/licensing.
@ -21,8 +21,7 @@
/* /*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms. * Use is subject to license terms.
* * Copyright 2012 Joyent, Inc. All rights reserved.
* Portions Copyright 2012 Joyent, Inc. All rights reserved.
*/ */
#ifndef _VMEM_BASE_H #ifndef _VMEM_BASE_H

View file

@ -215,6 +215,9 @@ vmem_sbrk_tryfail(vmem_t *src, size_t size, int vmflags)
static void * static void *
vmem_sbrk_alloc(vmem_t *src, size_t size, int vmflags) vmem_sbrk_alloc(vmem_t *src, size_t size, int vmflags)
{ {
extern void *_sbrk_grow_aligned(size_t min_size, size_t low_align,
size_t high_align, size_t *actual_size);
void *ret; void *ret;
void *buf; void *buf;
size_t buf_size; size_t buf_size;