diff --git a/.gitignore b/.gitignore index 1ac03e6..4496e57 100644 --- a/.gitignore +++ b/.gitignore @@ -3,7 +3,6 @@ *.la *.o *.libs -INSTALL Makefile.in aclocal.m4 autom4te.cache diff --git a/INSTALL b/INSTALL new file mode 120000 index 0000000..1bb377a --- /dev/null +++ b/INSTALL @@ -0,0 +1 @@ +/usr/local/Cellar/automake/1.11.3/share/automake-1.11/INSTALL \ No newline at end of file diff --git a/README b/README index f5c022e..30da2a2 100644 --- a/README +++ b/README @@ -17,8 +17,14 @@ Linux and Windows versions of our product as we have found it to be stable. We will continue to update this project as and when we make improvements, and welcome third-party patches that improve the usability for everyone. - + Wez Furlong, Message Systems, Inc. wez (at) messagesystems (dot) com + +$ UMEM_OPTIONS=allocator=best ./umem_test +Hello hello there + +UMEM_OPTIONS=allocator=best +=best, =first, =next, or =instant diff --git a/amd64/umem_genasm.c b/amd64/umem_genasm.c new file mode 100644 index 0000000..7dad575 --- /dev/null +++ b/amd64/umem_genasm.c @@ -0,0 +1,609 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2012 Joyent, Inc. All rights reserved. + */ + +/* + * Don't Panic! If you find the blocks of assembly that follow confusing and + * you're questioning why they exist, please go read section 8 of the umem.c big + * theory statement. Next familiarize yourself with the malloc and free + * implementations in libumem's malloc.c. + * + * What follows is the amd64 implementation of the thread caching automatic + * assembly generation. The amd64 calling conventions are documented in the + * 64-bit System V ABI. For our purposes what matters is that our first argument + * will come in rdi. Our functions have to preserve rbp, rbx, and r12->r15. We + * are free to do whatever we want with rax, rcx, rdx, rsi, rdi, and r8->r11. + * + * For both our implementation of malloc and free we only use the registers we + * don't have to preserve. + * + * Malloc register usage: + * o. rdi: Original size to malloc. This never changes and is preserved. + * o. rsi: Adjusted malloc size for malloc_data_tag(s). + * o. rcx: Pointer to the tmem_t in the ulwp_t. + * o. rdx: Pointer to the tmem_t array of roots + * o. r8: Size of the cache + * o. r9: Scratch register + * + * Free register usage: + * o. rdi: Original buffer to free. This never changes and is preserved. + * o. rax: The actual buffer, adjusted for the hidden malloc_data_t(s). + * o. rcx: Pointer to the tmem_t in the ulwp_t. + * o. rdx: Pointer to the tmem_t array of roots + * o. r8: Size of the cache + * o. 
r9: Scratch register + * + * Once we determine what cache we are using, we increment %rdx to the + * appropriate offset and set %r8 with the size of the cache. This means that + * when we break out to the normal buffer allocation point %rdx contains the + * head of the linked list and %r8 is the amount that we have to adjust the + * thread's cached amount by. + * + * Each block of assembly has psuedocode that describes its purpose. + */ + +#include +#include +#include +#include +#include +#include "umem_base.h" + +int umem_genasm_supported = 1; +uintptr_t umem_genasm_mptr; +uintptr_t umem_genasm_msize; +uintptr_t umem_genasm_fptr; +uintptr_t umem_genasm_fsize; +static uintptr_t umem_genasm_omptr; +static uintptr_t umem_genasm_ofptr; + +#define UMEM_GENASM_MAX64 (UINT32_MAX / sizeof (uintptr_t)) +#define PTC_JMPADDR(dest, src) (dest - (src + 4)) +#define PTC_ROOT_SIZE sizeof (uintptr_t) +#define MULTINOP 0x0000441f0f + +/* + * void *ptcmalloc(size_t orig_size); + * + * size_t size = orig_size + 8; + * if (size > UMEM_SECOND_ALIGN) + * size += 8; + * + * if (size < orig_size) + * goto tomalloc; ! This is overflow + * + * if (size > cache_max) + * goto tomalloc + * + * tmem_t *t = (uintptr_t)curthread() + umem_thr_offset; + * void **roots = t->tm_roots; + */ +#define PTC_MALINIT_JOUT 0x13 +#define PTC_MALINIT_MCS 0x1a +#define PTC_MALINIT_JOV 0x20 +#define PTC_MALINIT_SOFF 0x30 +static const uint8_t malinit[] = { + 0x48, 0x8d, 0x77, 0x08, /* leaq 0x8(%rdi),%rsi */ + 0x48, 0x83, 0xfe, 0x10, /* cmpq $0x10, %rsi */ + 0x76, 0x04, /* jbe +0x4 */ + 0x48, 0x8d, 0x77, 0x10, /* leaq 0x10(%rdi),%rsi */ + 0x48, 0x39, 0xfe, /* cmpq %rdi,%rsi */ + 0x0f, 0x82, 0x00, 0x00, 0x00, 0x00, /* jb +errout */ + 0x48, 0x81, 0xfe, + 0x00, 0x00, 0x00, 0x00, /* cmpq sizeof ($CACHE), %rsi */ + 0x0f, 0x87, 0x00, 0x00, 0x00, 0x00, /* ja +errout */ + 0x64, 0x48, 0x8b, 0x0c, 0x25, + 0x00, 0x00, 0x00, 0x00, /* movq %fs:0x0,%rcx */ + 0x48, 0x81, 0xc1, + 0x00, 0x00, 0x00, 0x00, /* addq $SOFF, %rcx */ + 0x48, 0x8d, 0x51, 0x08, /* leaq 0x8(%rcx),%rdx */ +}; + +/* + * void ptcfree(void *buf); + * + * if (buf == NULL) + * return; + * + * malloc_data_t *tag = buf; + * tag--; + * int size = tag->malloc_size; + * int tagval = UMEM_MALLOC_DECODE(tag->malloc_tag, size); + * if (tagval == MALLOC_SECOND_MAGIC) { + * tag--; + * } else if (tagval != MALLOC_MAGIC) { + * goto tofree; + * } + * + * if (size > cache_max) + * goto tofree; + * + * tmem_t *t = (uintptr_t)curthread() + umem_thr_offset; + * void **roots = t->tm_roots; + */ +#define PTC_FRINI_JDONE 0x05 +#define PTC_FRINI_JFREE 0x25 +#define PTC_FRINI_MCS 0x30 +#define PTC_FRINI_JOV 0x36 +#define PTC_FRINI_SOFF 0x46 +static const uint8_t freeinit[] = { + 0x48, 0x85, 0xff, /* testq %rdi,%rdi */ + 0x0f, 0x84, 0x00, 0x00, 0x00, 0x00, /* jmp $JDONE (done) */ + 0x8b, 0x77, 0xf8, /* movl -0x8(%rdi),%esi */ + 0x8b, 0x47, 0xfc, /* movl -0x4(%rdi),%eax */ + 0x01, 0xf0, /* addl %esi,%eax */ + 0x3d, 0x00, 0x70, 0xba, 0x16, /* cmpl $MALLOC_2_MAGIC, %eax */ + 0x75, 0x06, /* jne +0x6 (checkover) */ + 0x48, 0x8d, 0x47, 0xf0, /* leaq -0x10(%rdi),%eax */ + 0xeb, 0x0f, /* jmp +0xf (freebuf) */ + 0x3d, 0x00, 0xc0, 0x10, 0x3a, /* cmpl $MALLOC_MAGIC, %eax */ + 0x0f, 0x85, 0x00, 0x00, 0x00, 0x00, /* jmp +JFREE (goto torfree) */ + 0x48, 0x8d, 0x47, 0xf8, /* leaq -0x8(%rdi),%rax */ + 0x48, 0x81, 0xfe, + 0x00, 0x00, 0x00, 0x00, /* cmpq sizeof ($CACHE), %rsi */ + 0x0f, 0x87, 0x00, 0x00, 0x00, 0x00, /* ja +errout */ + 0x64, 0x48, 0x8b, 0x0c, 0x25, + 0x00, 0x00, 0x00, 0x00, /* movq %fs:0x0,%rcx */ + 0x48, 
0x81, 0xc1, + 0x00, 0x00, 0x00, 0x00, /* addq $SOFF, %rcx */ + 0x48, 0x8d, 0x51, 0x08, /* leaq 0x8(%rcx),%rdx */ +}; + +/* + * if (size <= $CACHE_SIZE) { + * csize = $CACHE_SIZE; + * } else ... ! goto next cache + */ +#define PTC_INICACHE_CMP 0x03 +#define PTC_INICACHE_SIZE 0x0c +#define PTC_INICACHE_JMP 0x11 +static const uint8_t inicache[] = { + 0x48, 0x81, 0xfe, + 0x00, 0x00, 0x00, 0x00, /* cmpq sizeof ($CACHE), %rsi */ + 0x77, 0x0c, /* ja +0xc (next cache) */ + 0x49, 0xc7, 0xc0, + 0x00, 0x00, 0x00, 0x00, /* movq sizeof ($CACHE), %r8 */ + 0xe9, 0x00, 0x00, 0x00, 0x00, /* jmp $JMP (allocbuf) */ +}; + +/* + * if (size <= $CACHE_SIZE) { + * csize = $CACHE_SIZE; + * roots += $CACHE_NUM; + * } else ... ! goto next cache + */ +#define PTC_GENCACHE_CMP 0x03 +#define PTC_GENCACHE_SIZE 0x0c +#define PTC_GENCACHE_NUM 0x13 +#define PTC_GENCACHE_JMP 0x18 +static const uint8_t gencache[] = { + 0x48, 0x81, 0xfe, + 0x00, 0x00, 0x00, 0x00, /* cmpq sizeof ($CACHE), %rsi */ + 0x77, 0x14, /* ja +0xc (next cache) */ + 0x49, 0xc7, 0xc0, + 0x00, 0x00, 0x00, 0x00, /* movq sizeof ($CACHE), %r8 */ + 0x48, 0x81, 0xc2, + 0x00, 0x00, 0x00, 0x00, /* addq $8*ii, %rdx */ + 0xe9, 0x00, 0x00, 0x00, 0x00 /* jmp +$JMP (allocbuf ) */ +}; + +/* + * else if (size <= $CACHE_SIZE) { + * csize = $CACHE_SIZE; + * roots += $CACHE_NUM; + * } else { + * goto tofunc; ! goto tomalloc if ptcmalloc. + * } ! goto tofree if ptcfree. + */ +#define PTC_FINCACHE_CMP 0x03 +#define PTC_FINCACHE_JMP 0x08 +#define PTC_FINCACHE_SIZE 0x0c +#define PTC_FINCACHE_NUM 0x13 +static const uint8_t fincache[] = { + 0x48, 0x81, 0xfe, + 0x00, 0x00, 0x00, 0x00, /* cmpq sizeof ($CACHE), %rsi */ + 0x77, 0x00, /* ja +JMP (to real malloc) */ + 0x49, 0xc7, 0xc0, + 0x00, 0x00, 0x00, 0x00, /* movq sizeof ($CACHE), %r8 */ + 0x48, 0x81, 0xc2, + 0x00, 0x00, 0x00, 0x00, /* addq $8*ii, %rdx */ + +}; + +/* + * if (*root == NULL) + * goto tomalloc; + * + * malloc_data_t *ret = *root; + * *root = *(void **)ret; + * t->tm_size += csize; + * ret->malloc_size = size; + * + * if (size > UMEM_SECOND_ALIGN) { + * ret->malloc_data = UMEM_MALLOC_ENCODE(MALLOC_SECOND_MAGIC, size); + * ret += 2; + * } else { + * ret->malloc_data = UMEM_MALLOC_ENCODE(MALLOC_SECOND_MAGIC, size); + * ret += 1; + * } + * + * return ((void *)ret); + * tomalloc: + * return (malloc(orig_size)); + */ +#define PTC_MALFINI_ALLABEL 0x00 +#define PTC_MALFINI_JMLABEL 0x40 +#define PTC_MALFINI_JMADDR 0x41 +static const uint8_t malfini[] = { + 0x48, 0x8b, 0x02, /* movl (%rdx),%rax */ + 0x48, 0x85, 0xc0, /* testq %rax,%rax */ + 0x74, 0x38, /* je +0x38 (errout) */ + 0x4c, 0x8b, 0x08, /* movq (%rax),%r9 */ + 0x4c, 0x89, 0x0a, /* movq %r9,(%rdx) */ + 0x4c, 0x29, 0x01, /* subq %rsi,(%rcx) */ + 0x48, 0x83, 0xfe, 0x10, /* cmpq $0x10,%rsi */ + 0x76, 0x15, /* jbe +0x15 */ + 0x41, 0xb9, 0x00, 0x70, 0xba, 0x16, /* movl $MALLOC_MAGIC_2, %r9d */ + 0x89, 0x70, 0x08, /* movl %r9d,0x8(%rax) */ + 0x41, 0x29, 0xf1, /* subl %esi, %r9d */ + 0x44, 0x89, 0x48, 0x0c, /* movl %r9d, 0xc(%rax) */ + 0x48, 0x83, 0xc0, 0x10, /* addq $0x10, %rax */ + 0xc3, /* ret */ + 0x41, 0xb9, 0x00, 0xc0, 0x10, 0x3a, /* movl %MALLOC_MAGIC, %r9d */ + 0x89, 0x30, /* movl %esi,(%rax) */ + 0x41, 0x29, 0xf1, /* subl %esi,%r9d */ + 0x44, 0x89, 0x48, 0x04, /* movl %r9d,0x4(%rax) */ + 0x48, 0x83, 0xc0, 0x08, /* addq $0x8,%rax */ + 0xc3, /* ret */ + 0xe9, 0x00, 0x00, 0x00, 0x00 /* jmp $MALLOC */ +}; + +/* + * if (t->tm_size + csize > umem_ptc_size) + * goto tofree; + * + * t->tm_size += csize + * *(void **)tag = *root; + * *root = tag; + * return; + * tofree: + * 
free(buf); + * return; + */ +#define PTC_FRFINI_RBUFLABEL 0x00 +#define PTC_FRFINI_CACHEMAX 0x09 +#define PTC_FRFINI_DONELABEL 0x1b +#define PTC_FRFINI_JFLABEL 0x1c +#define PTC_FRFINI_JFADDR 0x1d +static const uint8_t freefini[] = { + 0x4c, 0x8b, 0x09, /* movq (%rcx),%r9 */ + 0x4d, 0x01, 0xc1, /* addq %r8, %r9 */ + 0x49, 0x81, 0xf9, + 0x00, 0x00, 0x00, 0x00, /* cmpl $THR_CACHE_MAX, %r9 */ + 0x77, 0x0d, /* jae +0xd (torfree) */ + 0x4c, 0x01, 0x01, /* addq %r8,(%rcx) */ + 0x4c, 0x8b, 0x0a, /* movq (%rdx),%r9 */ + 0x4c, 0x89, 0x08, /* movq %r9,(%rax) */ + 0x48, 0x89, 0x02, /* movq %rax,(%rdx) */ + 0xc3, /* ret */ + 0xe9, 0x00, 0x00, 0x00, 0x00 /* jmp free */ +}; + +/* + * Construct the initial part of malloc. off contains the offset from curthread + * to the root of the tmem structure. ep is the address of the label to error + * and jump to free. csize is the size of the largest umem_cache in ptcumem. + */ +static int +genasm_malinit(uint8_t *bp, uint32_t off, uint32_t ep, uint32_t csize) +{ + uint32_t addr; + + bcopy(malinit, bp, sizeof (malinit)); + addr = PTC_JMPADDR(ep, PTC_MALINIT_JOUT); + bcopy(&addr, bp + PTC_MALINIT_JOUT, sizeof (addr)); + bcopy(&csize, bp + PTC_MALINIT_MCS, sizeof (csize)); + addr = PTC_JMPADDR(ep, PTC_MALINIT_JOV); + bcopy(&addr, bp + PTC_MALINIT_JOV, sizeof (addr)); + bcopy(&off, bp + PTC_MALINIT_SOFF, sizeof (off)); + + return (sizeof (malinit)); +} + +static int +genasm_frinit(uint8_t *bp, uint32_t off, uint32_t dp, uint32_t ep, uint32_t mcs) +{ + uint32_t addr; + + bcopy(freeinit, bp, sizeof (freeinit)); + addr = PTC_JMPADDR(dp, PTC_FRINI_JDONE); + bcopy(&addr, bp + PTC_FRINI_JDONE, sizeof (addr)); + addr = PTC_JMPADDR(ep, PTC_FRINI_JFREE); + bcopy(&addr, bp + PTC_FRINI_JFREE, sizeof (addr)); + bcopy(&mcs, bp + PTC_FRINI_MCS, sizeof (mcs)); + addr = PTC_JMPADDR(ep, PTC_FRINI_JOV); + bcopy(&addr, bp + PTC_FRINI_JOV, sizeof (addr)); + bcopy(&off, bp + PTC_FRINI_SOFF, sizeof (off)); + return (sizeof (freeinit)); +} + + +/* + * Create the initial cache entry of the specified size. The value of ap tells + * us what the address of the label to try and allocate a buffer. This value is + * an offset from the current base to that value. 
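+ *
+ * For example (illustrative numbers only): if the allocation label lives
+ * 0x40 bytes from the start of this block, the rel32 field of the
+ * trailing jmp sits at offset PTC_INICACHE_JMP (0x11), so we store
+ * PTC_JMPADDR(0x40, 0x11) = 0x40 - (0x11 + 4) = 0x2b, the displacement
+ * from the end of the 5-byte jmp to the label.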
+ */ +static int +genasm_firstcache(uint8_t *bp, uint32_t csize, uint32_t ap) +{ + uint32_t addr; + + bcopy(inicache, bp, sizeof (inicache)); + bcopy(&csize, bp + PTC_INICACHE_CMP, sizeof (csize)); + bcopy(&csize, bp + PTC_INICACHE_SIZE, sizeof (csize)); + addr = PTC_JMPADDR(ap, PTC_INICACHE_JMP); + ASSERT(addr != 0); + bcopy(&addr, bp + PTC_INICACHE_JMP, sizeof (addr)); + + return (sizeof (inicache)); +} + +static int +genasm_gencache(uint8_t *bp, int num, uint32_t csize, uint32_t ap) +{ + uint32_t addr; + uint32_t coff; + + ASSERT(UINT32_MAX / PTC_ROOT_SIZE > num); + ASSERT(num != 0); + bcopy(gencache, bp, sizeof (gencache)); + bcopy(&csize, bp + PTC_GENCACHE_CMP, sizeof (csize)); + bcopy(&csize, bp + PTC_GENCACHE_SIZE, sizeof (csize)); + coff = num * PTC_ROOT_SIZE; + bcopy(&coff, bp + PTC_GENCACHE_NUM, sizeof (coff)); + addr = PTC_JMPADDR(ap, PTC_GENCACHE_JMP); + bcopy(&addr, bp + PTC_GENCACHE_JMP, sizeof (addr)); + + return (sizeof (gencache)); +} + +static int +genasm_lastcache(uint8_t *bp, int num, uint32_t csize, uint32_t ep) +{ + uint8_t eap; + uint32_t coff; + + ASSERT(ep <= 0xff && ep > 7); + ASSERT(UINT32_MAX / PTC_ROOT_SIZE > num); + bcopy(fincache, bp, sizeof (fincache)); + bcopy(&csize, bp + PTC_FINCACHE_CMP, sizeof (csize)); + bcopy(&csize, bp + PTC_FINCACHE_SIZE, sizeof (csize)); + coff = num * PTC_ROOT_SIZE; + bcopy(&coff, bp + PTC_FINCACHE_NUM, sizeof (coff)); + eap = ep - PTC_FINCACHE_JMP - 1; + bcopy(&eap, bp + PTC_FINCACHE_JMP, sizeof (eap)); + + return (sizeof (fincache)); +} + +static int +genasm_malfini(uint8_t *bp, uintptr_t mptr) +{ + uint32_t addr; + + bcopy(malfini, bp, sizeof (malfini)); + addr = PTC_JMPADDR(mptr, ((uintptr_t)bp + PTC_MALFINI_JMADDR)); + bcopy(&addr, bp + PTC_MALFINI_JMADDR, sizeof (addr)); + + return (sizeof (malfini)); +} + +static int +genasm_frfini(uint8_t *bp, uint32_t maxthr, uintptr_t fptr) +{ + uint32_t addr; + + bcopy(freefini, bp, sizeof (freefini)); + bcopy(&maxthr, bp + PTC_FRFINI_CACHEMAX, sizeof (maxthr)); + addr = PTC_JMPADDR(fptr, ((uintptr_t)bp + PTC_FRFINI_JFADDR)); + bcopy(&addr, bp + PTC_FRFINI_JFADDR, sizeof (addr)); + + return (sizeof (freefini)); +} + +/* + * The malloc inline assembly is constructed as follows: + * + * o Malloc prologue assembly + * o Generic first-cache check + * o n Generic cache checks (where n = _tmem_get_entries() - 2) + * o Generic last-cache check + * o Malloc epilogue assembly + * + * Generally there are at least three caches. When there is only one cache we + * only use the generic last-cache. In the case where there are two caches, we + * just leave out the middle ones. 
+ */ +static int +genasm_malloc(void *base, size_t len, int nents, int *umem_alloc_sizes) +{ + int ii, off; + uint8_t *bp; + size_t total; + uint32_t allocoff, erroff; + + total = sizeof (malinit) + sizeof (malfini) + sizeof (fincache); + + if (nents >= 2) + total += sizeof (inicache) + sizeof (gencache) * (nents - 2); + + if (total > len) + return (1); + + erroff = total - sizeof (malfini) + PTC_MALFINI_JMLABEL; + allocoff = total - sizeof (malfini) + PTC_MALFINI_ALLABEL; + + bp = base; + + off = genasm_malinit(bp, umem_tmem_off, erroff, + umem_alloc_sizes[nents-1]); + bp += off; + allocoff -= off; + erroff -= off; + + if (nents > 1) { + off = genasm_firstcache(bp, umem_alloc_sizes[0], allocoff); + bp += off; + allocoff -= off; + erroff -= off; + } + + for (ii = 1; ii < nents - 1; ii++) { + off = genasm_gencache(bp, ii, umem_alloc_sizes[ii], allocoff); + bp += off; + allocoff -= off; + erroff -= off; + } + + bp += genasm_lastcache(bp, nents - 1, umem_alloc_sizes[nents - 1], + erroff); + bp += genasm_malfini(bp, umem_genasm_omptr); + ASSERT(((uintptr_t)bp - total) == (uintptr_t)base); + + return (0); +} + +static int +genasm_free(void *base, size_t len, int nents, int *umem_alloc_sizes) +{ + uint8_t *bp; + int ii, off; + size_t total; + uint32_t rbufoff, retoff, erroff; + + /* Assume that nents has already been audited for us */ + total = sizeof (freeinit) + sizeof (freefini) + sizeof (fincache); + if (nents >= 2) + total += sizeof (inicache) + sizeof (gencache) * (nents - 2); + + if (total > len) + return (1); + + erroff = total - (sizeof (freefini) - PTC_FRFINI_JFLABEL); + rbufoff = total - (sizeof (freefini) - PTC_FRFINI_RBUFLABEL); + retoff = total - (sizeof (freefini) - PTC_FRFINI_DONELABEL); + + bp = base; + + off = genasm_frinit(bp, umem_tmem_off, retoff, erroff, + umem_alloc_sizes[nents - 1]); + bp += off; + erroff -= off; + rbufoff -= off; + + if (nents > 1) { + off = genasm_firstcache(bp, umem_alloc_sizes[0], rbufoff); + bp += off; + erroff -= off; + rbufoff -= off; + } + + for (ii = 1; ii < nents - 1; ii++) { + off = genasm_gencache(bp, ii, umem_alloc_sizes[ii], rbufoff); + bp += off; + rbufoff -= off; + erroff -= off; + } + + bp += genasm_lastcache(bp, nents - 1, umem_alloc_sizes[nents - 1], + erroff); + bp += genasm_frfini(bp, umem_ptc_size, umem_genasm_ofptr); + ASSERT(((uintptr_t)bp - total) == (uintptr_t)base); + + return (0); +} + +/*ARGSUSED*/ +int +umem_genasm(int *cp, umem_cache_t **caches, int nc) +{ + int nents, i; + uint8_t *mptr; + uint8_t *fptr; + uint32_t *ptr; + uint64_t v, *vptr; + + mptr = (void *)((uintptr_t)&umem_genasm_mptr + 5); + fptr = (void *)((uintptr_t)&umem_genasm_fptr + 5); + if (umem_genasm_mptr == 0 || umem_genasm_msize == 0 || + umem_genasm_fptr == 0 || umem_genasm_fsize == 0) + return (1); + + /* + * The total number of caches that we can service is the minimum of: + * o the amount supported by libc + * o the total number of umem caches + * o we use a single byte addl, so its MAX_UINT32 / sizeof (uintptr_t). + * For 64-bit, this is MAX_UINT32 >> 3, a lot. 
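+ *
+ * That is, roughly (a sketch, not the literal code below):
+ *
+ *	nents = MIN(_tmem_get_nentries(), MIN(UMEM_GENASM_MAX64, nc));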
+ */ + nents = _tmem_get_nentries(); + + if (UMEM_GENASM_MAX64 < nents) + nents = UMEM_GENASM_MAX64; + + if (nc < nents) + nents = nc; + + /* Based on our constraints, this is not an error */ + if (nents == 0 || umem_ptc_size == 0) + return (0); + + /* Grab the original malloc and free locations */ + ptr = (void *)(mptr - 4); + umem_genasm_omptr = *ptr + (uintptr_t)mptr; + ptr = (void *)(fptr - 4); + umem_genasm_ofptr = *ptr + (uintptr_t)fptr; + + /* Take into account the jump */ + if (genasm_malloc(mptr, umem_genasm_fsize - 5, nents, cp) != 0) + return (1); + + if (genasm_free(fptr, umem_genasm_fsize - 5, nents, cp) != 0) + return (1); + + /* nop out the jump with a multibyte jump */ + vptr = (void *)&umem_genasm_mptr; + v = MULTINOP; + v |= *vptr & (0xffffffULL << 40); + (void) atomic_swap_64(vptr, v); + vptr = (void *)&umem_genasm_fptr; + v = MULTINOP; + v |= *vptr & (0xffffffULL << 40); + (void) atomic_swap_64(vptr, v); + + + for (i = 0; i < nents; i++) + caches[i]->cache_flags |= UMF_PTC; + + return (0); +} diff --git a/envvar.c b/envvar.c index 1f03950..ce035cb 100644 --- a/envvar.c +++ b/envvar.c @@ -23,9 +23,9 @@ * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * - * Portions Copyright 2012 Joyent, Inc. All rights reserved. + * Copyright 2012 Joyent, Inc. All rights reserved. * - * Portions Copyright 2006-2008 Message Systems, Inc. All rights reserved. + * Copyright 2006-2008 Message Systems, Inc. All rights reserved. */ /* #pragma ident "@(#)envvar.c 1.5 05/06/08 SMI" */ @@ -181,7 +181,10 @@ static umem_env_item_t umem_options_items[] = { }, #endif #endif - + { "perthread_cache", "Evolving", ITEM_SIZE, + "Size (in bytes) of per-thread allocation cache", + NULL, 0, NULL, &umem_ptc_size + }, { NULL, "-- end of UMEM_OPTIONS --", ITEM_INVALID } }; diff --git a/getpcstack.c b/getpcstack.c index c2f5166..5fedbcd 100644 --- a/getpcstack.c +++ b/getpcstack.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,12 +18,12 @@ * * CDDL HEADER END */ + /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - */ -/* - * Portions Copyright 2006-2008 Message Systems, Inc. + * + * Copyright 2006-2008 Message Systems, Inc. */ /* #pragma ident "@(#)getpcstack.c 1.5 05/06/08 SMI" */ diff --git a/i386/asm_subr.s b/i386/asm_subr.s new file mode 100644 index 0000000..2edb2b4 --- /dev/null +++ b/i386/asm_subr.s @@ -0,0 +1,72 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include + +#if defined(lint) + +void * +getfp(void) +{ + return (NULL); +} + +#ifndef UMEM_STANDALONE +void +_breakpoint(void) +{ + return; +} +#endif + +#else /* lint */ + +#if defined(__amd64) + + ENTRY(getfp) + movq %rbp, %rax + ret + SET_SIZE(getfp) + +#else /* __i386 */ + + ENTRY(getfp) + movl %ebp, %eax + ret + SET_SIZE(getfp) + +#endif + +#ifndef UMEM_STANDALONE + ENTRY(_breakpoint) + int $3 + ret + SET_SIZE(_breakpoint) +#endif + +#endif /* lint */ diff --git a/i386/umem_genasm.c b/i386/umem_genasm.c new file mode 100644 index 0000000..0bfa338 --- /dev/null +++ b/i386/umem_genasm.c @@ -0,0 +1,603 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2012 Joyent, Inc. All rights reserved. + */ + +/* + * Don't Panic! If you find the blocks of assembly that follow confusing and + * you're questioning why they exist, please go read section 8 of the umem.c big + * theory statement. Next familiarize yourself with the malloc and free + * implementations in libumem's malloc.c. + * + * What follows is the i386 implementation of the thread caching automatic + * assembly generation. With i386 a function only has three registers its + * allowed to change without restoring them: eax, ecx, and edx. All others have + * to be preserved. Since the set of registers we have available is so small, we + * have to make use of esi, ebx, and edi and save their original values to the + * stack. + * + * Malloc register usage: + * o. esi: Size of the malloc (passed into us and modified) + * o. edi: Size of the cache + * o. eax: Buffer to return + * o. ebx: Scratch space and temporary values + * o. ecx: Pointer to the tmem_t in the ulwp_t. + * o. edx: Pointer to the tmem_t array of roots + * + * Free register usage: + * o. esi: Size of the malloc (passed into us and modified) + * o. edi: Size of the cache + * o. eax: Buffer to free + * o. ebx: Scratch space and temporary values + * o. ecx: Pointer to the tmem_t in the ulwp_t. + * o. edx: Pointer to the tmem_t array of roots + * + * Once we determine what cache we are using, we increment %edx to the + * appropriate offset and set %edi with the size of the cache. This means that + * when we break out to the normal buffer allocation point %edx contains the + * head of the linked list and %edi is the amount that we have to adjust the + * total amount cached by the thread. 
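+ *
+ * Because of those calling conventions, every generated i386 function is
+ * bracketed by the same save and restore sequence (sketched here; the
+ * exact byte encodings appear in malinit/freeinit and malfini/freefini
+ * below):
+ *
+ *	pushl %ebp; movl %esp, %ebp
+ *	pushl %edi; pushl %esi; pushl %ebx
+ *	...
+ *	popl %ebx; popl %esi; popl %edi
+ *	leave
+ *	ret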
+ * + * Each block of assembly has psuedocode that describes its purpose. + */ + +#include +#include +#include +#include "umem_base.h" + +#include + +int umem_genasm_supported = 1; +uintptr_t umem_genasm_mptr; +size_t umem_genasm_msize; +uintptr_t umem_genasm_fptr; +size_t umem_genasm_fsize; +static uintptr_t umem_genasm_omptr; +static uintptr_t umem_genasm_ofptr; + +/* + * The maximum number of caches we can support. We use a single byte addl so + * this is 255 (UINT8_MAX) / sizeof (uintptr_t). In this case 63 + */ +#define UMEM_GENASM_MAX32 63 + +#define PTC_JMPADDR(dest, src) (dest - (src + 4)) +#define PTC_ROOT_SIZE sizeof (uintptr_t) +#define MULTINOP 0x0000441f0f + +/* + * void *ptcmalloc(size_t orig_size); + * + * size_t size = orig_size + 8; + * + * if (size < orig_size) + * goto tomalloc; ! This is overflow + * + * if (size > cache_size) + * goto tomalloc; + * + * tmem_t *t = (uintptr_t)curthread() + umem_thr_offset; + * void **roots = t->tm_roots; + */ +#define PTC_MALINIT_JOUT 0x0e +#define PTC_MALINIT_MCS 0x14 +#define PTC_MALINIT_JOV 0x1a +#define PTC_MALINIT_SOFF 0x27 +static const uint8_t malinit[] = { + 0x55, /* pushl %ebp */ + 0x89, 0xe5, /* movl %esp, %ebp */ + 0x57, /* pushl %edi */ + 0x56, /* pushl %esi */ + 0x53, /* pushl %ebx */ + 0x8b, 0x75, 0x08, /* movl 0x8(%ebp), %esi */ + 0x83, 0xc6, 0x08, /* addl $0x8,%esi */ + 0x0f, 0x82, 0x00, 0x00, 0x00, 0x00, /* jc +$JMP (errout) */ + 0x81, 0xfe, 0x00, 0x00, 0x00, 0x00, /* cmpl sizeof ($C0), %esi */ + 0x0f, 0x87, 0x00, 0x00, 0x00, 0x00, /* ja +$JMP (errout) */ + 0x65, 0x8b, 0x0d, 0x00, 0x0, 0x00, 0x00, /* movl %gs:0x0,%ecx */ + 0x81, 0xc1, 0x00, 0x00, 0x00, 0x00, /* addl $OFF, %ecx */ + 0x8d, 0x51, 0x04 /* leal 0x4(%ecx), %edx */ +}; + +/* + * void ptcfree(void *buf); + * + * if (buf == NULL) + * return; + * + * malloc_data_t *tag = buf; + * tag--; + * int size = tag->malloc_size; + * int tagtval = UMEM_MALLOC_DECODE(tag->malloc_tag, size); + * + * if (tagval != MALLOC_MAGIC) + * goto tofree; + * + * if (size > cache_max) + * goto tofree; + * + * tmem_t *t = (uintptr_t)curthread() + umem_thr_offset; + * void **roots = t->tm_roots; + */ +#define PTC_FRINI_JDONE 0x0d +#define PTC_FRINI_JFREE 0x23 +#define PTC_FRINI_MCS 0x29 +#define PTC_FRINI_JOV 0x2f +#define PTC_FRINI_SOFF 0x3c +static const uint8_t freeinit[] = { + 0x55, /* pushl %ebp */ + 0x89, 0xe5, /* movl %esp, %ebp */ + 0x57, /* pushl %edi */ + 0x56, /* pushl %esi */ + 0x53, /* pushl %ebx */ + 0x8b, 0x45, 0x08, /* movl 0x8(%ebp), %eax */ + 0x85, 0xc0, /* testl %eax, %eax */ + 0x0f, 0x84, 0x00, 0x00, 0x00, 0x00, /* je $JDONE (done) */ + 0x83, 0xe8, 0x08, /* subl $0x8,%eax */ + 0x8b, 0x30, /* movl (%eax),%esi */ + 0x8b, 0x50, 0x04, /* movl 0x4(%eax),%edx */ + 0x01, 0xf2, /* addl %esi,%edx */ + 0x81, 0xfa, 0x00, 0xc0, 0x10, 0x3a, /* cmpl MAGIC32, %edx */ + 0x0f, 0x85, 0x00, 0x00, 0x00, 0x00, /* jne +JFREE (goto freebuf) */ + + 0x81, 0xfe, 0x00, 0x00, 0x00, 0x00, /* cmpl sizeof ($C0), %esi */ + 0x0f, 0x87, 0x00, 0x00, 0x00, 0x00, /* ja +$JMP (errout) */ + 0x65, 0x8b, 0x0d, 0x00, 0x0, 0x00, 0x00, /* movl %gs:0x0,%ecx */ + 0x81, 0xc1, 0x00, 0x00, 0x00, 0x00, /* addl $0xOFF, %ecx */ + 0x8d, 0x51, 0x04 /* leal 0x4(%ecx),%edx */ +}; + +/* + * if (size <= $CACHE_SIZE) { + * csize = $CACHE_SIZE; + * } else ... ! 
goto next cache + */ +#define PTC_INICACHE_CMP 0x02 +#define PTC_INICACHE_SIZE 0x09 +#define PTC_INICACHE_JMP 0x0e +static const uint8_t inicache[] = { + 0x81, 0xfe, 0xff, 0x00, 0x00, 0x00, /* cmpl sizeof ($C0), %esi */ + 0x77, 0x0a, /* ja +0xa */ + 0xbf, 0xff, 0x00, 0x00, 0x00, /* movl sizeof ($C0), %edi */ + 0xe9, 0x00, 0x00, 0x00, 0x00 /* jmp +$JMP (allocbuf) */ +}; + +/* + * if (size <= $CACHE_SIZE) { + * csize = $CACHE_SIZE; + * roots += $CACHE_NUM; + * } else ... ! goto next cache + */ +#define PTC_GENCACHE_CMP 0x02 +#define PTC_GENCACHE_NUM 0x0a +#define PTC_GENCACHE_SIZE 0x0c +#define PTC_GENCACHE_JMP 0x11 +static const uint8_t gencache[] = { + 0x81, 0xfe, 0x00, 0x00, 0x00, 0x00, /* cmpl sizeof ($CACHE), %esi */ + 0x77, 0x0d, /* ja +0xd (next cache) */ + 0x83, 0xc2, 0x00, /* addl $4*$ii, %edx */ + 0xbf, 0x00, 0x00, 0x00, 0x00, /* movl sizeof ($CACHE), %edi */ + 0xe9, 0x00, 0x00, 0x00, 0x00 /* jmp +$JMP (allocbuf) */ +}; + +/* + * else if (size <= $CACHE_SIZE) { + * csize = $CACHE_SIZE; + * roots += $CACHE_NUM; + * } else { + * goto tofunc; ! goto tomalloc if ptcmalloc. + * } ! goto tofree if ptcfree. + */ +#define PTC_FINCACHE_CMP 0x02 +#define PTC_FINCACHE_JMP 0x07 +#define PTC_FINCACHE_NUM 0x0a +#define PTC_FINCACHE_SIZE 0x0c +static const uint8_t fincache[] = { + 0x81, 0xfe, 0xff, 0x00, 0x00, 0x00, /* cmpl sizeof ($CLAST), %esi */ + 0x77, 0x00, /* ja +$JMP (to errout) */ + 0x83, 0xc2, 0x00, /* addl $4*($NCACHES-1), %edx */ + 0xbf, 0x00, 0x00, 0x00, 0x00, /* movl sizeof ($CLAST), %edi */ +}; + +/* + * if (*root == NULL) + * goto tomalloc; + * + * malloc_data_t *ret = *root; + * *root = *(void **)ret; + * t->tm_size += csize; + * ret->malloc_size = size; + * + * ret->malloc_data = UMEM_MALLOC_ENCODE(MALLOC_SECOND_MAGIC, size); + * ret++; + * + * return ((void *)ret); + * tomalloc: + * return (malloc(orig_size)); + */ +#define PTC_MALFINI_ALLABEL 0x00 +#define PTC_MALFINI_JMLABEL 0x20 +#define PTC_MALFINI_JMADDR 0x25 +static const uint8_t malfini[] = { + /* allocbuf: */ + 0x8b, 0x02, /* movl (%edx), %eax */ + 0x85, 0xc0, /* testl %eax, %eax */ + 0x74, 0x1a, /* je +0x1a (errout) */ + 0x8b, 0x18, /* movl (%eax), %esi */ + 0x89, 0x1a, /* movl %esi, (%edx) */ + 0x29, 0x39, /* subl %edi, (%ecx) */ + 0x89, 0x30, /* movl %esi, ($eax) */ + 0xba, 0x00, 0xc0, 0x10, 0x3a, /* movl $0x3a10c000,%edx */ + 0x29, 0xf2, /* subl %esi, %edx */ + 0x89, 0x50, 0x04, /* movl %edx, 0x4(%eax) */ + 0x83, 0xc0, 0x08, /* addl %0x8, %eax */ + 0x5b, /* popl %ebx */ + 0x5e, /* popl %esi */ + 0x5f, /* popl %edi */ + 0xc9, /* leave */ + 0xc3, /* ret */ + /* errout: */ + 0x5b, /* popl %ebx */ + 0x5e, /* popl %esi */ + 0x5f, /* popl %edi */ + 0xc9, /* leave */ + 0xe9, 0x00, 0x00, 0x00, 0x00 /* jmp $malloc */ +}; + +/* + * if (t->tm_size + csize > umem_ptc_size) + * goto tofree; + * + * t->tm_size += csize + * *(void **)tag = *root; + * *root = tag; + * return; + * tofree: + * free(buf); + * return; + */ +#define PTC_FRFINI_RBUFLABEL 0x00 +#define PTC_FRFINI_CACHEMAX 0x06 +#define PTC_FRFINI_DONELABEL 0x14 +#define PTC_FRFINI_JFLABEL 0x19 +#define PTC_FRFINI_JFADDR 0x1e +static const uint8_t freefini[] = { + /* freebuf: */ + 0x8b, 0x19, /* movl (%ecx),%ebx */ + 0x01, 0xfb, /* addl %edi,%ebx */ + 0x81, 0xfb, 0x00, 0x00, 0x00, 0x00, /* cmpl maxsize, %ebx */ + 0x73, 0x0d, /* jae +0xd */ + 0x01, 0x39, /* addl %edi,(%ecx) */ + 0x8b, 0x3a, /* movl (%edx),%edi */ + 0x89, 0x38, /* movl %edi,(%eax) */ + 0x89, 0x02, /* movl %eax,(%edx) */ + /* done: */ + 0x5b, /* popl %ebx */ + 0x5e, /* popl %esi */ + 0x5f, /* popl %edi */ + 
0xc9, /* leave */ + 0xc3, /* ret */ + /* realfree: */ + 0x5b, /* popl %ebx */ + 0x5e, /* popl %esi */ + 0x5f, /* popl %edi */ + 0xc9, /* leave */ + 0xe9, 0x00, 0x00, 0x00, 0x00 /* jmp free */ +}; + +/* + * Construct the initial part of malloc. off contains the offset from curthread + * to the root of the tmem structure. ep is the address of the label to error + * and jump to free. csize is the size of the largest umem_cache in ptcumem. + */ +static int +genasm_malinit(uint8_t *bp, uint32_t off, uint32_t ep, uint32_t csize) +{ + uint32_t addr; + + bcopy(malinit, bp, sizeof (malinit)); + addr = PTC_JMPADDR(ep, PTC_MALINIT_JOUT); + bcopy(&addr, bp + PTC_MALINIT_JOUT, sizeof (addr)); + bcopy(&csize, bp + PTC_MALINIT_MCS, sizeof (csize)); + addr = PTC_JMPADDR(ep, PTC_MALINIT_JOV); + bcopy(&addr, bp + PTC_MALINIT_JOV, sizeof (addr)); + bcopy(&off, bp + PTC_MALINIT_SOFF, sizeof (off)); + + return (sizeof (malinit)); +} + +static int +genasm_frinit(uint8_t *bp, uint32_t off, uint32_t dp, uint32_t ep, uint32_t mc) +{ + uint32_t addr; + + bcopy(freeinit, bp, sizeof (freeinit)); + addr = PTC_JMPADDR(dp, PTC_FRINI_JDONE); + bcopy(&addr, bp + PTC_FRINI_JDONE, sizeof (addr)); + addr = PTC_JMPADDR(ep, PTC_FRINI_JFREE); + bcopy(&addr, bp + PTC_FRINI_JFREE, sizeof (addr)); + bcopy(&mc, bp + PTC_FRINI_MCS, sizeof (mc)); + addr = PTC_JMPADDR(ep, PTC_FRINI_JOV); + bcopy(&addr, bp + PTC_FRINI_JOV, sizeof (addr)); + bcopy(&off, bp + PTC_FRINI_SOFF, sizeof (off)); + return (sizeof (freeinit)); +} + +/* + * Create the initial cache entry of the specified size. The value of ap tells + * us what the address of the label to try and allocate a buffer. This value is + * an offset from the current base to that value. + */ +static int +genasm_firstcache(uint8_t *bp, uint32_t csize, uint32_t ap) +{ + uint32_t addr; + + bcopy(inicache, bp, sizeof (inicache)); + bcopy(&csize, bp + PTC_INICACHE_CMP, sizeof (csize)); + bcopy(&csize, bp + PTC_INICACHE_SIZE, sizeof (csize)); + addr = PTC_JMPADDR(ap, PTC_INICACHE_JMP); + ASSERT(addr != 0); + bcopy(&addr, bp + PTC_INICACHE_JMP, sizeof (addr)); + + return (sizeof (inicache)); +} + +static int +genasm_gencache(uint8_t *bp, int num, uint32_t csize, uint32_t ap) +{ + uint32_t addr; + uint8_t coff; + + ASSERT(256 / PTC_ROOT_SIZE > num); + ASSERT(num != 0); + bcopy(gencache, bp, sizeof (gencache)); + bcopy(&csize, bp + PTC_GENCACHE_CMP, sizeof (csize)); + bcopy(&csize, bp + PTC_GENCACHE_SIZE, sizeof (csize)); + coff = num * PTC_ROOT_SIZE; + bcopy(&coff, bp + PTC_GENCACHE_NUM, sizeof (coff)); + addr = PTC_JMPADDR(ap, PTC_GENCACHE_JMP); + bcopy(&addr, bp + PTC_GENCACHE_JMP, sizeof (addr)); + + return (sizeof (gencache)); +} + +static int +genasm_lastcache(uint8_t *bp, int num, uint32_t csize, uint32_t ep) +{ + uint8_t addr; + + ASSERT(ep <= 0xff && ep > 7); + ASSERT(256 / PTC_ROOT_SIZE > num); + bcopy(fincache, bp, sizeof (fincache)); + bcopy(&csize, bp + PTC_FINCACHE_CMP, sizeof (csize)); + bcopy(&csize, bp + PTC_FINCACHE_SIZE, sizeof (csize)); + addr = num * PTC_ROOT_SIZE; + bcopy(&addr, bp + PTC_FINCACHE_NUM, sizeof (addr)); + addr = ep - PTC_FINCACHE_JMP - 1; + bcopy(&addr, bp + PTC_FINCACHE_JMP, sizeof (addr)); + + return (sizeof (fincache)); +} + +static int +genasm_malfini(uint8_t *bp, uintptr_t mptr) +{ + uint32_t addr; + + bcopy(malfini, bp, sizeof (malfini)); + addr = PTC_JMPADDR(mptr, ((uintptr_t)bp + PTC_MALFINI_JMADDR)); + bcopy(&addr, bp + PTC_MALFINI_JMADDR, sizeof (addr)); + + return (sizeof (malfini)); +} + +static int +genasm_frfini(uint8_t *bp, uint32_t maxthr, 
uintptr_t fptr) +{ + uint32_t addr; + + bcopy(freefini, bp, sizeof (freefini)); + bcopy(&maxthr, bp + PTC_FRFINI_CACHEMAX, sizeof (maxthr)); + addr = PTC_JMPADDR(fptr, ((uintptr_t)bp + PTC_FRFINI_JFADDR)); + bcopy(&addr, bp + PTC_FRFINI_JFADDR, sizeof (addr)); + + return (sizeof (freefini)); +} + +/* + * The malloc inline assembly is constructed as follows: + * + * o Malloc prologue assembly + * o Generic first-cache check + * o n Generic cache checks (where n = _tmem_get_entries() - 2) + * o Generic last-cache check + * o Malloc epilogue assembly + * + * Generally there are at least three caches. When there is only one cache we + * only use the generic last-cache. In the case where there are two caches, we + * just leave out the middle ones. + */ +static int +genasm_malloc(void *base, size_t len, int nents, int *umem_alloc_sizes) +{ + int ii, off; + uint8_t *bp; + size_t total; + uint32_t allocoff, erroff; + + total = sizeof (malinit) + sizeof (malfini) + sizeof (fincache); + + if (nents >= 2) + total += sizeof (inicache) + sizeof (gencache) * (nents - 2); + + if (total > len) + return (1); + + erroff = total - sizeof (malfini) + PTC_MALFINI_JMLABEL; + allocoff = total - sizeof (malfini) + PTC_MALFINI_ALLABEL; + + bp = base; + + off = genasm_malinit(bp, umem_tmem_off, erroff, + umem_alloc_sizes[nents-1]); + bp += off; + allocoff -= off; + erroff -= off; + + if (nents > 1) { + off = genasm_firstcache(bp, umem_alloc_sizes[0], allocoff); + bp += off; + allocoff -= off; + erroff -= off; + } + + for (ii = 1; ii < nents - 1; ii++) { + off = genasm_gencache(bp, ii, umem_alloc_sizes[ii], allocoff); + bp += off; + allocoff -= off; + erroff -= off; + } + + bp += genasm_lastcache(bp, nents - 1, umem_alloc_sizes[nents - 1], + erroff); + bp += genasm_malfini(bp, umem_genasm_omptr); + ASSERT(((uintptr_t)bp - total) == (uintptr_t)base); + + return (0); +} + +static int +genasm_free(void *base, size_t len, int nents, int *umem_alloc_sizes) +{ + uint8_t *bp; + int ii, off; + size_t total; + uint32_t rbufoff, retoff, erroff; + + /* Assume that nents has already been audited for us */ + total = sizeof (freeinit) + sizeof (freefini) + sizeof (fincache); + if (nents >= 2) + total += sizeof (inicache) + sizeof (gencache) * (nents - 2); + + if (total > len) + return (1); + + erroff = total - (sizeof (freefini) - PTC_FRFINI_JFLABEL); + rbufoff = total - (sizeof (freefini) - PTC_FRFINI_RBUFLABEL); + retoff = total - (sizeof (freefini) - PTC_FRFINI_DONELABEL); + + bp = base; + + off = genasm_frinit(bp, umem_tmem_off, retoff, erroff, + umem_alloc_sizes[nents - 1]); + bp += off; + erroff -= off; + rbufoff -= off; + + if (nents > 1) { + off = genasm_firstcache(bp, umem_alloc_sizes[0], rbufoff); + bp += off; + erroff -= off; + rbufoff -= off; + } + + for (ii = 1; ii < nents - 1; ii++) { + off = genasm_gencache(bp, ii, umem_alloc_sizes[ii], rbufoff); + bp += off; + rbufoff -= off; + erroff -= off; + } + + bp += genasm_lastcache(bp, nents - 1, umem_alloc_sizes[nents - 1], + erroff); + bp += genasm_frfini(bp, umem_ptc_size, umem_genasm_ofptr); + ASSERT(((uintptr_t)bp - total) == (uintptr_t)base); + + return (0); +} + +int +umem_genasm(int *alloc_sizes, umem_cache_t **caches, int ncaches) +{ + int nents, i; + uint8_t *mptr; + uint8_t *fptr; + uint32_t *ptr; + uint64_t v, *vptr; + + mptr = (void *)((uintptr_t)&umem_genasm_mptr + 5); + fptr = (void *)((uintptr_t)&umem_genasm_fptr + 5); + if (umem_genasm_mptr == 0 || umem_genasm_msize == 0 || + umem_genasm_fptr == 0 || umem_genasm_fsize == 0) + return (1); + + /* + * The 
total number of caches that we can service is the minimum of: + * o the amount supported by libc + * o the total number of umem caches + * o we use a single byte addl, so its 255 / sizeof (uintptr_t). For + * 32-bit, this is 63. + */ + nents = _tmem_get_nentries(); + + if (UMEM_GENASM_MAX32 < nents) + nents = UMEM_GENASM_MAX32; + + if (ncaches < nents) + nents = ncaches; + + /* Based on our constraints, this is not an error */ + if (nents == 0 || umem_ptc_size == 0) + return (0); + + /* Grab the original malloc and free locations */ + ptr = (void *)(mptr - 4); + umem_genasm_omptr = *ptr + (uintptr_t)mptr; + ptr = (void *)(fptr - 4); + umem_genasm_ofptr = *ptr + (uintptr_t)fptr; + + /* Take into account the jump */ + if (genasm_malloc(mptr, umem_genasm_fsize - 5, nents, + alloc_sizes) != 0) + return (1); + + if (genasm_free(fptr, umem_genasm_fsize - 5, nents, + alloc_sizes) != 0) + return (1); + + /* nop out the jump with a multibyte jump */ + vptr = (void *)&umem_genasm_mptr; + v = MULTINOP; + v |= *vptr & (0xffffffULL << 40); + (void) atomic_swap_64(vptr, v); + vptr = (void *)&umem_genasm_fptr; + v = MULTINOP; + v |= *vptr & (0xffffffULL << 40); + (void) atomic_swap_64(vptr, v); + + for (i = 0; i < nents; i++) + caches[i]->cache_flags |= UMF_PTC; + + return (0); +} diff --git a/init_lib.c b/init_lib.c index e165a56..2338bef 100644 --- a/init_lib.c +++ b/init_lib.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -24,7 +23,7 @@ * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * - * Portions Copyright 2006-2008 Message Systems, Inc. All rights reserved. + * Copyright 2006-2008 Message Systems, Inc. All rights reserved. */ /* #pragma ident "@(#)init_lib.c 1.2 05/06/08 SMI" */ @@ -149,7 +148,7 @@ umem_get_max_ncpus(void) return info.dwNumberOfProcessors; #else /* XXX: determine CPU count on other platforms */ - return (1); + #error Cannot detremine CPU count on this platform, please submit a bug (and a patch) for this platform. #endif #endif /* linux */ diff --git a/init_stand.c b/init_stand.c index 56bd76a..849193f 100644 --- a/init_stand.c +++ b/init_stand.c @@ -24,7 +24,7 @@ * Use is subject to license terms. */ -#pragma ident "@(#)init_stand.c 1.3 05/06/08 SMI" +/* #pragma ident "@(#)init_stand.c 1.3 05/06/08 SMI" */ /* * Initialization routines for the standalone version of libumem. diff --git a/linktest_stand.c b/linktest_stand.c index d0b9701..ec97460 100644 --- a/linktest_stand.c +++ b/linktest_stand.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -25,7 +24,7 @@ * Use is subject to license terms. 
*/ -#pragma ident "@(#)linktest_stand.c 1.3 05/06/08 SMI" +/* #pragma ident "@(#)linktest_stand.c 1.3 05/06/08 SMI" */ /* * This file is used to verify that the standalone's external dependencies @@ -35,6 +34,7 @@ void __umem_assert_failed(void) {} void _atomic_add_64(void) {} void _atomic_add_32_nv(void) {} +void dladdr1(void) {} void bcopy(void) {} void bzero(void) {} void dladdr1(void) {} @@ -43,6 +43,7 @@ void getenv(void) {} void gethrtime(void) {} void membar_producer(void) {} void memcpy(void) {} +void _memcpy(void) {} void memset(void) {} void snprintf(void) {} void strchr(void) {} diff --git a/malloc.c b/malloc.c index 57920ef..1ce5419 100644 --- a/malloc.c +++ b/malloc.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -25,7 +24,7 @@ * Use is subject to license terms. */ -#pragma ident "@(#)malloc.c 1.5 05/06/08 SMI" +/* #pragma ident "@(#)malloc.c 1.5 05/06/08 SMI" */ #include "config.h" #include diff --git a/misc.c b/misc.c index 4cc8beb..c792d00 100644 --- a/misc.c +++ b/misc.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -24,7 +23,7 @@ * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * - * Portions Copyright 2006-2008 Message Systems, Inc. All rights reserved. + * Copyright 2006-2008 Message Systems, Inc. All rights reserved. */ /* #pragma ident "@(#)misc.c 1.6 05/06/08 SMI" */ diff --git a/misc.h b/misc.h index 43db6b3..7cb204d 100644 --- a/misc.h +++ b/misc.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -24,7 +23,7 @@ * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * - * Portions Copyright 2006-2008 Message Systems, Inc. All rights reserved. + * Copyright 2006-2008 Message Systems, Inc. All rights reserved. */ #ifndef _MISC_H @@ -39,6 +38,7 @@ #endif #ifdef HAVE_THREAD_H # include +# include #else # include "sol_compat.h" #endif diff --git a/stand_mapfile b/stand_mapfile index 65defd0..548a95a 100644 --- a/stand_mapfile +++ b/stand_mapfile @@ -1,13 +1,11 @@ # -# Copyright 2005 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. +# Copyright (c) 2004, 2010, Oracle and/or its affiliates. 
All rights reserved. # # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. diff --git a/stub_stand.c b/stub_stand.c index e6b8a62..35ec4b3 100644 --- a/stub_stand.c +++ b/stub_stand.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -23,9 +22,11 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ -#pragma ident "@(#)stub_stand.c 1.3 05/06/08 SMI" +/* #pragma ident "@(#)stub_stand.c 1.3 05/06/08 SMI" */ /* * Stubs for the standalone to reduce the dependence on external libraries @@ -124,3 +125,29 @@ issetugid(void) { return (1); } + +int +_tmem_get_nentries(void) +{ + return (0); +} + +uintptr_t +_tmem_get_base(void) +{ + return (0); +} + +/*ARGSUSED*/ +void +_tmem_set_cleanup(void (*f)(int, void *)) +{ +} + +uint64_t +atomic_swap_64(volatile uint64_t *t, uint64_t v) +{ + uint64_t old = *t; + *t = v; + return (old); +} diff --git a/sys/vmem_impl_user.h b/sys/vmem_impl_user.h index 3afab1e..6be4f4e 100644 --- a/sys/vmem_impl_user.h +++ b/sys/vmem_impl_user.h @@ -23,7 +23,7 @@ * Copyright 1999-2002 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * - * Portions Copyright 2006-2008 Message Systems, Inc. All rights reserved. + * Copyright 2006-2008 Message Systems, Inc. All rights reserved. */ #ifndef _SYS_VMEM_IMPL_USER_H diff --git a/umem.c b/umem.c index c0657bd..ea01d4a 100644 --- a/umem.c +++ b/umem.c @@ -25,7 +25,7 @@ * Portions Copyright 2012 Joyent, Inc. All rights reserved. * Use is subject to license terms. * - * Portions Copyright 2006-2008 Message Systems, Inc. All rights reserved. + * Copyright 2006-2008 Message Systems, Inc. All rights reserved. */ /* #pragma ident "@(#)umem.c 1.11 05/06/08 SMI" */ @@ -78,18 +78,22 @@ * * 1. Overview * ----------- - * umem is very close to kmem in implementation. There are four major + * umem is very close to kmem in implementation. There are seven major * areas of divergence: * - * * Initialization + * * Initialization * - * * CPU handling + * * CPU handling * - * * umem_update() + * * umem_update() * - * * KM_SLEEP v.s. UMEM_NOFAIL + * * KM_SLEEP v.s. UMEM_NOFAIL * - * * lock ordering + * * lock ordering + * + * * changing UMEM_MAXBUF + * + * * Per-thread caching for malloc/free * * 2. Initialization * ----------------- @@ -97,9 +101,9 @@ * into it before it is ready. umem does not have these luxuries. 
Instead, * initialization is divided into two phases: * - * * library initialization, and + * * library initialization, and * - * * first use + * * first use * * umem's full initialization happens at the time of the first allocation * request (via malloc() and friends, umem_alloc(), or umem_zalloc()), @@ -129,13 +133,13 @@ * * There are four different paths from which umem_init() is called: * - * * from umem_alloc() or umem_zalloc(), with 0 < size < UMEM_MAXBUF, + * * from umem_alloc() or umem_zalloc(), with 0 < size < UMEM_MAXBUF, * - * * from umem_alloc() or umem_zalloc(), with size > UMEM_MAXBUF, + * * from umem_alloc() or umem_zalloc(), with size > UMEM_MAXBUF, * - * * from umem_cache_create(), and + * * from umem_cache_create(), and * - * * from memalign(), with align > UMEM_ALIGN. + * * from memalign(), with align > UMEM_ALIGN. * * The last three just check if umem is initialized, and call umem_init() * if it is not. For performance reasons, the first case is more complicated. @@ -160,16 +164,16 @@ * There are a couple race conditions resulting from the initialization * code that we have to guard against: * - * * In umem_cache_create(), there is a special UMC_INTERNAL cflag - * that is passed for caches created during initialization. It - * is illegal for a user to try to create a UMC_INTERNAL cache. - * This allows initialization to proceed, but any other - * umem_cache_create()s will block by calling umem_init(). + * * In umem_cache_create(), there is a special UMC_INTERNAL cflag + * that is passed for caches created during initialization. It + * is illegal for a user to try to create a UMC_INTERNAL cache. + * This allows initialization to proceed, but any other + * umem_cache_create()s will block by calling umem_init(). * - * * Since umem_null_cache has a 1-element cache_cpu, it's cache_cpu_mask - * is always zero. umem_cache_alloc uses cp->cache_cpu_mask to - * mask the cpu number. This prevents a race between grabbing a - * cache pointer out of umem_alloc_table and growing the cpu array. + * * Since umem_null_cache has a 1-element cache_cpu, it's cache_cpu_mask + * is always zero. umem_cache_alloc uses cp->cache_cpu_mask to + * mask the cpu number. This prevents a race between grabbing a + * cache pointer out of umem_alloc_table and growing the cpu array. * * * 3. CPU handling @@ -203,16 +207,16 @@ * ----------------------------------------- * A given cache is in one of three states: * - * Inactive cache_uflags is zero, cache_u{next,prev} are NULL + * Inactive cache_uflags is zero, cache_u{next,prev} are NULL * - * Work Requested cache_uflags is non-zero (but UMU_ACTIVE is not set), - * cache_u{next,prev} link the cache onto the global - * update list + * Work Requested cache_uflags is non-zero (but UMU_ACTIVE is not set), + * cache_u{next,prev} link the cache onto the global + * update list * - * Active cache_uflags has UMU_ACTIVE set, cache_u{next,prev} - * are NULL, and either umem_update_thr or - * umem_st_update_thr are actively doing work on the - * cache. + * Active cache_uflags has UMU_ACTIVE set, cache_u{next,prev} + * are NULL, and either umem_update_thr or + * umem_st_update_thr are actively doing work on the + * cache. * * An update can be added to any cache in any state -- if the cache is * Inactive, it transitions to being Work Requested. If the cache is @@ -249,12 +253,12 @@ * The update thread spends most of its time in cond_timedwait() on the * umem_update_cv. 
It wakes up under two conditions: * - * * The timedwait times out, in which case it needs to run a global - * update, or + * * The timedwait times out, in which case it needs to run a global + * update, or * - * * someone cond_broadcast(3THR)s the umem_update_cv, in which case - * it needs to check if there are any caches in the Work Requested - * state. + * * someone cond_broadcast(3THR)s the umem_update_cv, in which case + * it needs to check if there are any caches in the Work Requested + * state. * * When it is time for another global update, umem calls umem_cache_update() * on every cache, then calls vmem_update(), which tunes the vmem structures. @@ -290,19 +294,19 @@ * * Because we locked all of the mutexes, the only possible inconsistancies are: * - * * a umem_cache_alloc() could leak its buffer. + * * a umem_cache_alloc() could leak its buffer. * - * * a caller of umem_depot_alloc() could leak a magazine, and all the - * buffers contained in it. + * * a caller of umem_depot_alloc() could leak a magazine, and all the + * buffers contained in it. * - * * a cache could be in the Active update state. In the child, there - * would be no thread actually working on it. + * * a cache could be in the Active update state. In the child, there + * would be no thread actually working on it. * - * * a umem_hash_rescale() could leak the new hash table. + * * a umem_hash_rescale() could leak the new hash table. * - * * a umem_magazine_resize() could be in progress. + * * a umem_magazine_resize() could be in progress. * - * * a umem_reap() could be in progress. + * * a umem_reap() could be in progress. * * The memory leaks we can't do anything about. umem_release_child() resets * the update state, moves any caches in the Active state to the Work Requested @@ -328,24 +332,24 @@ * that its clients have any particular type of behavior. Instead, * it provides two types of allocations: * - * * UMEM_DEFAULT, equivalent to KM_NOSLEEP (i.e. return NULL on - * failure) + * * UMEM_DEFAULT, equivalent to KM_NOSLEEP (i.e. return NULL on + * failure) * - * * UMEM_NOFAIL, which, on failure, calls an optional callback - * (registered with umem_nofail_callback()). + * * UMEM_NOFAIL, which, on failure, calls an optional callback + * (registered with umem_nofail_callback()). * * The callback is invoked with no locks held, and can do an arbitrary * amount of work. It then has a choice between: * - * * Returning UMEM_CALLBACK_RETRY, which will cause the allocation - * to be restarted. + * * Returning UMEM_CALLBACK_RETRY, which will cause the allocation + * to be restarted. * - * * Returning UMEM_CALLBACK_EXIT(status), which will cause exit(2) - * to be invoked with status. If multiple threads attempt to do - * this simultaneously, only one will call exit(2). + * * Returning UMEM_CALLBACK_EXIT(status), which will cause exit(2) + * to be invoked with status. If multiple threads attempt to do + * this simultaneously, only one will call exit(2). * - * * Doing some kind of non-local exit (thr_exit(3thr), longjmp(3C), - * etc.) + * * Doing some kind of non-local exit (thr_exit(3thr), longjmp(3C), + * etc.) * * The default callback returns UMEM_CALLBACK_EXIT(255). * @@ -354,16 +358,16 @@ * close to the original allocation, with no inconsistent state or held * locks. The following steps are taken: * - * * All invocations of vmem are VM_NOSLEEP. + * * All invocations of vmem are VM_NOSLEEP. 
* - * * All constructor callbacks (which can themselves to allocations) - * are passed UMEM_DEFAULT as their required allocation argument. This - * way, the constructor will fail, allowing the highest-level allocation - * invoke the nofail callback. + * * All constructor callbacks (which can themselves to allocations) + * are passed UMEM_DEFAULT as their required allocation argument. This + * way, the constructor will fail, allowing the highest-level allocation + * invoke the nofail callback. * - * If a constructor callback _does_ do a UMEM_NOFAIL allocation, and - * the nofail callback does a non-local exit, we will leak the - * partially-constructed buffer. + * If a constructor callback _does_ do a UMEM_NOFAIL allocation, and + * the nofail callback does a non-local exit, we will leak the + * partially-constructed buffer. * * * 6. Lock Ordering @@ -371,26 +375,24 @@ * umem has a few more locks than kmem does, mostly in the update path. The * overall lock ordering (earlier locks must be acquired first) is: * - * umem_init_lock + * umem_init_lock * - * vmem_list_lock - * vmem_nosleep_lock.vmpl_mutex - * vmem_t's: - * vm_lock - * sbrk_lock + * vmem_list_lock + * vmem_nosleep_lock.vmpl_mutex + * vmem_t's: + * vm_lock + * sbrk_lock * - * umem_cache_lock - * umem_update_lock - * umem_flags_lock - * umem_cache_t's: - * cache_cpu[*].cc_lock - * cache_depot_lock - * cache_lock - * umem_log_header_t's: - * lh_cpu[*].clh_lock - * lh_lock - * - * \endcode + * umem_cache_lock + * umem_update_lock + * umem_flags_lock + * umem_cache_t's: + * cache_cpu[*].cc_lock + * cache_depot_lock + * cache_lock + * umem_log_header_t's: + * lh_cpu[*].clh_lock + * lh_lock * * 7. Changing UMEM_MAXBUF * ----------------------- @@ -402,6 +404,237 @@ * * The second place to update, which is not required, is the umem_alloc_sizes. * These determine the default cache sizes that we're going to support. + * + * 8. Per-thread caching for malloc/free + * ------------------------------------- + * + * "Time is an illusion. Lunchtime doubly so." -- Douglas Adams + * + * Time may be an illusion, but CPU cycles aren't. While libumem is designed + * to be a highly scalable allocator, that scalability comes with a fixed cycle + * penalty even in the absence of contention: libumem must acquire (and release + * a per-CPU lock for each allocation. When contention is low and malloc(3C) + * frequency is high, this overhead can dominate execution time. To alleviate + * this, we allow for per-thread caching, a lock-free means of caching recent + * deallocations on a per-thread basis for use in satisfying subsequent calls + * + * In addition to improving performance, we also want to: + * * Minimize fragmentation + * * Not add additional memory overhead (no larger malloc tags) + * + * In the ulwp_t of each thread there is a private data structure called a + * umem_t that looks like: + * + * typedef struct { + * size_t tm_size; + * void *tm_roots[NTMEMBASE]; (Currently 16) + * } tmem_t; + * + * Each of the roots is treated as the head of a linked list. Each entry in the + * list can be thought of as a void ** which points to the next entry, until one + * of them points to NULL. If the head points to NULL, the list is empty. + * + * Each head corresponds to a umem_cache. Currently there is a linear mapping + * where the first root corresponds to the first cache, second root to the + * second cache, etc. 
+ *
+ * Each head corresponds to a umem_cache. Currently there is a linear mapping
+ * where the first root corresponds to the first cache, second root to the
+ * second cache, etc. This works because every allocation that malloc makes to
+ * umem_alloc that can be satisfied by a umem_cache will actually return a
+ * number of bytes equal to the size of that cache. Because of this property
+ * and the one-to-one mapping between caches and roots, we can guarantee that
+ * every entry in a given root's list will be able to satisfy the same
+ * requests as the corresponding cache.
+ *
+ * The maximum amount of memory that can be cached in each thread is determined
+ * by the perthread_cache UMEM_OPTION. It corresponds to the umem_ptc_size
+ * value. The default value for this is currently 1 MB. Once umem_init() has
+ * finished, this cannot be tuned without directly modifying the instruction
+ * text. If, upon calling free(3C), the amount cached would exceed this
+ * maximum, we return the buffer to the umem_cache instead of holding onto it
+ * in the thread.
+ *
+ * When a thread calls malloc(3C) it first determines which umem_cache would
+ * service the allocation. If the allocation is not covered by ptcumem it goes
+ * to the normal malloc instead. Next, it checks if the corresponding
+ * tm_root's list is empty or not. If it is empty, we allocate the memory from
+ * umem_alloc. If it is not empty, we remove the head of the list, set the
+ * appropriate malloc tags, and return that buffer.
+ *
+ * When a thread calls free(3C) it first looks at the malloc tag; if it is
+ * invalid or the allocation exceeds the largest cache in ptcumem, it sends
+ * the buffer off to the original free() to handle and clean up appropriately.
+ * Next, it checks if the allocation size is covered by one of the per-thread
+ * roots; if it isn't, it passes the buffer off to the original free() to be
+ * released. Finally, before it inserts this buffer as the head, it checks if
+ * adding this buffer would put the thread over its maximum cache size. If it
+ * would, it frees the buffer back to the umem_cache. Otherwise it increments
+ * the thread's total cached amount and makes the buffer the new head of the
+ * appropriate tm_root.
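+ *
+ * Putting those two flows together in C (again an illustrative sketch: the
+ * real ptcmalloc/ptcfree are generated assembly, malloc tag handling is
+ * elided, and cache_index()/cache_size()/tag_cache_index() are hypothetical
+ * helpers mapping a size or tag to its cache's root and back):
+ *
+ * void *
+ * ptcmalloc(size_t size)
+ * {
+ * int c = cache_index(size);
+ * tmem_t *t = &curthread->ul_tmem;
+ * if (c == -1 || t->tm_roots[c] == NULL)
+ * return (malloc(size)); ! original libumem malloc
+ * t->tm_size -= cache_size(c);
+ * return (tmem_pop(&t->tm_roots[c])); ! plus malloc tag setup
+ * }
+ *
+ * void
+ * ptcfree(void *buf)
+ * {
+ * int c = tag_cache_index(buf); ! from the malloc tag
+ * tmem_t *t = &curthread->ul_tmem;
+ * if (c == -1 || t->tm_size + cache_size(c) > umem_ptc_size) {
+ * free(buf); ! original free(); releases to the umem_cache
+ * } else {
+ * t->tm_size += cache_size(c);
+ * tmem_push(&t->tm_roots[c], buf);
+ * }
+ * }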
+ *
+ * When a thread exits, all of the buffers that it has in its per-thread cache
+ * will be passed to umem_free() and returned to the appropriate umem_cache.
+ *
+ * 8.1 Handling addition and removal of umem_caches
+ * ------------------------------------------------
+ *
+ * The set of umem_caches that are used to back calls to umem_alloc() and
+ * ultimately malloc() is determined at program execution time. The default
+ * set of caches is defined below in umem_alloc_sizes[]. Various UMEM_OPTIONS
+ * exist that modify the set of caches: size_add, size_clear, and size_remove.
+ * Because the set of caches can only be determined once umem_init() has been
+ * called, and we have the additional goals of minimizing fragmentation and
+ * metadata space overhead in the malloc tags, this forces our hand to go down
+ * a slightly different path: the one trod by fasttrap and trapstat.
+ *
+ * During umem_init we're going to dynamically construct a new version of
+ * malloc(3C) and free(3C) that utilizes the known cache sizes and then ensure
+ * that ptcmalloc and ptcfree replace malloc and free as entries in the PLT.
+ * If ptcmalloc and ptcfree cannot handle a request, they simply jump to the
+ * original libumem implementations.
+ *
+ * After creating all of the umem_caches, but before making them visible,
+ * umem_cache_init checks that umem_genasm_supported is non-zero. This value
+ * is set by each architecture in $ARCH/umem_genasm.c to indicate whether or
+ * not it supports this. If the value is zero, then this process is skipped.
+ * Similarly, if the per-thread cache size has been tuned to zero by
+ * UMEM_OPTIONS, then this is also skipped.
+ *
+ * In umem_genasm.c, each architecture implements a single function called
+ * umem_genasm() that is responsible for generating the appropriate versions
+ * of ptcmalloc() and ptcfree(), placing them in the appropriate memory
+ * location, and finally doing the switch from malloc() and free() to
+ * ptcmalloc() and ptcfree(). Once the change has been made, there is no way
+ * to switch back, short of restarting the program or modifying program text
+ * with mdb.
+ *
+ * 8.2 Modifying the Procedure Linkage Table (PLT)
+ * -----------------------------------------------
+ *
+ * The last piece of this puzzle is how we actually jam ptcmalloc() into the
+ * PLT. The dynamic linker has support for global and local audit libraries.
+ * For the full explanation of audit libraries consult the Linkers and
+ * Libraries guide or the linker source. A local auditor can attach to a
+ * single library and interpose on all of the relocations that come in from
+ * and leave to that same library. To facilitate our work, we have created a
+ * local audit library for libumem that is called libumem_trampoline and is
+ * located in lib/libumem_trampoline/.
+ *
+ * When any resolution is done to malloc(), the audit library allows us to
+ * replace the address with an address that it specifies. There are two 4k
+ * sections in libumem_trampoline's bss which we use as the stomping grounds
+ * for ptcmalloc and ptcfree. When the audit library audits the malloc and
+ * free functions from libumem, it encodes their address and sets its buffers
+ * to contain a simple trampoline which consists of a jmp instruction and a
+ * four-byte offset to the original malloc and free. libumem_trampoline's
+ * mapfile explicitly makes its bss rwx instead of rw to support this.
+ *
+ * When umem_genasm() is called, it uses a similar mechanism to get the
+ * address and size of the trampoline library's malloc (mbuf) and free (fbuf)
+ * buffers. After validating that the size will be able to contain all of the
+ * instructions, it starts laying out ptcmalloc and ptcfree at mbuf[4] and
+ * fbuf[4]. Once both have been successfully generated, umem_genasm() stores a
+ * single five-byte nop over the original jump.
+ *
+ * 8.3 umem_genasm()
+ * -----------------
+ *
+ * umem_genasm() is currently implemented for i386 and amd64. This section
+ * describes the theory behind the construction. For specific byte code to
+ * assembly instructions and niceish C and asm versions of ptcmalloc and
+ * ptcfree, see the individual umem_genasm.c files. The layout consists of the
+ * following sections:
+ *
+ * o. function-specific prologue
+ * o. function-generic cache-selecting elements
+ * o. function-specific epilogue
+ *
+ * There are three different generic cache elements that exist:
+ *
+ * o. the last or only cache
+ * o. the intermediary caches if more than two
+ * o. the first one if more than one cache
+ *
+ * The malloc and free prologues and epilogues mimic the necessary portions of
+ * libumem's malloc and free. This includes things like checking for size
+ * overflow, and setting and verifying the malloc tags. A sketch of one
+ * cache-selecting element follows.
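+ *
+ * Schematically, each cache-selecting element expands to something like the
+ * following C (an illustrative sketch; CACHE_SIZE and CACHE_INDEX stand for
+ * the blanks that umem_genasm() fills in for each cache at generation time):
+ *
+ * if (size <= CACHE_SIZE) { ! can this cache satisfy the request?
+ * roots += CACHE_INDEX; ! %rdx: head of this cache's list
+ * csize = CACHE_SIZE; ! %r8: amount to adjust tm_size by
+ * goto allocbuf; ! break out to the common allocation point
+ * }
+ * ! otherwise, fall through to the next (larger) cache element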
+ *
+ * It is an important constraint that these functions do not make use of the
+ * call instruction. The only jmp outside of the individual functions is to
+ * the original libumem malloc and free, respectively. Because doing things
+ * like setting errno or raising an internal umem error on improper malloc
+ * tags would require using calls into the PLT, whenever we encounter one of
+ * those cases we just jump to the original malloc and free functions,
+ * reusing the same stack frame.
+ *
+ * Each of the above sections, the three caches, and the malloc and free
+ * prologue and epilogue are implemented as blocks of machine code with the
+ * corresponding assembly in comments. There are known offsets into each block
+ * that correspond to locations of data and addresses that we only know at run
+ * time. These blocks are copied as necessary and the blanks filled in
+ * appropriately.
+ *
+ * As mentioned in section 8.2, the trampoline library uses specifically named
+ * variables to communicate the buffers and size to use. These variables are:
+ *
+ * o. umem_genasm_mptr: The buffer for ptcmalloc
+ * o. umem_genasm_msize: The size in bytes of the above buffer
+ * o. umem_genasm_fptr: The buffer for ptcfree
+ * o. umem_genasm_fsize: The size in bytes of the above buffer
+ *
+ * Finally, to enable the generated assembly we need to remove the previous
+ * jump to the actual malloc that exists at the start of these buffers. This
+ * is a five-byte region. We could zero out the jump offset to be a jmp +0,
+ * but using nops is faster; we specifically use a single five-byte nop,
+ * which is faster than a sequence of single-byte nops. The opcode for the
+ * five-byte nop is 0f 1f 44 00 00. On x86, remember, integers are little
+ * endian, so in memory it will be written the other way around.
+ *
+ * 8.4 Interface with libc.so
+ * --------------------------
+ *
+ * The tmem_t structure, as described at the beginning of section 8, is part
+ * of a private interface with libc. There are three functions that exist to
+ * cover this. They are not documented in man pages or header files. They are
+ * in the SUNWprivate part of libc's mapfile.
+ *
+ * o. _tmem_get_base(void)
+ *
+ * Returns the offset from the ulwp_t (curthread) to the tmem_t structure.
+ * This is a constant for all threads and is effectively a way to do
+ * ::offsetof ulwp_t ul_tmem without having to know the specifics of the
+ * structure outside of libc.
+ *
+ * o. _tmem_get_nentries(void)
+ *
+ * Returns the number of roots that exist in the tmem_t. This is one part
+ * of the cap on the number of umem_caches that we can back with tmem.
+ *
+ * o. _tmem_set_cleanup(void (*)(void *, int))
+ *
+ * This sets a cleanup handler that gets called back when a thread exits.
+ * There is one call per buffer; the void * is a pointer to the buffer on
+ * the list, and the int is the index into the roots array for this buffer.
+ *
+ * 8.5 Tuning and disabling per-thread caching
+ * -------------------------------------------
+ *
+ * There is only one tunable for per-thread caching: the amount of memory each
+ * thread should be able to cache. This is specified via the perthread_cache
+ * UMEM_OPTION. No attempt is made to sanity check the specified value; the
+ * limit is simply the maximum value of a size_t.
+ *
+ * If the perthread_cache UMEM_OPTION is set to zero, nomagazines was
+ * requested, or UMEM_DEBUG has been turned on, then we will never call into
+ * umem_genasm; however, the trampoline audit library and jump will still be
+ * in place.
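+ *
+ * For example, the per-thread cache might be enlarged or disabled at program
+ * invocation like this (illustrative invocations; the size suffix follows
+ * the usual UMEM_OPTIONS value syntax, and ./myprog is a hypothetical
+ * program):
+ *
+ * $ UMEM_OPTIONS=perthread_cache=16M ./myprog ! 16 MB cache per thread
+ * $ UMEM_OPTIONS=perthread_cache=0 ./myprog ! disable per-thread caching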
+ * + * 8.6 Observing efficacy of per-thread caching + * -------------------------------------------- + * + * To understand the efficacy of per-thread caching, use the ::umastat dcmd + * to see the percentage of capacity consumed on a per-thread basis, the + * degree to which each umem cache contributes to per-thread cache consumption, + * and the number of buffers in per-thread caches on a per-umem cache basis. + * If more detail is required, the specific buffers in a per-thread cache can + * be iterated over with the umem_ptc_* walkers. (These walkers allow an + * optional ulwp_t to be specified to iterate only over a particular thread's + * cache.) */ #include "config.h" @@ -524,8 +757,10 @@ size_t umem_lite_minsize = 0; /* minimum buffer size for UMF_LITE */ size_t umem_lite_maxalign = 1024; /* maximum buffer alignment for UMF_LITE */ size_t umem_maxverify; /* maximum bytes to inspect in debug routines */ size_t umem_minfirewall; /* hardware-enforced redzone threshold */ +size_t umem_ptc_size = 1048576; /* size of per-thread cache (in bytes) */ uint_t umem_flags = 0; +uintptr_t umem_tmem_off; mutex_t umem_init_lock = DEFAULTMUTEX; /* locks initialization */ cond_t umem_init_cv = DEFAULTCV; /* initialization CV */ @@ -533,6 +768,8 @@ thread_t umem_init_thr; /* thread initializing */ int umem_init_env_ready; /* environ pre-initted */ int umem_ready = UMEM_READY_STARTUP; +int umem_ptc_enabled; /* per-thread caching enabled */ + static umem_nofail_callback_t *nofail_callback; static mutex_t umem_nofail_exit_lock = DEFAULTMUTEX; static thread_t umem_nofail_exit_thr; @@ -2917,6 +3154,24 @@ umem_alloc_sizes_remove(size_t size) umem_alloc_sizes[i] = 0; } +/* + * We've been called back from libc to indicate that thread is terminating and + * that it needs to release the per-thread memory that it has. We get to know + * which entry in the thread's tmem array the allocation came from. Currently + * this refers to first n umem_caches which makes this a pretty simple indexing + * job. + */ +static void +umem_cache_tmem_cleanup(void *buf, int entry) +{ + size_t size; + umem_cache_t *cp; + + size = umem_alloc_sizes[entry]; + cp = umem_alloc_table[(size - 1) >> UMEM_ALIGN_SHIFT]; + _umem_cache_free(cp, buf); +} + static int umem_cache_init(void) { @@ -3032,6 +3287,16 @@ umem_cache_init(void) umem_alloc_caches[i] = cp; } + umem_tmem_off = _tmem_get_base(); + _tmem_set_cleanup(umem_cache_tmem_cleanup); + + if (umem_genasm_supported && !(umem_flags & UMF_DEBUG) && + !(umem_flags & UMF_NOMAGAZINE) && + umem_ptc_size > 0) { + umem_ptc_enabled = umem_genasm(umem_alloc_sizes, + umem_alloc_caches, i) == 0 ? 1 : 0; + } + /* * Initialization cannot fail at this point. Make the caches * visible to umem_alloc() and friends. diff --git a/umem_agent_support.c b/umem_agent_support.c index a89d69c..08fd010 100644 --- a/umem_agent_support.c +++ b/umem_agent_support.c @@ -23,7 +23,7 @@ * Copyright 2002 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * - * Portions Copyright 2006-2008 Message Systems, Inc. All rights reserved. + * Copyright 2006-2008 Message Systems, Inc. All rights reserved. */ /* #pragma ident "@(#)umem_agent_support.c 1.2 05/06/08 SMI" */ diff --git a/umem_base.h b/umem_base.h index 3b638a6..f28f62b 100644 --- a/umem_base.h +++ b/umem_base.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). 
You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -22,6 +21,8 @@ /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ #ifndef _UMEM_BASE_H @@ -76,6 +77,8 @@ extern volatile uint32_t umem_reaping; #define UMEM_REAP_ADDING 0x00000001 /* umem_reap() is active */ #define UMEM_REAP_ACTIVE 0x00000002 /* update thread is reaping */ +extern uintptr_t umem_tmem_off; + /* * umem.c: tunables */ @@ -98,6 +101,7 @@ extern size_t umem_lite_minsize; extern size_t umem_lite_maxalign; extern size_t umem_maxverify; extern size_t umem_minfirewall; +extern size_t umem_ptc_size; extern uint32_t umem_flags; diff --git a/umem_fail.c b/umem_fail.c index c8439d6..e3476ac 100644 --- a/umem_fail.c +++ b/umem_fail.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -24,7 +23,7 @@ * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * - * Portions Copyright 2006-2008 Message Systems, Inc. All rights reserved. + * Copyright 2006-2008 Message Systems, Inc. All rights reserved. */ /* #pragma ident "@(#)umem_fail.c 1.4 05/06/08 SMI" */ diff --git a/umem_impl.h b/umem_impl.h index bde89ce..5ecb5b5 100644 --- a/umem_impl.h +++ b/umem_impl.h @@ -21,11 +21,11 @@ */ /* * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * - * Portions Copyright 2012 Joyent, Inc. All rights reserved. * Use is subject to license terms. * - * Portions Copyright 2006-2008 Message Systems, Inc. All rights reserved. + * Copyright 2012 Joyent, Inc. All rights reserved. + * + * Copyright 2006-2008 Message Systems, Inc. All rights reserved. */ #ifndef _UMEM_IMPL_H @@ -78,6 +78,7 @@ extern "C" { #define UMF_HASH 0x00000200 /* cache has hash table */ #define UMF_RANDOMIZE 0x00000400 /* randomize other umem_flags */ +#define UMF_PTC 0x00000800 /* cache has per-thread caching */ #define UMF_BUFTAG (UMF_DEADBEEF | UMF_REDZONE) #define UMF_TOUCH (UMF_BUFTAG | UMF_LITE | UMF_CONTENTS) diff --git a/umem_update_thread.c b/umem_update_thread.c index 6a624c3..53bd75d 100644 --- a/umem_update_thread.c +++ b/umem_update_thread.c @@ -23,7 +23,7 @@ * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * - * Portions Copyright 2006-2008 Message Systems, Inc. All rights reserved. + * Copyright 2006-2008 Message Systems, Inc. All rights reserved. 
*/ /* #pragma ident "@(#)umem_update_thread.c 1.2 05/06/08 SMI" */ diff --git a/umemdbg/mdb/common/leaky_subr.c b/umemdbg/mdb/common/leaky_subr.c new file mode 100644 index 0000000..0142b19 --- /dev/null +++ b/umemdbg/mdb/common/leaky_subr.c @@ -0,0 +1,1151 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include "umem.h" + +#include +#include + +#include +#include +#include +#include +#include + +#include "leaky_impl.h" +#include "misc.h" +#include "proc_kludges.h" + +#include "umem_pagesize.h" + +/* + * This file defines the libumem target for ../genunix/leaky.c. + * + * See ../genunix/leaky_impl.h for the target interface definition. + */ + +/* + * leaky_subr_dump_start()/_end() depend on the ordering of TYPE_VMEM, + * TYPE_MMAP and TYPE_SBRK. + */ +#define TYPE_MMAP 0 /* lkb_data is the size */ +#define TYPE_SBRK 1 /* lkb_data is the size */ +#define TYPE_VMEM 2 /* lkb_data is the vmem_seg's size */ +#define TYPE_CACHE 3 /* lkb_cid is the bufctl's cache */ +#define TYPE_UMEM 4 /* lkb_cid is the bufctl's cache */ + +#define LKM_CTL_BUFCTL 0 /* normal allocation, PTR is bufctl */ +#define LKM_CTL_VMSEG 1 /* oversize allocation, PTR is vmem_seg_t */ +#define LKM_CTL_MEMORY 2 /* non-umem mmap or brk, PTR is region start */ +#define LKM_CTL_CACHE 3 /* normal alloc, non-debug, PTR is cache */ +#define LKM_CTL_MASK 3L + +/* + * create a lkm_bufctl from a pointer and a type + */ +#define LKM_CTL(ptr, type) (LKM_CTLPTR(ptr) | (type)) +#define LKM_CTLPTR(ctl) ((uintptr_t)(ctl) & ~(LKM_CTL_MASK)) +#define LKM_CTLTYPE(ctl) ((uintptr_t)(ctl) & (LKM_CTL_MASK)) + +static uintptr_t leak_brkbase; +static uintptr_t leak_brksize; + +#define LEAKY_INBRK(ptr) \ + (((uintptr_t)(ptr) - leak_brkbase) < leak_brksize) + +typedef struct leaky_seg_info { + uintptr_t ls_start; + uintptr_t ls_end; +} leaky_seg_info_t; + +typedef struct leaky_maps { + leaky_seg_info_t *lm_segs; + uintptr_t lm_seg_count; + uintptr_t lm_seg_max; + + pstatus_t *lm_pstatus; + + leak_mtab_t **lm_lmp; +} leaky_maps_t; + +/*ARGSUSED*/ +static int +leaky_mtab(uintptr_t addr, const umem_bufctl_audit_t *bcp, leak_mtab_t **lmp) +{ + leak_mtab_t *lm = (*lmp)++; + + lm->lkm_base = (uintptr_t)bcp->bc_addr; + lm->lkm_bufctl = LKM_CTL(addr, LKM_CTL_BUFCTL); + + return (WALK_NEXT); +} + +/*ARGSUSED*/ +static int +leaky_mtab_addr(uintptr_t addr, void *ignored, leak_mtab_t **lmp) +{ + leak_mtab_t *lm = (*lmp)++; + + lm->lkm_base = addr; + + return (WALK_NEXT); +} + +static int +leaky_seg(uintptr_t addr, const vmem_seg_t *seg, leak_mtab_t **lmp) +{ + leak_mtab_t *lm = (*lmp)++; + + lm->lkm_base = seg->vs_start; + 
lm->lkm_limit = seg->vs_end; + lm->lkm_bufctl = LKM_CTL(addr, LKM_CTL_VMSEG); + return (WALK_NEXT); +} + +static int +leaky_vmem(uintptr_t addr, const vmem_t *vmem, leak_mtab_t **lmp) +{ + if (strcmp(vmem->vm_name, "umem_oversize") != 0 && + strcmp(vmem->vm_name, "umem_memalign") != 0) + return (WALK_NEXT); + + if (mdb_pwalk("vmem_alloc", (mdb_walk_cb_t)leaky_seg, lmp, addr) == -1) + mdb_warn("can't walk vmem_alloc for %s (%p)", vmem->vm_name, + addr); + + return (WALK_NEXT); +} + +/*ARGSUSED*/ +static int +leaky_estimate_vmem(uintptr_t addr, const vmem_t *vmem, size_t *est) +{ + if (strcmp(vmem->vm_name, "umem_oversize") != 0 && + strcmp(vmem->vm_name, "umem_memalign") != 0) + return (WALK_NEXT); + + *est += (int)(vmem->vm_kstat.vk_alloc - vmem->vm_kstat.vk_free); + + return (WALK_NEXT); +} + +static int +leaky_seg_cmp(const void *l, const void *r) +{ + const leaky_seg_info_t *lhs = (const leaky_seg_info_t *)l; + const leaky_seg_info_t *rhs = (const leaky_seg_info_t *)r; + + if (lhs->ls_start < rhs->ls_start) + return (-1); + if (lhs->ls_start > rhs->ls_start) + return (1); + + return (0); +} + +static ssize_t +leaky_seg_search(uintptr_t addr, leaky_seg_info_t *listp, unsigned count) +{ + ssize_t left = 0, right = count - 1, guess; + + while (right >= left) { + guess = (right + left) >> 1; + + if (addr < listp[guess].ls_start) { + right = guess - 1; + continue; + } + + if (addr >= listp[guess].ls_end) { + left = guess + 1; + continue; + } + + return (guess); + } + + return (-1); +} + +/*ARGSUSED*/ +static int +leaky_count(uintptr_t addr, void *unused, size_t *total) +{ + ++*total; + + return (WALK_NEXT); +} + +/*ARGSUSED*/ +static int +leaky_read_segs(uintptr_t addr, const vmem_seg_t *seg, leaky_maps_t *lmp) +{ + leaky_seg_info_t *my_si = lmp->lm_segs + lmp->lm_seg_count; + + if (seg->vs_start == seg->vs_end && seg->vs_start == 0) + return (WALK_NEXT); + + if (lmp->lm_seg_count++ >= lmp->lm_seg_max) + return (WALK_ERR); + + my_si->ls_start = seg->vs_start; + my_si->ls_end = seg->vs_end; + + return (WALK_NEXT); +} + +/* ARGSUSED */ +static int +leaky_process_anon_mappings(uintptr_t ignored, const prmap_t *pmp, + leaky_maps_t *lmp) +{ + uintptr_t start = pmp->pr_vaddr; + uintptr_t end = pmp->pr_vaddr + pmp->pr_size; + + leak_mtab_t *lm; + pstatus_t *Psp = lmp->lm_pstatus; + + uintptr_t brk_start = Psp->pr_brkbase; + uintptr_t brk_end = Psp->pr_brkbase + Psp->pr_brksize; + + int has_brk = 0; + int in_vmem = 0; + + /* + * This checks if there is any overlap between the segment and the brk. + */ + if (end > brk_start && start < brk_end) + has_brk = 1; + + if (leaky_seg_search(start, lmp->lm_segs, lmp->lm_seg_count) != -1) + in_vmem = 1; + + /* + * We only want anonymous, mmaped memory. That means: + * + * 1. Must be read-write + * 2. Cannot be shared + * 3. Cannot have backing + * 4. Cannot be in the brk + * 5. Cannot be part of the vmem heap. 
+ */ + if ((pmp->pr_mflags & (MA_READ | MA_WRITE)) == (MA_READ | MA_WRITE) && + (pmp->pr_mflags & MA_SHARED) == 0 && + (pmp->pr_mapname[0] == 0) && + !has_brk && + !in_vmem) { + dprintf(("mmaped region: [%p, %p)\n", start, end)); + lm = (*lmp->lm_lmp)++; + lm->lkm_base = start; + lm->lkm_limit = end; + lm->lkm_bufctl = LKM_CTL(pmp->pr_vaddr, LKM_CTL_MEMORY); + } + + return (WALK_NEXT); +} + +static void +leaky_handle_sbrk(leaky_maps_t *lmp) +{ + uintptr_t brkbase = lmp->lm_pstatus->pr_brkbase; + uintptr_t brkend = brkbase + lmp->lm_pstatus->pr_brksize; + + leak_mtab_t *lm; + + leaky_seg_info_t *segs = lmp->lm_segs; + + int x, first = -1, last = -1; + + dprintf(("brk: [%p, %p)\n", brkbase, brkend)); + + for (x = 0; x < lmp->lm_seg_count; x++) { + if (segs[x].ls_start >= brkbase && segs[x].ls_end <= brkend) { + if (first == -1) + first = x; + last = x; + } + } + + if (brkbase == brkend) { + dprintf(("empty brk -- do nothing\n")); + } else if (first == -1) { + dprintf(("adding [%p, %p) whole brk\n", brkbase, brkend)); + + lm = (*lmp->lm_lmp)++; + lm->lkm_base = brkbase; + lm->lkm_limit = brkend; + lm->lkm_bufctl = LKM_CTL(brkbase, LKM_CTL_MEMORY); + } else { + uintptr_t curbrk = P2ROUNDUP(brkbase, umem_pagesize); + + if (curbrk != segs[first].ls_start) { + dprintf(("adding [%p, %p) in brk, before first seg\n", + brkbase, segs[first].ls_start)); + + lm = (*lmp->lm_lmp)++; + lm->lkm_base = brkbase; + lm->lkm_limit = segs[first].ls_start; + lm->lkm_bufctl = LKM_CTL(brkbase, LKM_CTL_MEMORY); + + curbrk = segs[first].ls_start; + + } else if (curbrk != brkbase) { + dprintf(("ignore [%p, %p) -- realign\n", brkbase, + curbrk)); + } + + for (x = first; x <= last; x++) { + if (curbrk < segs[x].ls_start) { + dprintf(("adding [%p, %p) in brk\n", curbrk, + segs[x].ls_start)); + + lm = (*lmp->lm_lmp)++; + lm->lkm_base = curbrk; + lm->lkm_limit = segs[x].ls_start; + lm->lkm_bufctl = LKM_CTL(curbrk, + LKM_CTL_MEMORY); + } + curbrk = segs[x].ls_end; + } + + if (curbrk < brkend) { + dprintf(("adding [%p, %p) in brk, after last seg\n", + curbrk, brkend)); + + lm = (*lmp->lm_lmp)++; + lm->lkm_base = curbrk; + lm->lkm_limit = brkend; + lm->lkm_bufctl = LKM_CTL(curbrk, LKM_CTL_MEMORY); + } + } +} + +static int +leaky_handle_anon_mappings(leak_mtab_t **lmp) +{ + leaky_maps_t lm; + + vmem_t *heap_arena; + vmem_t *vm_next; + vmem_t *heap_top; + vmem_t vmem; + + pstatus_t Ps; + + if (mdb_get_xdata("pstatus", &Ps, sizeof (Ps)) == -1) { + mdb_warn("couldn't read pstatus xdata"); + return (DCMD_ERR); + } + lm.lm_pstatus = &Ps; + + leak_brkbase = Ps.pr_brkbase; + leak_brksize = Ps.pr_brksize; + + if (umem_readvar(&heap_arena, "heap_arena") == -1) { + mdb_warn("couldn't read heap_arena"); + return (DCMD_ERR); + } + + if (heap_arena == NULL) { + mdb_warn("heap_arena is NULL.\n"); + return (DCMD_ERR); + } + + for (vm_next = heap_arena; vm_next != NULL; vm_next = vmem.vm_source) { + if (mdb_vread(&vmem, sizeof (vmem), (uintptr_t)vm_next) == -1) { + mdb_warn("couldn't read vmem at %p", vm_next); + return (DCMD_ERR); + } + heap_top = vm_next; + } + + lm.lm_seg_count = 0; + lm.lm_seg_max = 0; + + if (mdb_pwalk("vmem_span", (mdb_walk_cb_t)leaky_count, + &lm.lm_seg_max, (uintptr_t)heap_top) == -1) { + mdb_warn("couldn't walk vmem_span for vmem %p", heap_top); + return (DCMD_ERR); + } + lm.lm_segs = mdb_alloc(lm.lm_seg_max * sizeof (*lm.lm_segs), + UM_SLEEP | UM_GC); + + if (mdb_pwalk("vmem_span", (mdb_walk_cb_t)leaky_read_segs, &lm, + (uintptr_t)heap_top) == -1) { + mdb_warn("couldn't walk vmem_span for vmem %p", + heap_top); + 
return (DCMD_ERR); + } + + if (lm.lm_seg_count > lm.lm_seg_max) { + mdb_warn("segment list for vmem %p grew\n", heap_top); + return (DCMD_ERR); + } + + qsort(lm.lm_segs, lm.lm_seg_count, sizeof (*lm.lm_segs), leaky_seg_cmp); + + lm.lm_lmp = lmp; + + prockludge_add_walkers(); + + if (mdb_walk(KLUDGE_MAPWALK_NAME, + (mdb_walk_cb_t)leaky_process_anon_mappings, &lm) == -1) { + mdb_warn("Couldn't walk "KLUDGE_MAPWALK_NAME); + prockludge_remove_walkers(); + return (DCMD_ERR); + } + + prockludge_remove_walkers(); + leaky_handle_sbrk(&lm); + + return (DCMD_OK); +} + +static int +leaky_interested(const umem_cache_t *c) +{ + vmem_t vmem; + + if (mdb_vread(&vmem, sizeof (vmem), (uintptr_t)c->cache_arena) == -1) { + mdb_warn("cannot read arena %p for cache '%s'", + (uintptr_t)c->cache_arena, c->cache_name); + return (0); + } + + /* + * If this cache isn't allocating from either the umem_default or + * umem_firewall vmem arena, we're not interested. + */ + if (strcmp(vmem.vm_name, "umem_default") != 0 && + strcmp(vmem.vm_name, "umem_firewall") != 0) { + dprintf(("Skipping cache '%s' with arena '%s'\n", + c->cache_name, vmem.vm_name)); + return (0); + } + + return (1); +} + +/*ARGSUSED*/ +static int +leaky_estimate(uintptr_t addr, const umem_cache_t *c, size_t *est) +{ + if (!leaky_interested(c)) + return (WALK_NEXT); + + *est += umem_estimate_allocated(addr, c); + + return (WALK_NEXT); +} + +/*ARGSUSED*/ +static int +leaky_cache(uintptr_t addr, const umem_cache_t *c, leak_mtab_t **lmp) +{ + leak_mtab_t *lm = *lmp; + mdb_walk_cb_t cb; + const char *walk; + int audit = (c->cache_flags & UMF_AUDIT); + + if (!leaky_interested(c)) + return (WALK_NEXT); + + if (audit) { + walk = "bufctl"; + cb = (mdb_walk_cb_t)leaky_mtab; + } else { + walk = "umem"; + cb = (mdb_walk_cb_t)leaky_mtab_addr; + } + if (mdb_pwalk(walk, cb, lmp, addr) == -1) { + mdb_warn("can't walk umem for cache %p (%s)", addr, + c->cache_name); + return (WALK_DONE); + } + + for (; lm < *lmp; lm++) { + lm->lkm_limit = lm->lkm_base + c->cache_bufsize; + if (!audit) + lm->lkm_bufctl = LKM_CTL(addr, LKM_CTL_CACHE); + } + return (WALK_NEXT); +} + +static char *map_head = "%-?s %?s %-10s used reason\n"; +static char *map_fmt = "[%?p,%?p) %-10s "; +#define BACKING_LEN 10 /* must match the third field's width in map_fmt */ + +static void +leaky_mappings_header(void) +{ + dprintf((map_head, "mapping", "", "backing")); +} + +/* ARGSUSED */ +static int +leaky_grep_mappings(uintptr_t ignored, const prmap_t *pmp, + const pstatus_t *Psp) +{ + const char *map_libname_ptr; + char db_mp_name[BACKING_LEN+1]; + + map_libname_ptr = strrchr(pmp->pr_mapname, '/'); + if (map_libname_ptr != NULL) + map_libname_ptr++; + else + map_libname_ptr = pmp->pr_mapname; + + strlcpy(db_mp_name, map_libname_ptr, sizeof (db_mp_name)); + + dprintf((map_fmt, pmp->pr_vaddr, (char *)pmp->pr_vaddr + pmp->pr_size, + db_mp_name)); + +#define USE(rsn) dprintf_cont(("yes %s\n", (rsn))) +#define IGNORE(rsn) dprintf_cont(("no %s\n", (rsn))) + + if (!(pmp->pr_mflags & MA_WRITE) || !(pmp->pr_mflags & MA_READ)) { + IGNORE("read-only"); + } else if (pmp->pr_vaddr <= Psp->pr_brkbase && + pmp->pr_vaddr + pmp->pr_size > Psp->pr_brkbase) { + USE("bss"); /* grab up to brkbase */ + leaky_grep(pmp->pr_vaddr, Psp->pr_brkbase - pmp->pr_vaddr); + } else if (pmp->pr_vaddr >= Psp->pr_brkbase && + pmp->pr_vaddr < Psp->pr_brkbase + Psp->pr_brksize) { + IGNORE("in brk"); + } else if (pmp->pr_vaddr == Psp->pr_stkbase && + pmp->pr_size == Psp->pr_stksize) { + IGNORE("stack"); + } else if (0 == 
strcmp(map_libname_ptr, "a.out")) { + USE("a.out data"); + leaky_grep(pmp->pr_vaddr, pmp->pr_size); + } else if (0 == strncmp(map_libname_ptr, "libumem.so", 10)) { + IGNORE("part of umem"); + } else if (pmp->pr_mapname[0] != 0) { + USE("lib data"); /* library data/bss */ + leaky_grep(pmp->pr_vaddr, pmp->pr_size); + } else if ((pmp->pr_mflags & MA_ANON) && pmp->pr_mapname[0] == 0) { + IGNORE("anon"); + } else { + IGNORE(""); /* default to ignoring */ + } + +#undef USE +#undef IGNORE + + return (WALK_NEXT); +} + +/*ARGSUSED*/ +static int +leaky_mark_lwp(void *ignored, const lwpstatus_t *lwp) +{ + leaky_mark_ptr(lwp->pr_reg[R_SP] + STACK_BIAS); + return (0); +} + +/*ARGSUSED*/ +static int +leaky_process_lwp(void *ignored, const lwpstatus_t *lwp) +{ + const uintptr_t *regs = (const uintptr_t *)&lwp->pr_reg; + int i; + uintptr_t sp; + uintptr_t addr; + size_t size; + + for (i = 0; i < R_SP; i++) + leaky_grep_ptr(regs[i]); + + sp = regs[i++] + STACK_BIAS; + if (leaky_lookup_marked(sp, &addr, &size)) + leaky_grep(sp, size - (sp - addr)); + + for (; i < NPRGREG; i++) + leaky_grep_ptr(regs[i]); + + return (0); +} + +/* + * Handles processing various proc-related things: + * 1. calls leaky_process_lwp on each the LWP + * 2. leaky_greps the bss/data of libraries and a.out, and the a.out stack. + */ +static int +leaky_process_proc(void) +{ + pstatus_t Ps; + struct ps_prochandle *Pr; + + if (mdb_get_xdata("pstatus", &Ps, sizeof (Ps)) == -1) { + mdb_warn("couldn't read pstatus xdata"); + return (DCMD_ERR); + } + + dprintf(("pstatus says:\n")); + dprintf(("\tbrk: base %p size %p\n", + Ps.pr_brkbase, Ps.pr_brksize)); + dprintf(("\tstk: base %p size %p\n", + Ps.pr_stkbase, Ps.pr_stksize)); + + if (mdb_get_xdata("pshandle", &Pr, sizeof (Pr)) == -1) { + mdb_warn("couldn't read pshandle xdata"); + return (DCMD_ERR); + } + + if (Plwp_iter(Pr, leaky_mark_lwp, NULL) != 0) { + mdb_warn("findleaks: Failed to iterate lwps\n"); + return (DCMD_ERR); + } + + if (Plwp_iter(Pr, leaky_process_lwp, NULL) != 0) { + mdb_warn("findleaks: Failed to iterate lwps\n"); + return (DCMD_ERR); + } + + prockludge_add_walkers(); + + leaky_mappings_header(); + + if (mdb_walk(KLUDGE_MAPWALK_NAME, (mdb_walk_cb_t)leaky_grep_mappings, + &Ps) == -1) { + mdb_warn("Couldn't walk "KLUDGE_MAPWALK_NAME); + prockludge_remove_walkers(); + return (-1); + } + + prockludge_remove_walkers(); + + return (0); +} + +static void +leaky_subr_caller(const uintptr_t *stack, uint_t depth, char *buf, + uintptr_t *pcp) +{ + int i; + GElf_Sym sym; + uintptr_t pc = 0; + + buf[0] = 0; + + for (i = 0; i < depth; i++) { + pc = stack[i]; + + if (mdb_lookup_by_addr(pc, + MDB_SYM_FUZZY, buf, MDB_SYM_NAMLEN, &sym) == -1) + continue; + if (strncmp(buf, "libumem.so", 10) == 0) + continue; + + *pcp = pc; + return; + } + + /* + * We're only here if the entire call chain is in libumem.so; + * this shouldn't happen, but we'll just use the last caller. 
+ */ + *pcp = pc; +} + +int +leaky_subr_bufctl_cmp(const leak_bufctl_t *lhs, const leak_bufctl_t *rhs) +{ + char lbuf[MDB_SYM_NAMLEN], rbuf[MDB_SYM_NAMLEN]; + uintptr_t lcaller, rcaller; + int rval; + + leaky_subr_caller(lhs->lkb_stack, lhs->lkb_depth, lbuf, &lcaller); + leaky_subr_caller(rhs->lkb_stack, lhs->lkb_depth, rbuf, &rcaller); + + if (rval = strcmp(lbuf, rbuf)) + return (rval); + + if (lcaller < rcaller) + return (-1); + + if (lcaller > rcaller) + return (1); + + if (lhs->lkb_data < rhs->lkb_data) + return (-1); + + if (lhs->lkb_data > rhs->lkb_data) + return (1); + + return (0); +} + +/*ARGSUSED*/ +int +leaky_subr_estimate(size_t *estp) +{ + if (umem_ready == 0) { + mdb_warn( + "findleaks: umem is not loaded in the address space\n"); + return (DCMD_ERR); + } + + if (umem_ready == UMEM_READY_INIT_FAILED) { + mdb_warn("findleaks: umem initialization failed -- no " + "possible leaks.\n"); + return (DCMD_ERR); + } + + if (umem_ready != UMEM_READY) { + mdb_warn("findleaks: No allocations have occured -- no " + "possible leaks.\n"); + return (DCMD_ERR); + } + + if (mdb_walk("umem_cache", (mdb_walk_cb_t)leaky_estimate, estp) == -1) { + mdb_warn("couldn't walk 'umem_cache'"); + return (DCMD_ERR); + } + + if (mdb_walk("vmem", (mdb_walk_cb_t)leaky_estimate_vmem, estp) == -1) { + mdb_warn("couldn't walk 'vmem'"); + return (DCMD_ERR); + } + + if (*estp == 0) { + mdb_warn("findleaks: No allocated buffers found.\n"); + return (DCMD_ERR); + } + + prockludge_add_walkers(); + + if (mdb_walk(KLUDGE_MAPWALK_NAME, (mdb_walk_cb_t)leaky_count, + estp) == -1) { + mdb_warn("Couldn't walk "KLUDGE_MAPWALK_NAME); + prockludge_remove_walkers(); + return (DCMD_ERR); + } + + prockludge_remove_walkers(); + + return (DCMD_OK); +} + +int +leaky_subr_fill(leak_mtab_t **lmpp) +{ + if (leaky_handle_anon_mappings(lmpp) != DCMD_OK) { + mdb_warn("unable to process mappings\n"); + return (DCMD_ERR); + } + + if (mdb_walk("vmem", (mdb_walk_cb_t)leaky_vmem, lmpp) == -1) { + mdb_warn("couldn't walk 'vmem'"); + return (DCMD_ERR); + } + + if (mdb_walk("umem_cache", (mdb_walk_cb_t)leaky_cache, lmpp) == -1) { + mdb_warn("couldn't walk 'umem_cache'"); + return (DCMD_ERR); + } + + return (DCMD_OK); +} + +int +leaky_subr_run(void) +{ + if (leaky_process_proc() == DCMD_ERR) { + mdb_warn("failed to process proc"); + return (DCMD_ERR); + } + return (DCMD_OK); +} + +void +leaky_subr_add_leak(leak_mtab_t *lmp) +{ + uintptr_t addr = LKM_CTLPTR(lmp->lkm_bufctl); + uint_t depth; + + vmem_seg_t vs; + umem_bufctl_audit_t *bcp; + UMEM_LOCAL_BUFCTL_AUDIT(&bcp); + + switch (LKM_CTLTYPE(lmp->lkm_bufctl)) { + case LKM_CTL_BUFCTL: + if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) { + mdb_warn("couldn't read leaked bufctl at addr %p", + addr); + return; + } + + depth = MIN(bcp->bc_depth, umem_stack_depth); + + /* + * The top of the stack will be in umem_cache_alloc(). + * Since the offset in umem_cache_alloc() isn't interesting + * we skip that frame for the purposes of uniquifying stacks. + * + * Also, we use the cache pointer as the leaks's cid, to + * prevent the coalescing of leaks from different caches. 
+ */ + if (depth > 0) + depth--; + leaky_add_leak(TYPE_UMEM, addr, (uintptr_t)bcp->bc_addr, + bcp->bc_timestamp, bcp->bc_stack + 1, depth, + (uintptr_t)bcp->bc_cache, (uintptr_t)bcp->bc_cache); + break; + case LKM_CTL_VMSEG: + if (mdb_vread(&vs, sizeof (vs), addr) == -1) { + mdb_warn("couldn't read leaked vmem_seg at addr %p", + addr); + return; + } + depth = MIN(vs.vs_depth, VMEM_STACK_DEPTH); + + leaky_add_leak(TYPE_VMEM, addr, vs.vs_start, vs.vs_timestamp, + vs.vs_stack, depth, 0, (vs.vs_end - vs.vs_start)); + break; + case LKM_CTL_MEMORY: + if (LEAKY_INBRK(addr)) + leaky_add_leak(TYPE_SBRK, addr, addr, 0, NULL, 0, 0, + lmp->lkm_limit - addr); + else + leaky_add_leak(TYPE_MMAP, addr, addr, 0, NULL, 0, 0, + lmp->lkm_limit - addr); + break; + case LKM_CTL_CACHE: + leaky_add_leak(TYPE_CACHE, lmp->lkm_base, lmp->lkm_base, 0, + NULL, 0, addr, addr); + break; + default: + mdb_warn("internal error: invalid leak_bufctl_t\n"); + break; + } +} + +static int lk_vmem_seen; +static int lk_cache_seen; +static int lk_umem_seen; +static size_t lk_ttl; +static size_t lk_bytes; + +void +leaky_subr_dump_start(int type) +{ + switch (type) { + case TYPE_MMAP: + lk_vmem_seen = 0; + break; + + case TYPE_SBRK: + case TYPE_VMEM: + return; /* don't zero counts */ + + case TYPE_CACHE: + lk_cache_seen = 0; + break; + + case TYPE_UMEM: + lk_umem_seen = 0; + break; + + default: + break; + } + + lk_ttl = 0; + lk_bytes = 0; +} + +void +leaky_subr_dump(const leak_bufctl_t *lkb, int verbose) +{ + const leak_bufctl_t *cur; + umem_cache_t cache; + size_t min, max, size; + char sz[30]; + char c[MDB_SYM_NAMLEN]; + uintptr_t caller; + const char *nm, *nm_lc; + uint8_t type = lkb->lkb_type; + + if (verbose) { + lk_ttl = 0; + lk_bytes = 0; + } else if (!lk_vmem_seen && (type == TYPE_VMEM || type == TYPE_MMAP || + type == TYPE_SBRK)) { + lk_vmem_seen = 1; + mdb_printf("%-16s %7s %?s %s\n", + "BYTES", "LEAKED", "VMEM_SEG", "CALLER"); + } + + switch (lkb->lkb_type) { + case TYPE_MMAP: + case TYPE_SBRK: + nm = (lkb->lkb_type == TYPE_MMAP) ? "MMAP" : "SBRK"; + nm_lc = (lkb->lkb_type == TYPE_MMAP) ? 
"mmap(2)" : "sbrk(2)"; + + for (; lkb != NULL; lkb = lkb->lkb_next) { + if (!verbose) + mdb_printf("%-16d %7d %?p %s\n", lkb->lkb_data, + lkb->lkb_dups + 1, lkb->lkb_addr, nm); + else + mdb_printf("%s leak: [%p, %p), %ld bytes\n", + nm_lc, lkb->lkb_addr, + lkb->lkb_addr + lkb->lkb_data, + lkb->lkb_data); + lk_ttl++; + lk_bytes += lkb->lkb_data; + } + return; + + case TYPE_VMEM: + min = max = lkb->lkb_data; + + for (cur = lkb; cur != NULL; cur = cur->lkb_next) { + size = cur->lkb_data; + + if (size < min) + min = size; + if (size > max) + max = size; + + lk_ttl++; + lk_bytes += size; + } + + if (min == max) + (void) mdb_snprintf(sz, sizeof (sz), "%ld", min); + else + (void) mdb_snprintf(sz, sizeof (sz), "%ld-%ld", + min, max); + + if (!verbose) { + leaky_subr_caller(lkb->lkb_stack, lkb->lkb_depth, + c, &caller); + + mdb_printf("%-16s %7d %?p %a\n", sz, lkb->lkb_dups + 1, + lkb->lkb_addr, caller); + } else { + mdb_arg_t v; + + if (lk_ttl == 1) + mdb_printf("umem_oversize leak: 1 vmem_seg, " + "%ld bytes\n", lk_bytes); + else + mdb_printf("umem_oversize leak: %d vmem_segs, " + "%s bytes each, %ld bytes total\n", + lk_ttl, sz, lk_bytes); + + v.a_type = MDB_TYPE_STRING; + v.a_un.a_str = "-v"; + + if (mdb_call_dcmd("vmem_seg", lkb->lkb_addr, + DCMD_ADDRSPEC, 1, &v) == -1) { + mdb_warn("'%p::vmem_seg -v' failed", + lkb->lkb_addr); + } + } + return; + + case TYPE_CACHE: + if (!lk_cache_seen) { + lk_cache_seen = 1; + if (lk_vmem_seen) + mdb_printf("\n"); + mdb_printf("%-?s %7s %?s %s\n", + "CACHE", "LEAKED", "BUFFER", "CALLER"); + } + + if (mdb_vread(&cache, sizeof (cache), lkb->lkb_data) == -1) { + /* + * This _really_ shouldn't happen; we shouldn't + * have been able to get this far if this + * cache wasn't readable. + */ + mdb_warn("can't read cache %p for leaked " + "buffer %p", lkb->lkb_data, lkb->lkb_addr); + return; + } + + lk_ttl += lkb->lkb_dups + 1; + lk_bytes += (lkb->lkb_dups + 1) * cache.cache_bufsize; + + caller = (lkb->lkb_depth == 0) ? 0 : lkb->lkb_stack[0]; + if (caller != 0) { + (void) mdb_snprintf(c, sizeof (c), "%a", caller); + } else { + (void) mdb_snprintf(c, sizeof (c), "%s", + (verbose) ? "" : "?"); + } + + if (!verbose) { + mdb_printf("%0?p %7d %0?p %s\n", lkb->lkb_cid, + lkb->lkb_dups + 1, lkb->lkb_addr, c); + } else { + if (lk_ttl == 1) + mdb_printf("%s leak: 1 buffer, %ld bytes,\n", + cache.cache_name, lk_bytes); + else + mdb_printf("%s leak: %d buffers, " + "%ld bytes each, %ld bytes total,\n", + cache.cache_name, lk_ttl, + cache.cache_bufsize, lk_bytes); + mdb_printf(" %s%s%ssample addr %p\n", + (caller == 0) ? "" : "caller ", c, + (caller == 0) ? "" : ", ", lkb->lkb_addr); + } + return; + + case TYPE_UMEM: + if (!lk_umem_seen) { + lk_umem_seen = 1; + if (lk_vmem_seen || lk_cache_seen) + mdb_printf("\n"); + mdb_printf("%-?s %7s %?s %s\n", + "CACHE", "LEAKED", "BUFCTL", "CALLER"); + } + if (mdb_vread(&cache, sizeof (cache), lkb->lkb_data) == -1) { + /* + * This _really_ shouldn't happen; we shouldn't + * have been able to get this far if this + * cache wasn't readable. 
+ */ + mdb_warn("can't read cache %p for leaked " + "bufctl %p", lkb->lkb_data, lkb->lkb_addr); + return; + } + + lk_ttl += lkb->lkb_dups + 1; + lk_bytes += (lkb->lkb_dups + 1) * cache.cache_bufsize; + + if (!verbose) { + leaky_subr_caller(lkb->lkb_stack, lkb->lkb_depth, c, + &caller); + + mdb_printf("%0?p %7d %0?p %a\n", lkb->lkb_data, + lkb->lkb_dups + 1, lkb->lkb_addr, caller); + } else { + mdb_arg_t v; + + if (lk_ttl == 1) + mdb_printf("%s leak: 1 buffer, %ld bytes\n", + cache.cache_name, lk_bytes); + else + mdb_printf("%s leak: %d buffers, " + "%ld bytes each, %ld bytes total\n", + cache.cache_name, lk_ttl, + cache.cache_bufsize, lk_bytes); + + v.a_type = MDB_TYPE_STRING; + v.a_un.a_str = "-v"; + + if (mdb_call_dcmd("bufctl", lkb->lkb_addr, + DCMD_ADDRSPEC, 1, &v) == -1) { + mdb_warn("'%p::bufctl -v' failed", + lkb->lkb_addr); + } + } + return; + + default: + return; + } +} + +void +leaky_subr_dump_end(int type) +{ + int i; + int width; + const char *leak; + + switch (type) { + case TYPE_VMEM: + if (!lk_vmem_seen) + return; + + width = 16; + leak = "oversized leak"; + break; + + case TYPE_CACHE: + if (!lk_cache_seen) + return; + + width = sizeof (uintptr_t) * 2; + leak = "buffer"; + break; + + case TYPE_UMEM: + if (!lk_umem_seen) + return; + + width = sizeof (uintptr_t) * 2; + leak = "buffer"; + break; + + default: + return; + } + + for (i = 0; i < 72; i++) + mdb_printf("-"); + mdb_printf("\n%*s %7ld %s%s, %ld byte%s\n", + width, "Total", lk_ttl, leak, (lk_ttl == 1) ? "" : "s", + lk_bytes, (lk_bytes == 1) ? "" : "s"); +} + +int +leaky_subr_invoke_callback(const leak_bufctl_t *lkb, mdb_walk_cb_t cb, + void *cbdata) +{ + vmem_seg_t vs; + umem_bufctl_audit_t *bcp; + UMEM_LOCAL_BUFCTL_AUDIT(&bcp); + + switch (lkb->lkb_type) { + case TYPE_VMEM: + if (mdb_vread(&vs, sizeof (vs), lkb->lkb_addr) == -1) { + mdb_warn("unable to read vmem_seg at %p", + lkb->lkb_addr); + return (WALK_NEXT); + } + return (cb(lkb->lkb_addr, &vs, cbdata)); + + case TYPE_UMEM: + if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, + lkb->lkb_addr) == -1) { + mdb_warn("unable to read bufctl at %p", + lkb->lkb_addr); + return (WALK_NEXT); + } + return (cb(lkb->lkb_addr, bcp, cbdata)); + + default: + return (cb(lkb->lkb_addr, NULL, cbdata)); + } +} diff --git a/umemdbg/mdb/common/libumem.c b/umemdbg/mdb/common/libumem.c new file mode 100644 index 0000000..0984edb --- /dev/null +++ b/umemdbg/mdb/common/libumem.c @@ -0,0 +1,610 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Copyright (c) 2012, Joyent, Inc. All rights reserved. 
+ */ + +#include "umem.h" +#include +#include + +#include "kgrep.h" +#include "leaky.h" +#include "misc.h" +#include "proc_kludges.h" + +#include +#include +#include +#include + +#include "umem_pagesize.h" + +typedef struct datafmt { + char *hdr1; + char *hdr2; + char *dashes; + char *fmt; +} datafmt_t; + +static datafmt_t ptcfmt[] = { + { " ", "tid", "---", "%3u " }, + { " memory", " cached", "-------", "%7lH " }, + { " %", "cap", "---", "%3u " }, + { " %", NULL, "---", "%3u " }, + { NULL, NULL, NULL, NULL } +}; + +static datafmt_t umemfmt[] = { + { "cache ", "name ", + "-------------------------", "%-25s " }, + { " buf", " size", "------", "%6u " }, + { " buf", " in use", "-------", "%7u " }, + { " buf", " in ptc", "-------", "%7s " }, + { " buf", " total", "-------", "%7u " }, + { " memory", " in use", "-------", "%7H " }, + { " alloc", " succeed", "---------", "%9u " }, + { "alloc", " fail", "-----", "%5llu" }, + { NULL, NULL, NULL, NULL } +}; + +static datafmt_t vmemfmt[] = { + { "vmem ", "name ", + "-------------------------", "%-*s " }, + { " memory", " in use", "---------", "%9H " }, + { " memory", " total", "----------", "%10H " }, + { " memory", " import", "---------", "%9H " }, + { " alloc", " succeed", "---------", "%9llu " }, + { "alloc", " fail", "-----", "%5llu " }, + { NULL, NULL, NULL, NULL } +}; + +/*ARGSUSED*/ +static int +umastat_cpu_avail(uintptr_t addr, const umem_cpu_cache_t *ccp, int *avail) +{ + if (ccp->cc_rounds > 0) + *avail += ccp->cc_rounds; + if (ccp->cc_prounds > 0) + *avail += ccp->cc_prounds; + + return (WALK_NEXT); +} + +/*ARGSUSED*/ +static int +umastat_cpu_alloc(uintptr_t addr, const umem_cpu_cache_t *ccp, int *alloc) +{ + *alloc += ccp->cc_alloc; + + return (WALK_NEXT); +} + +/*ARGSUSED*/ +static int +umastat_slab_avail(uintptr_t addr, const umem_slab_t *sp, int *avail) +{ + *avail += sp->slab_chunks - sp->slab_refcnt; + + return (WALK_NEXT); +} + +typedef struct umastat_vmem { + uintptr_t kv_addr; + struct umastat_vmem *kv_next; + int kv_meminuse; + int kv_alloc; + int kv_fail; +} umastat_vmem_t; + +/*ARGSUSED*/ +static int +umastat_cache_nptc(uintptr_t addr, const umem_cache_t *cp, int *nptc) +{ + if (!(cp->cache_flags & UMF_PTC)) + return (WALK_NEXT); + + (*nptc)++; + return (WALK_NEXT); +} + +/*ARGSUSED*/ +static int +umastat_cache_hdr(uintptr_t addr, const umem_cache_t *cp, void *ignored) +{ + if (!(cp->cache_flags & UMF_PTC)) + return (WALK_NEXT); + + mdb_printf("%3d ", cp->cache_bufsize); + return (WALK_NEXT); +} + +/*ARGSUSED*/ +static int +umastat_lwp_ptc(uintptr_t addr, void *buf, int *nbufs) +{ + (*nbufs)++; + return (WALK_NEXT); +} + +/*ARGSUSED*/ +static int +umastat_lwp_cache(uintptr_t addr, const umem_cache_t *cp, ulwp_t *ulwp) +{ + char walk[60]; + int nbufs = 0; + + if (!(cp->cache_flags & UMF_PTC)) + return (WALK_NEXT); + + (void) snprintf(walk, sizeof (walk), "umem_ptc_%d", cp->cache_bufsize); + + if (mdb_pwalk(walk, (mdb_walk_cb_t)umastat_lwp_ptc, + &nbufs, (uintptr_t)ulwp->ul_self) == -1) { + mdb_warn("unable to walk '%s'", walk); + return (WALK_ERR); + } + + mdb_printf("%3d ", ulwp->ul_tmem.tm_size ? 
+ (nbufs * cp->cache_bufsize * 100) / ulwp->ul_tmem.tm_size : 0); + + return (WALK_NEXT); +} + +/*ARGSUSED*/ +static int +umastat_lwp(uintptr_t addr, const ulwp_t *ulwp, void *ignored) +{ + size_t size; + datafmt_t *dfp = ptcfmt; + + mdb_printf((dfp++)->fmt, ulwp->ul_lwpid); + mdb_printf((dfp++)->fmt, ulwp->ul_tmem.tm_size); + + if (umem_readvar(&size, "umem_ptc_size") == -1) { + mdb_warn("unable to read 'umem_ptc_size'"); + return (WALK_ERR); + } + + mdb_printf((dfp++)->fmt, (ulwp->ul_tmem.tm_size * 100) / size); + + if (mdb_walk("umem_cache", + (mdb_walk_cb_t)umastat_lwp_cache, (void *)ulwp) == -1) { + mdb_warn("can't walk 'umem_cache'"); + return (WALK_ERR); + } + + mdb_printf("\n"); + + return (WALK_NEXT); +} + +/*ARGSUSED*/ +static int +umastat_cache_ptc(uintptr_t addr, const void *ignored, int *nptc) +{ + (*nptc)++; + return (WALK_NEXT); +} + +static int +umastat_cache(uintptr_t addr, const umem_cache_t *cp, umastat_vmem_t **kvp) +{ + umastat_vmem_t *kv; + datafmt_t *dfp = umemfmt; + char buf[10]; + int magsize; + + int avail, alloc, total, nptc = 0; + size_t meminuse = (cp->cache_slab_create - cp->cache_slab_destroy) * + cp->cache_slabsize; + + mdb_walk_cb_t cpu_avail = (mdb_walk_cb_t)umastat_cpu_avail; + mdb_walk_cb_t cpu_alloc = (mdb_walk_cb_t)umastat_cpu_alloc; + mdb_walk_cb_t slab_avail = (mdb_walk_cb_t)umastat_slab_avail; + + magsize = umem_get_magsize(cp); + + alloc = cp->cache_slab_alloc + cp->cache_full.ml_alloc; + avail = cp->cache_full.ml_total * magsize; + total = cp->cache_buftotal; + + (void) mdb_pwalk("umem_cpu_cache", cpu_alloc, &alloc, addr); + (void) mdb_pwalk("umem_cpu_cache", cpu_avail, &avail, addr); + (void) mdb_pwalk("umem_slab_partial", slab_avail, &avail, addr); + + if (cp->cache_flags & UMF_PTC) { + char walk[60]; + + (void) snprintf(walk, sizeof (walk), + "umem_ptc_%d", cp->cache_bufsize); + + if (mdb_walk(walk, + (mdb_walk_cb_t)umastat_cache_ptc, &nptc) == -1) { + mdb_warn("unable to walk '%s'", walk); + return (WALK_ERR); + } + + (void) snprintf(buf, sizeof (buf), "%d", nptc); + } + + for (kv = *kvp; kv != NULL; kv = kv->kv_next) { + if (kv->kv_addr == (uintptr_t)cp->cache_arena) + goto out; + } + + kv = mdb_zalloc(sizeof (umastat_vmem_t), UM_SLEEP | UM_GC); + kv->kv_next = *kvp; + kv->kv_addr = (uintptr_t)cp->cache_arena; + *kvp = kv; +out: + kv->kv_meminuse += meminuse; + kv->kv_alloc += alloc; + kv->kv_fail += cp->cache_alloc_fail; + + mdb_printf((dfp++)->fmt, cp->cache_name); + mdb_printf((dfp++)->fmt, cp->cache_bufsize); + mdb_printf((dfp++)->fmt, total - avail); + mdb_printf((dfp++)->fmt, cp->cache_flags & UMF_PTC ? 
buf : "-"); + mdb_printf((dfp++)->fmt, total); + mdb_printf((dfp++)->fmt, meminuse); + mdb_printf((dfp++)->fmt, alloc); + mdb_printf((dfp++)->fmt, cp->cache_alloc_fail); + mdb_printf("\n"); + + return (WALK_NEXT); +} + +static int +umastat_vmem_totals(uintptr_t addr, const vmem_t *v, umastat_vmem_t *kv) +{ + while (kv != NULL && kv->kv_addr != addr) + kv = kv->kv_next; + + if (kv == NULL || kv->kv_alloc == 0) + return (WALK_NEXT); + + mdb_printf("Total [%s]%*s %6s %7s %7s %7s %7H %9u %5u\n", v->vm_name, + 17 - strlen(v->vm_name), "", "", "", "", "", + kv->kv_meminuse, kv->kv_alloc, kv->kv_fail); + + return (WALK_NEXT); +} + +/*ARGSUSED*/ +static int +umastat_vmem(uintptr_t addr, const vmem_t *v, void *ignored) +{ + datafmt_t *dfp = vmemfmt; + uintptr_t paddr; + vmem_t parent; + int ident = 0; + + for (paddr = (uintptr_t)v->vm_source; paddr != NULL; ident += 4) { + if (mdb_vread(&parent, sizeof (parent), paddr) == -1) { + mdb_warn("couldn't trace %p's ancestry", addr); + ident = 0; + break; + } + paddr = (uintptr_t)parent.vm_source; + } + + mdb_printf("%*s", ident, ""); + mdb_printf((dfp++)->fmt, 25 - ident, v->vm_name); + mdb_printf((dfp++)->fmt, v->vm_kstat.vk_mem_inuse); + mdb_printf((dfp++)->fmt, v->vm_kstat.vk_mem_total); + mdb_printf((dfp++)->fmt, v->vm_kstat.vk_mem_import); + mdb_printf((dfp++)->fmt, v->vm_kstat.vk_alloc); + mdb_printf((dfp++)->fmt, v->vm_kstat.vk_fail); + + mdb_printf("\n"); + + return (WALK_NEXT); +} + +/*ARGSUSED*/ +int +umastat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + umastat_vmem_t *kv = NULL; + datafmt_t *dfp; + int nptc = 0, i; + + if (argc != 0) + return (DCMD_USAGE); + + /* + * We need to determine if we have any caches that have per-thread + * caching enabled. + */ + if (mdb_walk("umem_cache", + (mdb_walk_cb_t)umastat_cache_nptc, &nptc) == -1) { + mdb_warn("can't walk 'umem_cache'"); + return (DCMD_ERR); + } + + if (nptc) { + for (dfp = ptcfmt; dfp->hdr2 != NULL; dfp++) + mdb_printf("%s ", dfp->hdr1); + + for (i = 0; i < nptc; i++) + mdb_printf("%s ", dfp->hdr1); + + mdb_printf("\n"); + + for (dfp = ptcfmt; dfp->hdr2 != NULL; dfp++) + mdb_printf("%s ", dfp->hdr2); + + if (mdb_walk("umem_cache", + (mdb_walk_cb_t)umastat_cache_hdr, NULL) == -1) { + mdb_warn("can't walk 'umem_cache'"); + return (DCMD_ERR); + } + + mdb_printf("\n"); + + for (dfp = ptcfmt; dfp->hdr2 != NULL; dfp++) + mdb_printf("%s ", dfp->dashes); + + for (i = 0; i < nptc; i++) + mdb_printf("%s ", dfp->dashes); + + mdb_printf("\n"); + + if (mdb_walk("ulwp", (mdb_walk_cb_t)umastat_lwp, NULL) == -1) { + mdb_warn("can't walk 'ulwp'"); + return (DCMD_ERR); + } + + mdb_printf("\n"); + } + + for (dfp = umemfmt; dfp->hdr1 != NULL; dfp++) + mdb_printf("%s%s", dfp == umemfmt ? "" : " ", dfp->hdr1); + mdb_printf("\n"); + + for (dfp = umemfmt; dfp->hdr1 != NULL; dfp++) + mdb_printf("%s%s", dfp == umemfmt ? "" : " ", dfp->hdr2); + mdb_printf("\n"); + + for (dfp = umemfmt; dfp->hdr1 != NULL; dfp++) + mdb_printf("%s%s", dfp == umemfmt ? "" : " ", dfp->dashes); + mdb_printf("\n"); + + if (mdb_walk("umem_cache", (mdb_walk_cb_t)umastat_cache, &kv) == -1) { + mdb_warn("can't walk 'umem_cache'"); + return (DCMD_ERR); + } + + for (dfp = umemfmt; dfp->hdr1 != NULL; dfp++) + mdb_printf("%s%s", dfp == umemfmt ? 
"" : " ", dfp->dashes); + mdb_printf("\n"); + + if (mdb_walk("vmem", (mdb_walk_cb_t)umastat_vmem_totals, kv) == -1) { + mdb_warn("can't walk 'vmem'"); + return (DCMD_ERR); + } + + for (dfp = umemfmt; dfp->hdr1 != NULL; dfp++) + mdb_printf("%s ", dfp->dashes); + mdb_printf("\n"); + + mdb_printf("\n"); + + for (dfp = vmemfmt; dfp->hdr1 != NULL; dfp++) + mdb_printf("%s ", dfp->hdr1); + mdb_printf("\n"); + + for (dfp = vmemfmt; dfp->hdr1 != NULL; dfp++) + mdb_printf("%s ", dfp->hdr2); + mdb_printf("\n"); + + for (dfp = vmemfmt; dfp->hdr1 != NULL; dfp++) + mdb_printf("%s ", dfp->dashes); + mdb_printf("\n"); + + if (mdb_walk("vmem", (mdb_walk_cb_t)umastat_vmem, NULL) == -1) { + mdb_warn("can't walk 'vmem'"); + return (DCMD_ERR); + } + + for (dfp = vmemfmt; dfp->hdr1 != NULL; dfp++) + mdb_printf("%s ", dfp->dashes); + mdb_printf("\n"); + return (DCMD_OK); +} + +/* + * kmdb doesn't use libproc, and thus doesn't have any prmap_t's to walk. + * We have other ways to grep kmdb's address range. + */ +#ifndef _KMDB + +typedef struct ugrep_walk_data { + kgrep_cb_func *ug_cb; + void *ug_cbdata; +} ugrep_walk_data_t; + +/*ARGSUSED*/ +int +ugrep_mapping_cb(uintptr_t addr, const void *prm_arg, void *data) +{ + ugrep_walk_data_t *ug = data; + const prmap_t *prm = prm_arg; + + return (ug->ug_cb(prm->pr_vaddr, prm->pr_vaddr + prm->pr_size, + ug->ug_cbdata)); +} + +int +kgrep_subr(kgrep_cb_func *cb, void *cbdata) +{ + ugrep_walk_data_t ug; + + prockludge_add_walkers(); + + ug.ug_cb = cb; + ug.ug_cbdata = cbdata; + + if (mdb_walk(KLUDGE_MAPWALK_NAME, ugrep_mapping_cb, &ug) == -1) { + mdb_warn("Unable to walk "KLUDGE_MAPWALK_NAME); + return (DCMD_ERR); + } + + prockludge_remove_walkers(); + return (DCMD_OK); +} + +size_t +kgrep_subr_pagesize(void) +{ + return (PAGESIZE); +} + +#endif /* !_KMDB */ + +static const mdb_dcmd_t dcmds[] = { + + /* from libumem.c */ + { "umastat", NULL, "umem allocator stats", umastat }, + + /* from misc.c */ + { "umem_debug", NULL, "toggle umem dcmd/walk debugging", umem_debug}, + + /* from umem.c */ + { "umem_status", NULL, "Print umem status and message buffer", + umem_status }, + { "allocdby", ":", "given a thread, print its allocated buffers", + allocdby }, + { "bufctl", ":[-vh] [-a addr] [-c caller] [-e earliest] [-l latest] " + "[-t thd]", "print or filter a bufctl", bufctl, bufctl_help }, + { "bufctl_audit", ":", "print a bufctl_audit", bufctl_audit }, + { "freedby", ":", "given a thread, print its freed buffers", freedby }, + { "umalog", "[ fail | slab ]", + "display umem transaction log and stack traces", umalog }, + { "umausers", "[-ef] [cache ...]", "display current medium and large " + "users of the umem allocator", umausers }, + { "umem_cache", "?", "print a umem cache", umem_cache }, + { "umem_log", "?", "dump umem transaction log", umem_log }, + { "umem_malloc_dist", "[-dg] [-b maxbins] [-B minbinsize]", + "report distribution of outstanding malloc()s", + umem_malloc_dist, umem_malloc_dist_help }, + { "umem_malloc_info", "?[-dg] [-b maxbins] [-B minbinsize]", + "report information about malloc()s by cache", + umem_malloc_info, umem_malloc_info_help }, + { "umem_verify", "?", "check integrity of umem-managed memory", + umem_verify }, + { "vmem", "?", "print a vmem_t", vmem }, + { "vmem_seg", ":[-sv] [-c caller] [-e earliest] [-l latest] " + "[-m minsize] [-M maxsize] [-t thread] [-T type]", + "print or filter a vmem_seg", vmem_seg, vmem_seg_help }, + +#ifndef _KMDB + /* from ../genunix/kgrep.c + libumem.c */ + { "ugrep", KGREP_USAGE, "search user address space for a 
pointer", + kgrep, kgrep_help }, + + /* from ../genunix/leaky.c + leaky_subr.c */ + { "findleaks", FINDLEAKS_USAGE, "search for potential memory leaks", + findleaks, findleaks_help }, +#endif + + { NULL } +}; + +static const mdb_walker_t walkers[] = { + + /* from umem.c */ + { "allocdby", "given a thread, walk its allocated bufctls", + allocdby_walk_init, allocdby_walk_step, allocdby_walk_fini }, + { "bufctl", "walk a umem cache's bufctls", + bufctl_walk_init, umem_walk_step, umem_walk_fini }, + { "bufctl_history", "walk the available history of a bufctl", + bufctl_history_walk_init, bufctl_history_walk_step, + bufctl_history_walk_fini }, + { "freectl", "walk a umem cache's free bufctls", + freectl_walk_init, umem_walk_step, umem_walk_fini }, + { "freedby", "given a thread, walk its freed bufctls", + freedby_walk_init, allocdby_walk_step, allocdby_walk_fini }, + { "freemem", "walk a umem cache's free memory", + freemem_walk_init, umem_walk_step, umem_walk_fini }, + { "umem", "walk a umem cache", + umem_walk_init, umem_walk_step, umem_walk_fini }, + { "umem_cpu", "walk the umem CPU structures", + umem_cpu_walk_init, umem_cpu_walk_step, umem_cpu_walk_fini }, + { "umem_cpu_cache", "given a umem cache, walk its per-CPU caches", + umem_cpu_cache_walk_init, umem_cpu_cache_walk_step, NULL }, + { "umem_hash", "given a umem cache, walk its allocated hash table", + umem_hash_walk_init, umem_hash_walk_step, umem_hash_walk_fini }, + { "umem_log", "walk the umem transaction log", + umem_log_walk_init, umem_log_walk_step, umem_log_walk_fini }, + { "umem_slab", "given a umem cache, walk its slabs", + umem_slab_walk_init, umem_slab_walk_step, NULL }, + { "umem_slab_partial", + "given a umem cache, walk its partially allocated slabs (min 1)", + umem_slab_walk_partial_init, umem_slab_walk_step, NULL }, + { "vmem", "walk vmem structures in pre-fix, depth-first order", + vmem_walk_init, vmem_walk_step, vmem_walk_fini }, + { "vmem_alloc", "given a vmem_t, walk its allocated vmem_segs", + vmem_alloc_walk_init, vmem_seg_walk_step, vmem_seg_walk_fini }, + { "vmem_free", "given a vmem_t, walk its free vmem_segs", + vmem_free_walk_init, vmem_seg_walk_step, vmem_seg_walk_fini }, + { "vmem_postfix", "walk vmem structures in post-fix, depth-first order", + vmem_walk_init, vmem_postfix_walk_step, vmem_walk_fini }, + { "vmem_seg", "given a vmem_t, walk all of its vmem_segs", + vmem_seg_walk_init, vmem_seg_walk_step, vmem_seg_walk_fini }, + { "vmem_span", "given a vmem_t, walk its spanning vmem_segs", + vmem_span_walk_init, vmem_seg_walk_step, vmem_seg_walk_fini }, + +#ifndef _KMDB + /* from ../genunix/leaky.c + leaky_subr.c */ + { "leak", "given a leak ctl, walk other leaks w/ that stacktrace", + leaky_walk_init, leaky_walk_step, leaky_walk_fini }, + { "leakbuf", "given a leak ctl, walk addr of leaks w/ that stacktrace", + leaky_walk_init, leaky_buf_walk_step, leaky_walk_fini }, +#endif + + { NULL } +}; + +static const mdb_modinfo_t modinfo = {MDB_API_VERSION, dcmds, walkers}; + +const mdb_modinfo_t * +_mdb_init(void) +{ + if (umem_init() != 0) + return (NULL); + + return (&modinfo); +} + +void +_mdb_fini(void) +{ +#ifndef _KMDB + leaky_cleanup(1); +#endif +} diff --git a/umemdbg/mdb/common/misc.c b/umemdbg/mdb/common/misc.c new file mode 100644 index 0000000..bedb166 --- /dev/null +++ b/umemdbg/mdb/common/misc.c @@ -0,0 +1,110 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include "misc.h" + +#define UMEM_OBJNAME "libumem.so" + +int umem_debug_level = 0; +int umem_is_standalone = 0; + +/*ARGSUSED*/ +int +umem_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + umem_debug_level ^= 1; + + mdb_printf("umem: debugging is now %s\n", + umem_debug_level ? "on" : "off"); + + return (DCMD_OK); +} + +/* + * To further confuse the issue, this dmod can run against either + * libumem.so.1 *or* the libstandumem.so linked into kmdb(1M). To figure + * out which one we are working against, we look up "umem_alloc" in both + * libumem.so and the executable. + * + * A further wrinkle is that libumem.so may not yet be loaded into the + * process' address space. That can lead to either the lookup failing, or + * being unable to read from the data segment. We treat either case as + * an error. + */ +int +umem_set_standalone(void) +{ + GElf_Sym sym; + int ready; + + if (mdb_lookup_by_obj(UMEM_OBJNAME, "umem_alloc", &sym) == 0) + umem_is_standalone = 0; + else if (mdb_lookup_by_obj(MDB_OBJ_EXEC, "umem_alloc", &sym) == 0) + umem_is_standalone = 1; + else + return (-1); + + /* + * now that we know where things should be, make sure we can actually + * read things out. + */ + if (umem_readvar(&ready, "umem_ready") == -1) + return (-1); + return (0); +} + +ssize_t +umem_lookup_by_name(const char *name, GElf_Sym *sym) +{ + return (mdb_lookup_by_obj((umem_is_standalone ? MDB_OBJ_EXEC : + UMEM_OBJNAME), name, sym)); +} + +/* This is like mdb_readvar, only for libumem.so's symbols */ +ssize_t +umem_readvar(void *buf, const char *name) +{ + GElf_Sym sym; + + if (umem_lookup_by_name(name, &sym)) + return (-1); + + if (mdb_vread(buf, sym.st_size, (uintptr_t)sym.st_value) + == sym.st_size) + return ((ssize_t)sym.st_size); + + return (-1); +} + +int +is_umem_sym(const char *sym, const char *prefix) +{ + char *tick_p = strrchr(sym, '`'); + + return (strncmp(sym, "libumem", 7) == 0 && tick_p != NULL && + strncmp(tick_p + 1, prefix, strlen(prefix)) == 0); +} diff --git a/umemdbg/mdb/common/misc.h b/umemdbg/mdb/common/misc.h new file mode 100644 index 0000000..73981ff --- /dev/null +++ b/umemdbg/mdb/common/misc.h @@ -0,0 +1,67 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _MDBMOD_MISC_H +#define _MDBMOD_MISC_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define offsetof(s, m) ((size_t)(&(((s *)0)->m))) + +extern int umem_debug(uintptr_t, uint_t, int, const mdb_arg_t *); + +extern int umem_set_standalone(void); +extern ssize_t umem_lookup_by_name(const char *, GElf_Sym *); +extern ssize_t umem_readvar(void *, const char *); + +/* + * Returns non-zero if sym matches libumem*`prefix* + */ +int is_umem_sym(const char *, const char *); + +#define dprintf(x) if (umem_debug_level) { \ + mdb_printf("umem debug: "); \ + /*CSTYLED*/\ + mdb_printf x ;\ +} + +#define dprintf_cont(x) if (umem_debug_level) { \ + /*CSTYLED*/\ + mdb_printf x ;\ +} + +extern int umem_debug_level; + +#ifdef __cplusplus +} +#endif + +#endif /* _MDBMOD_MISC_H */ diff --git a/umemdbg/mdb/common/proc_kludges.c b/umemdbg/mdb/common/proc_kludges.c new file mode 100644 index 0000000..7faef6f --- /dev/null +++ b/umemdbg/mdb/common/proc_kludges.c @@ -0,0 +1,165 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2000-2002 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include +#include + +#include +#include + +#include "proc_kludges.h" + +typedef struct prockuldge_mappings { + struct ps_prochandle *pkm_Pr; + + uint_t pkm_idx; + + uint_t pkm_count; + uint_t pkm_max; + + prmap_t *pkm_mappings; + + uint_t pkm_old_max; + prmap_t *pkm_old_mappings; +} prockludge_mappings_t; + +/* ARGSUSED */ +static int +prockludge_mappings_iter(prockludge_mappings_t *pkm, const prmap_t *pmp, + const char *object_name) +{ + if (pkm->pkm_count >= pkm->pkm_max) { + int s = pkm->pkm_max ? 
pkm->pkm_max * 2 : 16; + + pkm->pkm_old_max = pkm->pkm_max; + pkm->pkm_old_mappings = pkm->pkm_mappings; + pkm->pkm_max = s; + pkm->pkm_mappings = mdb_alloc(sizeof (prmap_t) * s, UM_SLEEP); + + bcopy(pkm->pkm_old_mappings, pkm->pkm_mappings, + sizeof (prmap_t) * pkm->pkm_old_max); + + mdb_free(pkm->pkm_old_mappings, + sizeof (prmap_t) * pkm->pkm_old_max); + + pkm->pkm_old_mappings = NULL; + pkm->pkm_old_max = 0; + } + bcopy(pmp, &pkm->pkm_mappings[pkm->pkm_count++], sizeof (prmap_t)); + + return (0); +} + +int +prockludge_mappings_walk_init(mdb_walk_state_t *mws) +{ + struct ps_prochandle *Pr; + int rc; + + prockludge_mappings_t *pkm; + + if (mdb_get_xdata("pshandle", &Pr, sizeof (Pr)) == -1) { + mdb_warn("couldn't read pshandle xdata"); + return (WALK_ERR); + } + + pkm = mdb_zalloc(sizeof (prockludge_mappings_t), UM_SLEEP); + pkm->pkm_Pr = Pr; + mws->walk_data = pkm; + + rc = Pmapping_iter(Pr, (proc_map_f *)prockludge_mappings_iter, pkm); + if (rc != 0) { + mdb_warn("Pmapping_iter failed"); + /* clean up */ + prockludge_mappings_walk_fini(mws); + return (WALK_ERR); + } + return (WALK_NEXT); +} + +int +prockludge_mappings_walk_step(mdb_walk_state_t *wsp) +{ + prockludge_mappings_t *pkm = wsp->walk_data; + int status; + + if (pkm->pkm_idx >= pkm->pkm_count) + return (WALK_DONE); + + status = wsp->walk_callback(0, &pkm->pkm_mappings[pkm->pkm_idx++], + wsp->walk_cbdata); + + return (status); +} + +void +prockludge_mappings_walk_fini(mdb_walk_state_t *wsp) +{ + prockludge_mappings_t *pkm = wsp->walk_data; + if (pkm != NULL) { + if (pkm->pkm_old_mappings != NULL) { + mdb_free(pkm->pkm_old_mappings, + sizeof (prmap_t) * pkm->pkm_old_max); + } + if (pkm->pkm_mappings && + pkm->pkm_mappings != pkm->pkm_old_mappings) { + mdb_free(pkm->pkm_mappings, + sizeof (prmap_t) * pkm->pkm_max); + } + mdb_free(pkm, sizeof (prockludge_mappings_t)); + } +} + +static int add_count = 0; + +void +prockludge_add_walkers(void) +{ + mdb_walker_t w; + + if (add_count++ == 0) { + w.walk_name = KLUDGE_MAPWALK_NAME; + w.walk_descr = "kludge: walk the process' prmap_ts"; + w.walk_init = prockludge_mappings_walk_init; + w.walk_step = prockludge_mappings_walk_step; + w.walk_fini = prockludge_mappings_walk_fini; + w.walk_init_arg = NULL; + + if (mdb_add_walker(&w) == -1) { + mdb_warn("unable to add walker "KLUDGE_MAPWALK_NAME); + } + } +} + +void +prockludge_remove_walkers(void) +{ + if (--add_count == 0) { + mdb_remove_walker(KLUDGE_MAPWALK_NAME); + } +} diff --git a/umemdbg/mdb/common/proc_kludges.h b/umemdbg/mdb/common/proc_kludges.h new file mode 100644 index 0000000..e4f9c70 --- /dev/null +++ b/umemdbg/mdb/common/proc_kludges.h @@ -0,0 +1,49 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2000-2002 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _PROC_KLUDGES_H +#define _PROC_KLUDGES_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#define KLUDGE_MAPWALK_NAME "__prockludge_mappings" + +extern int prockludge_mappings_walk_init(mdb_walk_state_t *); +extern int prockludge_mappings_walk_step(mdb_walk_state_t *); +extern void prockludge_mappings_walk_fini(mdb_walk_state_t *); + +extern void prockludge_add_walkers(void); +extern void prockludge_remove_walkers(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _PROC_KLUDGES_H */ diff --git a/umemdbg/mdb/common/umem.c b/umemdbg/mdb/common/umem.c new file mode 100644 index 0000000..69b003c --- /dev/null +++ b/umemdbg/mdb/common/umem.c @@ -0,0 +1,4349 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Copyright (c) 2012, Joyent, Inc. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. + */ + +#include "umem.h" + +#include +#include + +#include +#include +#include +#include +#include + +#include "misc.h" +#include "leaky.h" +#include "dist.h" + +#include "umem_pagesize.h" + +#define UM_ALLOCATED 0x1 +#define UM_FREE 0x2 +#define UM_BUFCTL 0x4 +#define UM_HASH 0x8 + +int umem_ready; + +static int umem_stack_depth_warned; +static uint32_t umem_max_ncpus; +uint32_t umem_stack_depth; + +size_t umem_pagesize; + +#define UMEM_READVAR(var) \ + (umem_readvar(&(var), #var) == -1 && \ + (mdb_warn("failed to read "#var), 1)) + +int +umem_update_variables(void) +{ + size_t pagesize; + + /* + * Figure out which type of umem is being used; if it's not there + * yet, succeed quietly. + */ + if (umem_set_standalone() == -1) { + umem_ready = 0; + return (0); /* umem not there yet */ + } + + /* + * Solaris 9 used a different name for umem_max_ncpus. It's + * cheap backwards compatibility to check for both names. 
+ */ + if (umem_readvar(&umem_max_ncpus, "umem_max_ncpus") == -1 && + umem_readvar(&umem_max_ncpus, "max_ncpus") == -1) { + mdb_warn("unable to read umem_max_ncpus or max_ncpus"); + return (-1); + } + if (UMEM_READVAR(umem_ready)) + return (-1); + if (UMEM_READVAR(umem_stack_depth)) + return (-1); + if (UMEM_READVAR(pagesize)) + return (-1); + + if (umem_stack_depth > UMEM_MAX_STACK_DEPTH) { + if (umem_stack_depth_warned == 0) { + mdb_warn("umem_stack_depth corrupted (%d > %d)\n", + umem_stack_depth, UMEM_MAX_STACK_DEPTH); + umem_stack_depth_warned = 1; + } + umem_stack_depth = 0; + } + + umem_pagesize = pagesize; + + return (0); +} + +static int +umem_ptc_walk_init(mdb_walk_state_t *wsp) +{ + if (wsp->walk_addr == NULL) { + if (mdb_layered_walk("ulwp", wsp) == -1) { + mdb_warn("couldn't walk 'ulwp'"); + return (WALK_ERR); + } + } + + return (WALK_NEXT); +} + +static int +umem_ptc_walk_step(mdb_walk_state_t *wsp) +{ + uintptr_t this; + int rval; + + if (wsp->walk_layer != NULL) { + this = (uintptr_t)((ulwp_t *)wsp->walk_layer)->ul_self + + (uintptr_t)wsp->walk_arg; + } else { + this = wsp->walk_addr + (uintptr_t)wsp->walk_arg; + } + + for (;;) { + if (mdb_vread(&this, sizeof (void *), this) == -1) { + mdb_warn("couldn't read ptc buffer at %p", this); + return (WALK_ERR); + } + + if (this == NULL) + break; + + rval = wsp->walk_callback(this, &this, wsp->walk_cbdata); + + if (rval != WALK_NEXT) + return (rval); + } + + return (wsp->walk_layer != NULL ? WALK_NEXT : WALK_DONE); +} + +/*ARGSUSED*/ +static int +umem_init_walkers(uintptr_t addr, const umem_cache_t *c, int *sizes) +{ + mdb_walker_t w; + char descr[64]; + char name[64]; + int i; + + (void) mdb_snprintf(descr, sizeof (descr), + "walk the %s cache", c->cache_name); + + w.walk_name = c->cache_name; + w.walk_descr = descr; + w.walk_init = umem_walk_init; + w.walk_step = umem_walk_step; + w.walk_fini = umem_walk_fini; + w.walk_init_arg = (void *)addr; + + if (mdb_add_walker(&w) == -1) + mdb_warn("failed to add %s walker", c->cache_name); + + if (!(c->cache_flags & UMF_PTC)) + return (WALK_NEXT); + + /* + * For the per-thread cache walker, the address is the offset in the + * tm_roots[] array of the ulwp_t. 
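+ *
+ * For example (buffer size illustrative), once the walker below is
+ * registered, the per-thread cache of the 256-byte umem cache can be
+ * walked directly:
+ *
+ *	> ::walk umem_ptc_256
+ *
+ * which visits every buffer currently parked in some thread's
+ * tm_roots[] slot for that size class.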
+ */ + for (i = 0; sizes[i] != 0; i++) { + if (sizes[i] == c->cache_bufsize) + break; + } + + if (sizes[i] == 0) { + mdb_warn("cache %s is cached per-thread, but could not find " + "size in umem_alloc_sizes\n", c->cache_name); + return (WALK_NEXT); + } + + if (i >= NTMEMBASE) { + mdb_warn("index for %s (%d) exceeds root slots (%d)\n", + c->cache_name, i, NTMEMBASE); + return (WALK_NEXT); + } + + (void) mdb_snprintf(name, sizeof (name), + "umem_ptc_%d", c->cache_bufsize); + (void) mdb_snprintf(descr, sizeof (descr), + "walk the per-thread cache for %s", c->cache_name); + + w.walk_name = name; + w.walk_descr = descr; + w.walk_init = umem_ptc_walk_init; + w.walk_step = umem_ptc_walk_step; + w.walk_fini = NULL; + w.walk_init_arg = (void *)offsetof(ulwp_t, ul_tmem.tm_roots[i]); + + if (mdb_add_walker(&w) == -1) + mdb_warn("failed to add %s walker", w.walk_name); + + return (WALK_NEXT); +} + +/*ARGSUSED*/ +static void +umem_statechange_cb(void *arg) +{ + static int been_ready = 0; + GElf_Sym sym; + int *sizes; + +#ifndef _KMDB + leaky_cleanup(1); /* state changes invalidate leaky state */ +#endif + + if (umem_update_variables() == -1) + return; + + if (been_ready) + return; + + if (umem_ready != UMEM_READY) + return; + + been_ready = 1; + + /* + * In order to determine the tm_roots offset of any cache that is + * cached per-thread, we need to have the umem_alloc_sizes array. + * Read this, assuring that it is zero-terminated. + */ + if (umem_lookup_by_name("umem_alloc_sizes", &sym) == -1) { + mdb_warn("unable to lookup 'umem_alloc_sizes'"); + return; + } + + sizes = mdb_zalloc(sym.st_size + sizeof (int), UM_SLEEP | UM_GC); + + if (mdb_vread(sizes, sym.st_size, (uintptr_t)sym.st_value) == -1) { + mdb_warn("couldn't read 'umem_alloc_sizes'"); + return; + } + + (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umem_init_walkers, sizes); +} + +int +umem_abort_messages(void) +{ + char *umem_error_buffer; + uint_t umem_error_begin; + GElf_Sym sym; + size_t bufsize; + + if (UMEM_READVAR(umem_error_begin)) + return (DCMD_ERR); + + if (umem_lookup_by_name("umem_error_buffer", &sym) == -1) { + mdb_warn("unable to look up umem_error_buffer"); + return (DCMD_ERR); + } + + bufsize = (size_t)sym.st_size; + + umem_error_buffer = mdb_alloc(bufsize+1, UM_SLEEP | UM_GC); + + if (mdb_vread(umem_error_buffer, bufsize, (uintptr_t)sym.st_value) + != bufsize) { + mdb_warn("unable to read umem_error_buffer"); + return (DCMD_ERR); + } + /* put a zero after the end of the buffer to simplify printing */ + umem_error_buffer[bufsize] = 0; + + if ((umem_error_begin % bufsize) == 0) + mdb_printf("%s\n", umem_error_buffer); + else { + umem_error_buffer[(umem_error_begin % bufsize) - 1] = 0; + mdb_printf("%s%s\n", + &umem_error_buffer[umem_error_begin % bufsize], + umem_error_buffer); + } + + return (DCMD_OK); +} + +static void +umem_log_status(const char *name, umem_log_header_t *val) +{ + umem_log_header_t my_lh; + uintptr_t pos = (uintptr_t)val; + size_t size; + + if (pos == NULL) + return; + + if (mdb_vread(&my_lh, sizeof (umem_log_header_t), pos) == -1) { + mdb_warn("\nunable to read umem_%s_log pointer %p", + name, pos); + return; + } + + size = my_lh.lh_chunksize * my_lh.lh_nchunks; + + if (size % (1024 * 1024) == 0) + mdb_printf("%s=%dm ", name, size / (1024 * 1024)); + else if (size % 1024 == 0) + mdb_printf("%s=%dk ", name, size / 1024); + else + mdb_printf("%s=%d ", name, size); +} + +typedef struct umem_debug_flags { + const char *udf_name; + uint_t udf_flags; + uint_t udf_clear; /* if 0, uses udf_flags */ +} 
umem_debug_flags_t; + +umem_debug_flags_t umem_status_flags[] = { + { "random", UMF_RANDOMIZE, UMF_RANDOM }, + { "default", UMF_AUDIT | UMF_DEADBEEF | UMF_REDZONE | UMF_CONTENTS }, + { "audit", UMF_AUDIT }, + { "guards", UMF_DEADBEEF | UMF_REDZONE }, + { "nosignal", UMF_CHECKSIGNAL }, + { "firewall", UMF_FIREWALL }, + { "lite", UMF_LITE }, + { NULL } +}; + +/*ARGSUSED*/ +int +umem_status(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv) +{ + int umem_logging; + + umem_log_header_t *umem_transaction_log; + umem_log_header_t *umem_content_log; + umem_log_header_t *umem_failure_log; + umem_log_header_t *umem_slab_log; + + mdb_printf("Status:\t\t%s\n", + umem_ready == UMEM_READY_INIT_FAILED ? "initialization failed" : + umem_ready == UMEM_READY_STARTUP ? "uninitialized" : + umem_ready == UMEM_READY_INITING ? "initialization in process" : + umem_ready == UMEM_READY ? "ready and active" : + umem_ready == 0 ? "not loaded into address space" : + "unknown (umem_ready invalid)"); + + if (umem_ready == 0) + return (DCMD_OK); + + mdb_printf("Concurrency:\t%d\n", umem_max_ncpus); + + if (UMEM_READVAR(umem_logging)) + goto err; + if (UMEM_READVAR(umem_transaction_log)) + goto err; + if (UMEM_READVAR(umem_content_log)) + goto err; + if (UMEM_READVAR(umem_failure_log)) + goto err; + if (UMEM_READVAR(umem_slab_log)) + goto err; + + mdb_printf("Logs:\t\t"); + umem_log_status("transaction", umem_transaction_log); + umem_log_status("content", umem_content_log); + umem_log_status("fail", umem_failure_log); + umem_log_status("slab", umem_slab_log); + if (!umem_logging) + mdb_printf("(inactive)"); + mdb_printf("\n"); + + mdb_printf("Message buffer:\n"); + return (umem_abort_messages()); + +err: + mdb_printf("Message buffer:\n"); + (void) umem_abort_messages(); + return (DCMD_ERR); +} + +typedef struct { + uintptr_t ucw_first; + uintptr_t ucw_current; +} umem_cache_walk_t; + +int +umem_cache_walk_init(mdb_walk_state_t *wsp) +{ + umem_cache_walk_t *ucw; + umem_cache_t c; + uintptr_t cp; + GElf_Sym sym; + + if (umem_lookup_by_name("umem_null_cache", &sym) == -1) { + mdb_warn("couldn't find umem_null_cache"); + return (WALK_ERR); + } + + cp = (uintptr_t)sym.st_value; + + if (mdb_vread(&c, sizeof (umem_cache_t), cp) == -1) { + mdb_warn("couldn't read cache at %p", cp); + return (WALK_ERR); + } + + ucw = mdb_alloc(sizeof (umem_cache_walk_t), UM_SLEEP); + + ucw->ucw_first = cp; + ucw->ucw_current = (uintptr_t)c.cache_next; + wsp->walk_data = ucw; + + return (WALK_NEXT); +} + +int +umem_cache_walk_step(mdb_walk_state_t *wsp) +{ + umem_cache_walk_t *ucw = wsp->walk_data; + umem_cache_t c; + int status; + + if (mdb_vread(&c, sizeof (umem_cache_t), ucw->ucw_current) == -1) { + mdb_warn("couldn't read cache at %p", ucw->ucw_current); + return (WALK_DONE); + } + + status = wsp->walk_callback(ucw->ucw_current, &c, wsp->walk_cbdata); + + if ((ucw->ucw_current = (uintptr_t)c.cache_next) == ucw->ucw_first) + return (WALK_DONE); + + return (status); +} + +void +umem_cache_walk_fini(mdb_walk_state_t *wsp) +{ + umem_cache_walk_t *ucw = wsp->walk_data; + mdb_free(ucw, sizeof (umem_cache_walk_t)); +} + +typedef struct { + umem_cpu_t *ucw_cpus; + uint32_t ucw_current; + uint32_t ucw_max; +} umem_cpu_walk_state_t; + +int +umem_cpu_walk_init(mdb_walk_state_t *wsp) +{ + umem_cpu_t *umem_cpus; + + umem_cpu_walk_state_t *ucw; + + if (umem_readvar(&umem_cpus, "umem_cpus") == -1) { + mdb_warn("failed to read 'umem_cpus'"); + return (WALK_ERR); + } + + ucw = mdb_alloc(sizeof (*ucw), UM_SLEEP); + + ucw->ucw_cpus = umem_cpus; + 
ucw->ucw_current = 0; + ucw->ucw_max = umem_max_ncpus; + + wsp->walk_data = ucw; + return (WALK_NEXT); +} + +int +umem_cpu_walk_step(mdb_walk_state_t *wsp) +{ + umem_cpu_t cpu; + umem_cpu_walk_state_t *ucw = wsp->walk_data; + + uintptr_t caddr; + + if (ucw->ucw_current >= ucw->ucw_max) + return (WALK_DONE); + + caddr = (uintptr_t)&(ucw->ucw_cpus[ucw->ucw_current]); + + if (mdb_vread(&cpu, sizeof (umem_cpu_t), caddr) == -1) { + mdb_warn("failed to read cpu %d", ucw->ucw_current); + return (WALK_ERR); + } + + ucw->ucw_current++; + + return (wsp->walk_callback(caddr, &cpu, wsp->walk_cbdata)); +} + +void +umem_cpu_walk_fini(mdb_walk_state_t *wsp) +{ + umem_cpu_walk_state_t *ucw = wsp->walk_data; + + mdb_free(ucw, sizeof (*ucw)); +} + +int +umem_cpu_cache_walk_init(mdb_walk_state_t *wsp) +{ + if (wsp->walk_addr == NULL) { + mdb_warn("umem_cpu_cache doesn't support global walks"); + return (WALK_ERR); + } + + if (mdb_layered_walk("umem_cpu", wsp) == -1) { + mdb_warn("couldn't walk 'umem_cpu'"); + return (WALK_ERR); + } + + wsp->walk_data = (void *)wsp->walk_addr; + + return (WALK_NEXT); +} + +int +umem_cpu_cache_walk_step(mdb_walk_state_t *wsp) +{ + uintptr_t caddr = (uintptr_t)wsp->walk_data; + const umem_cpu_t *cpu = wsp->walk_layer; + umem_cpu_cache_t cc; + + caddr += cpu->cpu_cache_offset; + + if (mdb_vread(&cc, sizeof (umem_cpu_cache_t), caddr) == -1) { + mdb_warn("couldn't read umem_cpu_cache at %p", caddr); + return (WALK_ERR); + } + + return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata)); +} + +int +umem_slab_walk_init(mdb_walk_state_t *wsp) +{ + uintptr_t caddr = wsp->walk_addr; + umem_cache_t c; + + if (caddr == NULL) { + mdb_warn("umem_slab doesn't support global walks\n"); + return (WALK_ERR); + } + + if (mdb_vread(&c, sizeof (c), caddr) == -1) { + mdb_warn("couldn't read umem_cache at %p", caddr); + return (WALK_ERR); + } + + wsp->walk_data = + (void *)(caddr + offsetof(umem_cache_t, cache_nullslab)); + wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_next; + + return (WALK_NEXT); +} + +int +umem_slab_walk_partial_init(mdb_walk_state_t *wsp) +{ + uintptr_t caddr = wsp->walk_addr; + umem_cache_t c; + + if (caddr == NULL) { + mdb_warn("umem_slab_partial doesn't support global walks\n"); + return (WALK_ERR); + } + + if (mdb_vread(&c, sizeof (c), caddr) == -1) { + mdb_warn("couldn't read umem_cache at %p", caddr); + return (WALK_ERR); + } + + wsp->walk_data = + (void *)(caddr + offsetof(umem_cache_t, cache_nullslab)); + wsp->walk_addr = (uintptr_t)c.cache_freelist; + + /* + * Some consumers (umem_walk_step(), in particular) require at + * least one callback if there are any buffers in the cache. So + * if there are *no* partial slabs, report the last full slab, if + * any. + * + * Yes, this is ugly, but it's cleaner than the other possibilities. 
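+ *
+ * Concretely: if the cache has no partial slabs (every slab fully
+ * allocated), the freelist is empty and cache_freelist points back at
+ * the embedded nullslab -- the same address we just stored in
+ * walk_data.  The check below detects that case and substitutes the
+ * nullslab's slab_prev, i.e. the last (complete) slab on the list.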
+ */ + if ((uintptr_t)wsp->walk_data == wsp->walk_addr) + wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_prev; + + return (WALK_NEXT); +} + +int +umem_slab_walk_step(mdb_walk_state_t *wsp) +{ + umem_slab_t s; + uintptr_t addr = wsp->walk_addr; + uintptr_t saddr = (uintptr_t)wsp->walk_data; + uintptr_t caddr = saddr - offsetof(umem_cache_t, cache_nullslab); + + if (addr == saddr) + return (WALK_DONE); + + if (mdb_vread(&s, sizeof (s), addr) == -1) { + mdb_warn("failed to read slab at %p", wsp->walk_addr); + return (WALK_ERR); + } + + if ((uintptr_t)s.slab_cache != caddr) { + mdb_warn("slab %p isn't in cache %p (in cache %p)\n", + addr, caddr, s.slab_cache); + return (WALK_ERR); + } + + wsp->walk_addr = (uintptr_t)s.slab_next; + + return (wsp->walk_callback(addr, &s, wsp->walk_cbdata)); +} + +int +umem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv) +{ + umem_cache_t c; + + if (!(flags & DCMD_ADDRSPEC)) { + if (mdb_walk_dcmd("umem_cache", "umem_cache", ac, argv) == -1) { + mdb_warn("can't walk umem_cache"); + return (DCMD_ERR); + } + return (DCMD_OK); + } + + if (DCMD_HDRSPEC(flags)) + mdb_printf("%-?s %-25s %4s %8s %8s %8s\n", "ADDR", "NAME", + "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL"); + + if (mdb_vread(&c, sizeof (c), addr) == -1) { + mdb_warn("couldn't read umem_cache at %p", addr); + return (DCMD_ERR); + } + + mdb_printf("%0?p %-25s %04x %08x %8ld %8lld\n", addr, c.cache_name, + c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal); + + return (DCMD_OK); +} + +static int +addrcmp(const void *lhs, const void *rhs) +{ + uintptr_t p1 = *((uintptr_t *)lhs); + uintptr_t p2 = *((uintptr_t *)rhs); + + if (p1 < p2) + return (-1); + if (p1 > p2) + return (1); + return (0); +} + +static int +bufctlcmp(const umem_bufctl_audit_t **lhs, const umem_bufctl_audit_t **rhs) +{ + const umem_bufctl_audit_t *bcp1 = *lhs; + const umem_bufctl_audit_t *bcp2 = *rhs; + + if (bcp1->bc_timestamp > bcp2->bc_timestamp) + return (-1); + + if (bcp1->bc_timestamp < bcp2->bc_timestamp) + return (1); + + return (0); +} + +typedef struct umem_hash_walk { + uintptr_t *umhw_table; + size_t umhw_nelems; + size_t umhw_pos; + umem_bufctl_t umhw_cur; +} umem_hash_walk_t; + +int +umem_hash_walk_init(mdb_walk_state_t *wsp) +{ + umem_hash_walk_t *umhw; + uintptr_t *hash; + umem_cache_t c; + uintptr_t haddr, addr = wsp->walk_addr; + size_t nelems; + size_t hsize; + + if (addr == NULL) { + mdb_warn("umem_hash doesn't support global walks\n"); + return (WALK_ERR); + } + + if (mdb_vread(&c, sizeof (c), addr) == -1) { + mdb_warn("couldn't read cache at addr %p", addr); + return (WALK_ERR); + } + + if (!(c.cache_flags & UMF_HASH)) { + mdb_warn("cache %p doesn't have a hash table\n", addr); + return (WALK_DONE); /* nothing to do */ + } + + umhw = mdb_zalloc(sizeof (umem_hash_walk_t), UM_SLEEP); + umhw->umhw_cur.bc_next = NULL; + umhw->umhw_pos = 0; + + umhw->umhw_nelems = nelems = c.cache_hash_mask + 1; + hsize = nelems * sizeof (uintptr_t); + haddr = (uintptr_t)c.cache_hash_table; + + umhw->umhw_table = hash = mdb_alloc(hsize, UM_SLEEP); + if (mdb_vread(hash, hsize, haddr) == -1) { + mdb_warn("failed to read hash table at %p", haddr); + mdb_free(hash, hsize); + mdb_free(umhw, sizeof (umem_hash_walk_t)); + return (WALK_ERR); + } + + wsp->walk_data = umhw; + + return (WALK_NEXT); +} + +int +umem_hash_walk_step(mdb_walk_state_t *wsp) +{ + umem_hash_walk_t *umhw = wsp->walk_data; + uintptr_t addr = NULL; + + if ((addr = (uintptr_t)umhw->umhw_cur.bc_next) == NULL) { + while (umhw->umhw_pos < umhw->umhw_nelems) 
{ + if ((addr = umhw->umhw_table[umhw->umhw_pos++]) != NULL) + break; + } + } + if (addr == NULL) + return (WALK_DONE); + + if (mdb_vread(&umhw->umhw_cur, sizeof (umem_bufctl_t), addr) == -1) { + mdb_warn("couldn't read umem_bufctl_t at addr %p", addr); + return (WALK_ERR); + } + + return (wsp->walk_callback(addr, &umhw->umhw_cur, wsp->walk_cbdata)); +} + +void +umem_hash_walk_fini(mdb_walk_state_t *wsp) +{ + umem_hash_walk_t *umhw = wsp->walk_data; + + if (umhw == NULL) + return; + + mdb_free(umhw->umhw_table, umhw->umhw_nelems * sizeof (uintptr_t)); + mdb_free(umhw, sizeof (umem_hash_walk_t)); +} + +/* + * Find the address of the bufctl structure for the address 'buf' in cache + * 'cp', which is at address caddr, and place it in *out. + */ +static int +umem_hash_lookup(umem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out) +{ + uintptr_t bucket = (uintptr_t)UMEM_HASH(cp, buf); + umem_bufctl_t *bcp; + umem_bufctl_t bc; + + if (mdb_vread(&bcp, sizeof (umem_bufctl_t *), bucket) == -1) { + mdb_warn("unable to read hash bucket for %p in cache %p", + buf, caddr); + return (-1); + } + + while (bcp != NULL) { + if (mdb_vread(&bc, sizeof (umem_bufctl_t), + (uintptr_t)bcp) == -1) { + mdb_warn("unable to read bufctl at %p", bcp); + return (-1); + } + if (bc.bc_addr == buf) { + *out = (uintptr_t)bcp; + return (0); + } + bcp = bc.bc_next; + } + + mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr); + return (-1); +} + +int +umem_get_magsize(const umem_cache_t *cp) +{ + uintptr_t addr = (uintptr_t)cp->cache_magtype; + GElf_Sym mt_sym; + umem_magtype_t mt; + int res; + + /* + * if cpu 0 has a non-zero magsize, it must be correct. caches + * with UMF_NOMAGAZINE have disabled their magazine layers, so + * it is okay to return 0 for them. + */ + if ((res = cp->cache_cpu[0].cc_magsize) != 0 || + (cp->cache_flags & UMF_NOMAGAZINE)) + return (res); + + if (umem_lookup_by_name("umem_magtype", &mt_sym) == -1) { + mdb_warn("unable to read 'umem_magtype'"); + } else if (addr < mt_sym.st_value || + addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 || + ((addr - mt_sym.st_value) % sizeof (mt)) != 0) { + mdb_warn("cache '%s' has invalid magtype pointer (%p)\n", + cp->cache_name, addr); + return (0); + } + if (mdb_vread(&mt, sizeof (mt), addr) == -1) { + mdb_warn("unable to read magtype at %a", addr); + return (0); + } + return (mt.mt_magsize); +} + +/*ARGSUSED*/ +static int +umem_estimate_slab(uintptr_t addr, const umem_slab_t *sp, size_t *est) +{ + *est -= (sp->slab_chunks - sp->slab_refcnt); + + return (WALK_NEXT); +} + +/* + * Returns an upper bound on the number of allocated buffers in a given + * cache. 
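+ *
+ * A worked example with illustrative numbers: if cache_buftotal is
+ * 1000, the partial slabs hold 120 free chunks between them, and the
+ * depot has 4 full magazines of 15 rounds each, the estimate is
+ * 1000 - 120 - (4 * 15) = 820 allocated buffers.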
+ */ +size_t +umem_estimate_allocated(uintptr_t addr, const umem_cache_t *cp) +{ + int magsize; + size_t cache_est; + + cache_est = cp->cache_buftotal; + + (void) mdb_pwalk("umem_slab_partial", + (mdb_walk_cb_t)umem_estimate_slab, &cache_est, addr); + + if ((magsize = umem_get_magsize(cp)) != 0) { + size_t mag_est = cp->cache_full.ml_total * magsize; + + if (cache_est >= mag_est) { + cache_est -= mag_est; + } else { + mdb_warn("cache %p's magazine layer holds more buffers " + "than the slab layer.\n", addr); + } + } + return (cache_est); +} + +#define READMAG_ROUNDS(rounds) { \ + if (mdb_vread(mp, magbsize, (uintptr_t)ump) == -1) { \ + mdb_warn("couldn't read magazine at %p", ump); \ + goto fail; \ + } \ + for (i = 0; i < rounds; i++) { \ + maglist[magcnt++] = mp->mag_round[i]; \ + if (magcnt == magmax) { \ + mdb_warn("%d magazines exceeds fudge factor\n", \ + magcnt); \ + goto fail; \ + } \ + } \ +} + +static int +umem_read_magazines(umem_cache_t *cp, uintptr_t addr, + void ***maglistp, size_t *magcntp, size_t *magmaxp) +{ + umem_magazine_t *ump, *mp; + void **maglist = NULL; + int i, cpu; + size_t magsize, magmax, magbsize; + size_t magcnt = 0; + + /* + * Read the magtype out of the cache, after verifying the pointer's + * correctness. + */ + magsize = umem_get_magsize(cp); + if (magsize == 0) { + *maglistp = NULL; + *magcntp = 0; + *magmaxp = 0; + return (0); + } + + /* + * There are several places where we need to go buffer hunting: + * the per-CPU loaded magazine, the per-CPU spare full magazine, + * and the full magazine list in the depot. + * + * For an upper bound on the number of buffers in the magazine + * layer, we have the number of magazines on the cache_full + * list plus at most two magazines per CPU (the loaded and the + * spare). Toss in 100 magazines as a fudge factor in case this + * is live (the number "100" comes from the same fudge factor in + * crash(1M)). + */ + magmax = (cp->cache_full.ml_total + 2 * umem_max_ncpus + 100) * magsize; + magbsize = offsetof(umem_magazine_t, mag_round[magsize]); + + if (magbsize >= PAGESIZE / 2) { + mdb_warn("magazine size for cache %p unreasonable (%x)\n", + addr, magbsize); + return (-1); + } + + maglist = mdb_alloc(magmax * sizeof (void *), UM_SLEEP); + mp = mdb_alloc(magbsize, UM_SLEEP); + if (mp == NULL || maglist == NULL) + goto fail; + + /* + * First up: the magazines in the depot (i.e. on the cache_full list). + */ + for (ump = cp->cache_full.ml_list; ump != NULL; ) { + READMAG_ROUNDS(magsize); + ump = mp->mag_next; + + if (ump == cp->cache_full.ml_list) + break; /* cache_full list loop detected */ + } + + dprintf(("cache_full list done\n")); + + /* + * Now whip through the CPUs, snagging the loaded magazines + * and full spares. 
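+ *
+ * Each CPU contributes at most two magazines here (cc_loaded and
+ * cc_ploaded), which is where the "2 * umem_max_ncpus" term in the
+ * magmax bound above comes from.  With illustrative numbers -- 2 full
+ * depot magazines, 8 CPUs, and 15-round magazines -- that bound works
+ * out to (2 + 2 * 8 + 100) * 15 = 1770 round slots.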
+ */ + for (cpu = 0; cpu < umem_max_ncpus; cpu++) { + umem_cpu_cache_t *ccp = &cp->cache_cpu[cpu]; + + dprintf(("reading cpu cache %p\n", + (uintptr_t)ccp - (uintptr_t)cp + addr)); + + if (ccp->cc_rounds > 0 && + (ump = ccp->cc_loaded) != NULL) { + dprintf(("reading %d loaded rounds\n", ccp->cc_rounds)); + READMAG_ROUNDS(ccp->cc_rounds); + } + + if (ccp->cc_prounds > 0 && + (ump = ccp->cc_ploaded) != NULL) { + dprintf(("reading %d previously loaded rounds\n", + ccp->cc_prounds)); + READMAG_ROUNDS(ccp->cc_prounds); + } + } + + dprintf(("magazine layer: %d buffers\n", magcnt)); + + mdb_free(mp, magbsize); + + *maglistp = maglist; + *magcntp = magcnt; + *magmaxp = magmax; + + return (0); + +fail: + if (mp) + mdb_free(mp, magbsize); + if (maglist) + mdb_free(maglist, magmax * sizeof (void *)); + + return (-1); +} + +typedef struct umem_read_ptc_walk { + void **urpw_buf; + size_t urpw_cnt; + size_t urpw_max; +} umem_read_ptc_walk_t; + +/*ARGSUSED*/ +static int +umem_read_ptc_walk_buf(uintptr_t addr, + const void *ignored, umem_read_ptc_walk_t *urpw) +{ + if (urpw->urpw_cnt == urpw->urpw_max) { + size_t nmax = urpw->urpw_max ? (urpw->urpw_max << 1) : 1; + void **new = mdb_zalloc(nmax * sizeof (void *), UM_SLEEP); + + if (nmax > 1) { + size_t osize = urpw->urpw_max * sizeof (void *); + bcopy(urpw->urpw_buf, new, osize); + mdb_free(urpw->urpw_buf, osize); + } + + urpw->urpw_buf = new; + urpw->urpw_max = nmax; + } + + urpw->urpw_buf[urpw->urpw_cnt++] = (void *)addr; + + return (WALK_NEXT); +} + +static int +umem_read_ptc(umem_cache_t *cp, + void ***buflistp, size_t *bufcntp, size_t *bufmaxp) +{ + umem_read_ptc_walk_t urpw; + char walk[60]; + int rval; + + if (!(cp->cache_flags & UMF_PTC)) + return (0); + + (void) snprintf(walk, sizeof (walk), "umem_ptc_%d", cp->cache_bufsize); + + urpw.urpw_buf = *buflistp; + urpw.urpw_cnt = *bufcntp; + urpw.urpw_max = *bufmaxp; + + if ((rval = mdb_walk(walk, + (mdb_walk_cb_t)umem_read_ptc_walk_buf, &urpw)) == -1) { + mdb_warn("couldn't walk %s", walk); + } + + *buflistp = urpw.urpw_buf; + *bufcntp = urpw.urpw_cnt; + *bufmaxp = urpw.urpw_max; + + return (rval); +} + +static int +umem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf) +{ + return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata)); +} + +static int +bufctl_walk_callback(umem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf) +{ + umem_bufctl_audit_t *b; + UMEM_LOCAL_BUFCTL_AUDIT(&b); + + /* + * if UMF_AUDIT is not set, we know that we're looking at a + * umem_bufctl_t. 
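+ *
+ * Note that the fallback below zeroes the audit-sized buffer before
+ * reading the smaller umem_bufctl_t, so callers see the audit-only
+ * fields (stack trace, timestamp, thread) as zeroes rather than
+ * whatever the stack happened to contain.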
+ */ + if (!(cp->cache_flags & UMF_AUDIT) || + mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, buf) == -1) { + (void) memset(b, 0, UMEM_BUFCTL_AUDIT_SIZE); + if (mdb_vread(b, sizeof (umem_bufctl_t), buf) == -1) { + mdb_warn("unable to read bufctl at %p", buf); + return (WALK_ERR); + } + } + + return (wsp->walk_callback(buf, b, wsp->walk_cbdata)); +} + +typedef struct umem_walk { + int umw_type; + + uintptr_t umw_addr; /* cache address */ + umem_cache_t *umw_cp; + size_t umw_csize; + + /* + * magazine layer + */ + void **umw_maglist; + size_t umw_max; + size_t umw_count; + size_t umw_pos; + + /* + * slab layer + */ + char *umw_valid; /* to keep track of freed buffers */ + char *umw_ubase; /* buffer for slab data */ +} umem_walk_t; + +static int +umem_walk_init_common(mdb_walk_state_t *wsp, int type) +{ + umem_walk_t *umw; + int csize; + umem_cache_t *cp; + size_t vm_quantum; + + size_t magmax, magcnt; + void **maglist = NULL; + uint_t chunksize, slabsize; + int status = WALK_ERR; + uintptr_t addr = wsp->walk_addr; + const char *layered; + + type &= ~UM_HASH; + + if (addr == NULL) { + mdb_warn("umem walk doesn't support global walks\n"); + return (WALK_ERR); + } + + dprintf(("walking %p\n", addr)); + + /* + * The number of "cpus" determines how large the cache is. + */ + csize = UMEM_CACHE_SIZE(umem_max_ncpus); + cp = mdb_alloc(csize, UM_SLEEP); + + if (mdb_vread(cp, csize, addr) == -1) { + mdb_warn("couldn't read cache at addr %p", addr); + goto out2; + } + + /* + * It's easy for someone to hand us an invalid cache address. + * Unfortunately, it is hard for this walker to survive an + * invalid cache cleanly. So we make sure that: + * + * 1. the vmem arena for the cache is readable, + * 2. the vmem arena's quantum is a power of 2, + * 3. our slabsize is a multiple of the quantum, and + * 4. our chunksize is >0 and less than our slabsize. + */ + if (mdb_vread(&vm_quantum, sizeof (vm_quantum), + (uintptr_t)&cp->cache_arena->vm_quantum) == -1 || + vm_quantum == 0 || + (vm_quantum & (vm_quantum - 1)) != 0 || + cp->cache_slabsize < vm_quantum || + P2PHASE(cp->cache_slabsize, vm_quantum) != 0 || + cp->cache_chunksize == 0 || + cp->cache_chunksize > cp->cache_slabsize) { + mdb_warn("%p is not a valid umem_cache_t\n", addr); + goto out2; + } + + dprintf(("buf total is %d\n", cp->cache_buftotal)); + + if (cp->cache_buftotal == 0) { + mdb_free(cp, csize); + return (WALK_DONE); + } + + /* + * If they ask for bufctls, but it's a small-slab cache, + * there is nothing to report. + */ + if ((type & UM_BUFCTL) && !(cp->cache_flags & UMF_HASH)) { + dprintf(("bufctl requested, not UMF_HASH (flags: %p)\n", + cp->cache_flags)); + mdb_free(cp, csize); + return (WALK_DONE); + } + + /* + * Read in the contents of the magazine layer + */ + if (umem_read_magazines(cp, addr, &maglist, &magcnt, &magmax) != 0) + goto out2; + + /* + * Read in the contents of the per-thread caches, if any + */ + if (umem_read_ptc(cp, &maglist, &magcnt, &magmax) != 0) + goto out2; + + /* + * We have all of the buffers from the magazines and from the + * per-thread cache (if any); if we are walking allocated buffers, + * sort them so we can bsearch them later. 
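+ *
+ * Sorting costs O(n log n) once; each of the (potentially very many)
+ * "is this buffer in a magazine?" checks in umem_walk_step() is then
+ * an O(log n) bsearch() instead of a linear scan of the magazine
+ * list.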
+ */ + if (type & UM_ALLOCATED) + qsort(maglist, magcnt, sizeof (void *), addrcmp); + + wsp->walk_data = umw = mdb_zalloc(sizeof (umem_walk_t), UM_SLEEP); + + umw->umw_type = type; + umw->umw_addr = addr; + umw->umw_cp = cp; + umw->umw_csize = csize; + umw->umw_maglist = maglist; + umw->umw_max = magmax; + umw->umw_count = magcnt; + umw->umw_pos = 0; + + /* + * When walking allocated buffers in a UMF_HASH cache, we walk the + * hash table instead of the slab layer. + */ + if ((cp->cache_flags & UMF_HASH) && (type & UM_ALLOCATED)) { + layered = "umem_hash"; + + umw->umw_type |= UM_HASH; + } else { + /* + * If we are walking freed buffers, we only need the + * magazine layer plus the partially allocated slabs. + * To walk allocated buffers, we need all of the slabs. + */ + if (type & UM_ALLOCATED) + layered = "umem_slab"; + else + layered = "umem_slab_partial"; + + /* + * for small-slab caches, we read in the entire slab. For + * freed buffers, we can just walk the freelist. For + * allocated buffers, we use a 'valid' array to track + * the freed buffers. + */ + if (!(cp->cache_flags & UMF_HASH)) { + chunksize = cp->cache_chunksize; + slabsize = cp->cache_slabsize; + + umw->umw_ubase = mdb_alloc(slabsize + + sizeof (umem_bufctl_t), UM_SLEEP); + + if (type & UM_ALLOCATED) + umw->umw_valid = + mdb_alloc(slabsize / chunksize, UM_SLEEP); + } + } + + status = WALK_NEXT; + + if (mdb_layered_walk(layered, wsp) == -1) { + mdb_warn("unable to start layered '%s' walk", layered); + status = WALK_ERR; + } + +out1: + if (status == WALK_ERR) { + if (umw->umw_valid) + mdb_free(umw->umw_valid, slabsize / chunksize); + + if (umw->umw_ubase) + mdb_free(umw->umw_ubase, slabsize + + sizeof (umem_bufctl_t)); + + if (umw->umw_maglist) + mdb_free(umw->umw_maglist, umw->umw_max * + sizeof (uintptr_t)); + + mdb_free(umw, sizeof (umem_walk_t)); + wsp->walk_data = NULL; + } + +out2: + if (status == WALK_ERR) + mdb_free(cp, csize); + + return (status); +} + +int +umem_walk_step(mdb_walk_state_t *wsp) +{ + umem_walk_t *umw = wsp->walk_data; + int type = umw->umw_type; + umem_cache_t *cp = umw->umw_cp; + + void **maglist = umw->umw_maglist; + int magcnt = umw->umw_count; + + uintptr_t chunksize, slabsize; + uintptr_t addr; + const umem_slab_t *sp; + const umem_bufctl_t *bcp; + umem_bufctl_t bc; + + int chunks; + char *kbase; + void *buf; + int i, ret; + + char *valid, *ubase; + + /* + * first, handle the 'umem_hash' layered walk case + */ + if (type & UM_HASH) { + /* + * We have a buffer which has been allocated out of the + * global layer. We need to make sure that it's not + * actually sitting in a magazine before we report it as + * an allocated buffer. + */ + buf = ((const umem_bufctl_t *)wsp->walk_layer)->bc_addr; + + if (magcnt > 0 && + bsearch(&buf, maglist, magcnt, sizeof (void *), + addrcmp) != NULL) + return (WALK_NEXT); + + if (type & UM_BUFCTL) + return (bufctl_walk_callback(cp, wsp, wsp->walk_addr)); + + return (umem_walk_callback(wsp, (uintptr_t)buf)); + } + + ret = WALK_NEXT; + + addr = umw->umw_addr; + + /* + * If we're walking freed buffers, report everything in the + * magazine layer before processing the first slab. 
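+ *
+ * umem_walk_step() is invoked once per slab (it is layered on the
+ * slab walkers), so umw_count is cleared below to guarantee that the
+ * magazine contents are only reported on the first invocation.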
*/ + if ((type & UM_FREE) && magcnt != 0) { + umw->umw_count = 0; /* only do this once */ + for (i = 0; i < magcnt; i++) { + buf = maglist[i]; + + if (type & UM_BUFCTL) { + uintptr_t out; + + if (cp->cache_flags & UMF_BUFTAG) { + umem_buftag_t *btp; + umem_buftag_t tag; + + /* LINTED - alignment */ + btp = UMEM_BUFTAG(cp, buf); + if (mdb_vread(&tag, sizeof (tag), + (uintptr_t)btp) == -1) { + mdb_warn("reading buftag for " + "%p at %p", buf, btp); + continue; + } + out = (uintptr_t)tag.bt_bufctl; + } else { + if (umem_hash_lookup(cp, addr, buf, + &out) == -1) + continue; + } + ret = bufctl_walk_callback(cp, wsp, out); + } else { + ret = umem_walk_callback(wsp, (uintptr_t)buf); + } + + if (ret != WALK_NEXT) + return (ret); + } + } + + /* + * Handle the buffers in the current slab + */ + chunksize = cp->cache_chunksize; + slabsize = cp->cache_slabsize; + + sp = wsp->walk_layer; + chunks = sp->slab_chunks; + kbase = sp->slab_base; + + dprintf(("kbase is %p\n", kbase)); + + if (!(cp->cache_flags & UMF_HASH)) { + valid = umw->umw_valid; + ubase = umw->umw_ubase; + + if (mdb_vread(ubase, chunks * chunksize, + (uintptr_t)kbase) == -1) { + mdb_warn("failed to read slab contents at %p", kbase); + return (WALK_ERR); + } + + /* + * Set up the valid map as fully allocated -- we'll punch + * out the freelist. + */ + if (type & UM_ALLOCATED) + (void) memset(valid, 1, chunks); + } else { + valid = NULL; + ubase = NULL; + } + + /* + * walk the slab's freelist + */ + bcp = sp->slab_head; + + dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks)); + + /* + * since we could be in the middle of allocating a buffer, + * our refcnt could be one higher than it ought to be. So we + * check one further on the freelist than the count allows. + */ + for (i = sp->slab_refcnt; i <= chunks; i++) { + uint_t ndx; + + dprintf(("bcp is %p\n", bcp)); + + if (bcp == NULL) { + if (i == chunks) + break; + mdb_warn( + "slab %p in cache %p freelist too short by %d\n", + sp, addr, chunks - i); + break; + } + + if (cp->cache_flags & UMF_HASH) { + if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) { + mdb_warn("failed to read bufctl ptr at %p", + bcp); + break; + } + buf = bc.bc_addr; + } else { + /* + * Otherwise the buffer is (or should be) in the slab + * that we've read in; determine its offset in the + * slab, validate that it's not corrupt, and add to + * our base address to find the umem_bufctl_t. (Note + * that we don't need to add the size of the bufctl + * to our offset calculation because of the slop that's + * allocated for the buffer at ubase.) + */ + uintptr_t offs = (uintptr_t)bcp - (uintptr_t)kbase; + + if (offs > chunks * chunksize) { + mdb_warn("found corrupt bufctl ptr %p" + " in slab %p in cache %p\n", bcp, + wsp->walk_addr, addr); + break; + } + + bc = *((umem_bufctl_t *)((uintptr_t)ubase + offs)); + buf = UMEM_BUF(cp, bcp); + } + + ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize; + + if (ndx > slabsize / cp->cache_bufsize) { + /* + * This is very wrong; we have managed to find + * a buffer in the slab which shouldn't + * actually be here. Emit a warning, and + * try to continue. 
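+ *
+ * ("Out of range" here means the chunk index computed from the
+ * freelist pointer lands beyond the buffers this slab can hold,
+ * which indicates a corrupt freelist entry.)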
+ */ + mdb_warn("buf %p is out of range for " + "slab %p, cache %p\n", buf, sp, addr); + } else if (type & UM_ALLOCATED) { + /* + * we have found a buffer on the slab's freelist; + * clear its entry + */ + valid[ndx] = 0; + } else { + /* + * Report this freed buffer + */ + if (type & UM_BUFCTL) { + ret = bufctl_walk_callback(cp, wsp, + (uintptr_t)bcp); + } else { + ret = umem_walk_callback(wsp, (uintptr_t)buf); + } + if (ret != WALK_NEXT) + return (ret); + } + + bcp = bc.bc_next; + } + + if (bcp != NULL) { + dprintf(("slab %p in cache %p freelist too long (%p)\n", + sp, addr, bcp)); + } + + /* + * If we are walking freed buffers, the loop above handled reporting + * them. + */ + if (type & UM_FREE) + return (WALK_NEXT); + + if (type & UM_BUFCTL) { + mdb_warn("impossible situation: small-slab UM_BUFCTL walk for " + "cache %p\n", addr); + return (WALK_ERR); + } + + /* + * Report allocated buffers, skipping buffers in the magazine layer. + * We only get this far for small-slab caches. + */ + for (i = 0; ret == WALK_NEXT && i < chunks; i++) { + buf = (char *)kbase + i * chunksize; + + if (!valid[i]) + continue; /* on slab freelist */ + + if (magcnt > 0 && + bsearch(&buf, maglist, magcnt, sizeof (void *), + addrcmp) != NULL) + continue; /* in magazine layer */ + + ret = umem_walk_callback(wsp, (uintptr_t)buf); + } + return (ret); +} + +void +umem_walk_fini(mdb_walk_state_t *wsp) +{ + umem_walk_t *umw = wsp->walk_data; + uintptr_t chunksize; + uintptr_t slabsize; + + if (umw == NULL) + return; + + if (umw->umw_maglist != NULL) + mdb_free(umw->umw_maglist, umw->umw_max * sizeof (void *)); + + chunksize = umw->umw_cp->cache_chunksize; + slabsize = umw->umw_cp->cache_slabsize; + + if (umw->umw_valid != NULL) + mdb_free(umw->umw_valid, slabsize / chunksize); + if (umw->umw_ubase != NULL) + mdb_free(umw->umw_ubase, slabsize + sizeof (umem_bufctl_t)); + + mdb_free(umw->umw_cp, umw->umw_csize); + mdb_free(umw, sizeof (umem_walk_t)); +} + +/*ARGSUSED*/ +static int +umem_walk_all(uintptr_t addr, const umem_cache_t *c, mdb_walk_state_t *wsp) +{ + /* + * Buffers allocated from NOTOUCH caches can also show up as freed + * memory in other caches. This can be a little confusing, so we + * don't walk NOTOUCH caches when walking all caches (thereby assuring + * that "::walk umem" and "::walk freemem" yield disjoint output). 
+ */ + if (c->cache_cflags & UMC_NOTOUCH) + return (WALK_NEXT); + + if (mdb_pwalk(wsp->walk_data, wsp->walk_callback, + wsp->walk_cbdata, addr) == -1) + return (WALK_DONE); + + return (WALK_NEXT); +} + +#define UMEM_WALK_ALL(name, wsp) { \ + wsp->walk_data = (name); \ + if (mdb_walk("umem_cache", (mdb_walk_cb_t)umem_walk_all, wsp) == -1) \ + return (WALK_ERR); \ + return (WALK_DONE); \ +} + +int +umem_walk_init(mdb_walk_state_t *wsp) +{ + if (wsp->walk_arg != NULL) + wsp->walk_addr = (uintptr_t)wsp->walk_arg; + + if (wsp->walk_addr == NULL) + UMEM_WALK_ALL("umem", wsp); + return (umem_walk_init_common(wsp, UM_ALLOCATED)); +} + +int +bufctl_walk_init(mdb_walk_state_t *wsp) +{ + if (wsp->walk_addr == NULL) + UMEM_WALK_ALL("bufctl", wsp); + return (umem_walk_init_common(wsp, UM_ALLOCATED | UM_BUFCTL)); +} + +int +freemem_walk_init(mdb_walk_state_t *wsp) +{ + if (wsp->walk_addr == NULL) + UMEM_WALK_ALL("freemem", wsp); + return (umem_walk_init_common(wsp, UM_FREE)); +} + +int +freectl_walk_init(mdb_walk_state_t *wsp) +{ + if (wsp->walk_addr == NULL) + UMEM_WALK_ALL("freectl", wsp); + return (umem_walk_init_common(wsp, UM_FREE | UM_BUFCTL)); +} + +typedef struct bufctl_history_walk { + void *bhw_next; + umem_cache_t *bhw_cache; + umem_slab_t *bhw_slab; + hrtime_t bhw_timestamp; +} bufctl_history_walk_t; + +int +bufctl_history_walk_init(mdb_walk_state_t *wsp) +{ + bufctl_history_walk_t *bhw; + umem_bufctl_audit_t bc; + umem_bufctl_audit_t bcn; + + if (wsp->walk_addr == NULL) { + mdb_warn("bufctl_history walk doesn't support global walks\n"); + return (WALK_ERR); + } + + if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) { + mdb_warn("unable to read bufctl at %p", wsp->walk_addr); + return (WALK_ERR); + } + + bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP); + bhw->bhw_timestamp = 0; + bhw->bhw_cache = bc.bc_cache; + bhw->bhw_slab = bc.bc_slab; + + /* + * sometimes the first log entry matches the base bufctl; in that + * case, skip the base bufctl. + */ + if (bc.bc_lastlog != NULL && + mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 && + bc.bc_addr == bcn.bc_addr && + bc.bc_cache == bcn.bc_cache && + bc.bc_slab == bcn.bc_slab && + bc.bc_timestamp == bcn.bc_timestamp && + bc.bc_thread == bcn.bc_thread) + bhw->bhw_next = bc.bc_lastlog; + else + bhw->bhw_next = (void *)wsp->walk_addr; + + wsp->walk_addr = (uintptr_t)bc.bc_addr; + wsp->walk_data = bhw; + + return (WALK_NEXT); +} + +int +bufctl_history_walk_step(mdb_walk_state_t *wsp) +{ + bufctl_history_walk_t *bhw = wsp->walk_data; + uintptr_t addr = (uintptr_t)bhw->bhw_next; + uintptr_t baseaddr = wsp->walk_addr; + umem_bufctl_audit_t *b; + UMEM_LOCAL_BUFCTL_AUDIT(&b); + + if (addr == NULL) + return (WALK_DONE); + + if (mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) { + mdb_warn("unable to read bufctl at %p", bhw->bhw_next); + return (WALK_ERR); + } + + /* + * The bufctl is only valid if the address, cache, and slab are + * correct. We also check that the timestamp is decreasing, to + * prevent infinite loops. 
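+ *
+ * A typical use, given a bufctl address (illustrative) obtained from,
+ * e.g., ::walk bufctl, is to list the transaction history of its
+ * buffer:
+ *
+ *	> 0x81234abc::walk bufctl_history | ::bufctl -v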
+ */ + if ((uintptr_t)b->bc_addr != baseaddr || + b->bc_cache != bhw->bhw_cache || + b->bc_slab != bhw->bhw_slab || + (bhw->bhw_timestamp != 0 && b->bc_timestamp >= bhw->bhw_timestamp)) + return (WALK_DONE); + + bhw->bhw_next = b->bc_lastlog; + bhw->bhw_timestamp = b->bc_timestamp; + + return (wsp->walk_callback(addr, b, wsp->walk_cbdata)); +} + +void +bufctl_history_walk_fini(mdb_walk_state_t *wsp) +{ + bufctl_history_walk_t *bhw = wsp->walk_data; + + mdb_free(bhw, sizeof (*bhw)); +} + +typedef struct umem_log_walk { + umem_bufctl_audit_t *ulw_base; + umem_bufctl_audit_t **ulw_sorted; + umem_log_header_t ulw_lh; + size_t ulw_size; + size_t ulw_maxndx; + size_t ulw_ndx; +} umem_log_walk_t; + +int +umem_log_walk_init(mdb_walk_state_t *wsp) +{ + uintptr_t lp = wsp->walk_addr; + umem_log_walk_t *ulw; + umem_log_header_t *lhp; + int maxndx, i, j, k; + + /* + * By default (global walk), walk the umem_transaction_log. Otherwise + * read the log whose umem_log_header_t is stored at walk_addr. + */ + if (lp == NULL && umem_readvar(&lp, "umem_transaction_log") == -1) { + mdb_warn("failed to read 'umem_transaction_log'"); + return (WALK_ERR); + } + + if (lp == NULL) { + mdb_warn("log is disabled\n"); + return (WALK_ERR); + } + + ulw = mdb_zalloc(sizeof (umem_log_walk_t), UM_SLEEP); + lhp = &ulw->ulw_lh; + + if (mdb_vread(lhp, sizeof (umem_log_header_t), lp) == -1) { + mdb_warn("failed to read log header at %p", lp); + mdb_free(ulw, sizeof (umem_log_walk_t)); + return (WALK_ERR); + } + + ulw->ulw_size = lhp->lh_chunksize * lhp->lh_nchunks; + ulw->ulw_base = mdb_alloc(ulw->ulw_size, UM_SLEEP); + maxndx = lhp->lh_chunksize / UMEM_BUFCTL_AUDIT_SIZE - 1; + + if (mdb_vread(ulw->ulw_base, ulw->ulw_size, + (uintptr_t)lhp->lh_base) == -1) { + mdb_warn("failed to read log at base %p", lhp->lh_base); + mdb_free(ulw->ulw_base, ulw->ulw_size); + mdb_free(ulw, sizeof (umem_log_walk_t)); + return (WALK_ERR); + } + + ulw->ulw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks * + sizeof (umem_bufctl_audit_t *), UM_SLEEP); + + for (i = 0, k = 0; i < lhp->lh_nchunks; i++) { + caddr_t chunk = (caddr_t) + ((uintptr_t)ulw->ulw_base + i * lhp->lh_chunksize); + + for (j = 0; j < maxndx; j++) { + /* LINTED align */ + ulw->ulw_sorted[k++] = (umem_bufctl_audit_t *)chunk; + chunk += UMEM_BUFCTL_AUDIT_SIZE; + } + } + + qsort(ulw->ulw_sorted, k, sizeof (umem_bufctl_audit_t *), + (int(*)(const void *, const void *))bufctlcmp); + + ulw->ulw_maxndx = k; + wsp->walk_data = ulw; + + return (WALK_NEXT); +} + +int +umem_log_walk_step(mdb_walk_state_t *wsp) +{ + umem_log_walk_t *ulw = wsp->walk_data; + umem_bufctl_audit_t *bcp; + + if (ulw->ulw_ndx == ulw->ulw_maxndx) + return (WALK_DONE); + + bcp = ulw->ulw_sorted[ulw->ulw_ndx++]; + + return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)ulw->ulw_base + + (uintptr_t)ulw->ulw_lh.lh_base, bcp, wsp->walk_cbdata)); +} + +void +umem_log_walk_fini(mdb_walk_state_t *wsp) +{ + umem_log_walk_t *ulw = wsp->walk_data; + + mdb_free(ulw->ulw_base, ulw->ulw_size); + mdb_free(ulw->ulw_sorted, ulw->ulw_maxndx * + sizeof (umem_bufctl_audit_t *)); + mdb_free(ulw, sizeof (umem_log_walk_t)); +} + +typedef struct allocdby_bufctl { + uintptr_t abb_addr; + hrtime_t abb_ts; +} allocdby_bufctl_t; + +typedef struct allocdby_walk { + const char *abw_walk; + uintptr_t abw_thread; + size_t abw_nbufs; + size_t abw_size; + allocdby_bufctl_t *abw_buf; + size_t abw_ndx; +} allocdby_walk_t; + +int +allocdby_walk_bufctl(uintptr_t addr, const umem_bufctl_audit_t *bcp, + allocdby_walk_t *abw) +{ + if ((uintptr_t)bcp->bc_thread 
!= abw->abw_thread) + return (WALK_NEXT); + + if (abw->abw_nbufs == abw->abw_size) { + allocdby_bufctl_t *buf; + size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size; + + buf = mdb_zalloc(oldsize << 1, UM_SLEEP); + + bcopy(abw->abw_buf, buf, oldsize); + mdb_free(abw->abw_buf, oldsize); + + abw->abw_size <<= 1; + abw->abw_buf = buf; + } + + abw->abw_buf[abw->abw_nbufs].abb_addr = addr; + abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp; + abw->abw_nbufs++; + + return (WALK_NEXT); +} + +/*ARGSUSED*/ +int +allocdby_walk_cache(uintptr_t addr, const umem_cache_t *c, allocdby_walk_t *abw) +{ + if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl, + abw, addr) == -1) { + mdb_warn("couldn't walk bufctl for cache %p", addr); + return (WALK_DONE); + } + + return (WALK_NEXT); +} + +static int +allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs) +{ + if (lhs->abb_ts < rhs->abb_ts) + return (1); + if (lhs->abb_ts > rhs->abb_ts) + return (-1); + return (0); +} + +static int +allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk) +{ + allocdby_walk_t *abw; + + if (wsp->walk_addr == NULL) { + mdb_warn("allocdby walk doesn't support global walks\n"); + return (WALK_ERR); + } + + abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP); + + abw->abw_thread = wsp->walk_addr; + abw->abw_walk = walk; + abw->abw_size = 128; /* something reasonable */ + abw->abw_buf = + mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP); + + wsp->walk_data = abw; + + if (mdb_walk("umem_cache", + (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) { + mdb_warn("couldn't walk umem_cache"); + allocdby_walk_fini(wsp); + return (WALK_ERR); + } + + qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t), + (int(*)(const void *, const void *))allocdby_cmp); + + return (WALK_NEXT); +} + +int +allocdby_walk_init(mdb_walk_state_t *wsp) +{ + return (allocdby_walk_init_common(wsp, "bufctl")); +} + +int +freedby_walk_init(mdb_walk_state_t *wsp) +{ + return (allocdby_walk_init_common(wsp, "freectl")); +} + +int +allocdby_walk_step(mdb_walk_state_t *wsp) +{ + allocdby_walk_t *abw = wsp->walk_data; + uintptr_t addr; + umem_bufctl_audit_t *bcp; + UMEM_LOCAL_BUFCTL_AUDIT(&bcp); + + if (abw->abw_ndx == abw->abw_nbufs) + return (WALK_DONE); + + addr = abw->abw_buf[abw->abw_ndx++].abb_addr; + + if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) { + mdb_warn("couldn't read bufctl at %p", addr); + return (WALK_DONE); + } + + return (wsp->walk_callback(addr, bcp, wsp->walk_cbdata)); +} + +void +allocdby_walk_fini(mdb_walk_state_t *wsp) +{ + allocdby_walk_t *abw = wsp->walk_data; + + mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size); + mdb_free(abw, sizeof (allocdby_walk_t)); +} + +/*ARGSUSED*/ +int +allocdby_walk(uintptr_t addr, const umem_bufctl_audit_t *bcp, void *ignored) +{ + char c[MDB_SYM_NAMLEN]; + GElf_Sym sym; + int i; + + mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp); + for (i = 0; i < bcp->bc_depth; i++) { + if (mdb_lookup_by_addr(bcp->bc_stack[i], + MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1) + continue; + if (is_umem_sym(c, "umem_")) + continue; + mdb_printf("%s+0x%lx", + c, bcp->bc_stack[i] - (uintptr_t)sym.st_value); + break; + } + mdb_printf("\n"); + + return (WALK_NEXT); +} + +static int +allocdby_common(uintptr_t addr, uint_t flags, const char *w) +{ + if (!(flags & DCMD_ADDRSPEC)) + return (DCMD_USAGE); + + mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER"); + + if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == 
-1) { + mdb_warn("can't walk '%s' for %p", w, addr); + return (DCMD_ERR); + } + + return (DCMD_OK); +} + +/*ARGSUSED*/ +int +allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + return (allocdby_common(addr, flags, "allocdby")); +} + +/*ARGSUSED*/ +int +freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + return (allocdby_common(addr, flags, "freedby")); +} + +typedef struct whatis_info { + mdb_whatis_t *wi_w; + const umem_cache_t *wi_cache; + const vmem_t *wi_vmem; + vmem_t *wi_msb_arena; + size_t wi_slab_size; + int wi_slab_found; + uint_t wi_freemem; +} whatis_info_t; + +/* call one of our dcmd functions with "-v" and the provided address */ +static void +whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr) +{ + mdb_arg_t a; + a.a_type = MDB_TYPE_STRING; + a.a_un.a_str = "-v"; + + mdb_printf(":\n"); + (void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a); +} + +static void +whatis_print_umem(whatis_info_t *wi, uintptr_t maddr, uintptr_t addr, + uintptr_t baddr) +{ + mdb_whatis_t *w = wi->wi_w; + const umem_cache_t *cp = wi->wi_cache; + int quiet = (mdb_whatis_flags(w) & WHATIS_QUIET); + + int call_printer = (!quiet && (cp->cache_flags & UMF_AUDIT)); + + mdb_whatis_report_object(w, maddr, addr, ""); + + if (baddr != 0 && !call_printer) + mdb_printf("bufctl %p ", baddr); + + mdb_printf("%s from %s", + (wi->wi_freemem == FALSE) ? "allocated" : "freed", cp->cache_name); + + if (call_printer && baddr != 0) { + whatis_call_printer(bufctl, baddr); + return; + } + mdb_printf("\n"); +} + +/*ARGSUSED*/ +static int +whatis_walk_umem(uintptr_t addr, void *ignored, whatis_info_t *wi) +{ + mdb_whatis_t *w = wi->wi_w; + + uintptr_t cur; + size_t size = wi->wi_cache->cache_bufsize; + + while (mdb_whatis_match(w, addr, size, &cur)) + whatis_print_umem(wi, cur, addr, NULL); + + return (WHATIS_WALKRET(w)); +} + +/*ARGSUSED*/ +static int +whatis_walk_bufctl(uintptr_t baddr, const umem_bufctl_t *bcp, whatis_info_t *wi) +{ + mdb_whatis_t *w = wi->wi_w; + + uintptr_t cur; + uintptr_t addr = (uintptr_t)bcp->bc_addr; + size_t size = wi->wi_cache->cache_bufsize; + + while (mdb_whatis_match(w, addr, size, &cur)) + whatis_print_umem(wi, cur, addr, baddr); + + return (WHATIS_WALKRET(w)); +} + + +static int +whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_info_t *wi) +{ + mdb_whatis_t *w = wi->wi_w; + + size_t size = vs->vs_end - vs->vs_start; + uintptr_t cur; + + /* We're not interested in anything but alloc and free segments */ + if (vs->vs_type != VMEM_ALLOC && vs->vs_type != VMEM_FREE) + return (WALK_NEXT); + + while (mdb_whatis_match(w, vs->vs_start, size, &cur)) { + mdb_whatis_report_object(w, cur, vs->vs_start, ""); + + /* + * If we're not printing it separately, provide the vmem_seg + * pointer if it has a stack trace. + */ + if ((mdb_whatis_flags(w) & WHATIS_QUIET) && + ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0 || + (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) { + mdb_printf("vmem_seg %p ", addr); + } + + mdb_printf("%s from %s vmem arena", + (vs->vs_type == VMEM_ALLOC) ? 
"allocated" : "freed", + wi->wi_vmem->vm_name); + + if (!mdb_whatis_flags(w) & WHATIS_QUIET) + whatis_call_printer(vmem_seg, addr); + else + mdb_printf("\n"); + } + + return (WHATIS_WALKRET(w)); +} + +static int +whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_info_t *wi) +{ + mdb_whatis_t *w = wi->wi_w; + const char *nm = vmem->vm_name; + wi->wi_vmem = vmem; + + if (mdb_whatis_flags(w) & WHATIS_VERBOSE) + mdb_printf("Searching vmem arena %s...\n", nm); + + if (mdb_pwalk("vmem_seg", + (mdb_walk_cb_t)whatis_walk_seg, wi, addr) == -1) { + mdb_warn("can't walk vmem seg for %p", addr); + return (WALK_NEXT); + } + + return (WHATIS_WALKRET(w)); +} + +/*ARGSUSED*/ +static int +whatis_walk_slab(uintptr_t saddr, const umem_slab_t *sp, whatis_info_t *wi) +{ + mdb_whatis_t *w = wi->wi_w; + + /* It must overlap with the slab data, or it's not interesting */ + if (mdb_whatis_overlaps(w, + (uintptr_t)sp->slab_base, wi->wi_slab_size)) { + wi->wi_slab_found++; + return (WALK_DONE); + } + return (WALK_NEXT); +} + +static int +whatis_walk_cache(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi) +{ + mdb_whatis_t *w = wi->wi_w; + char *walk, *freewalk; + mdb_walk_cb_t func; + int do_bufctl; + + /* Override the '-b' flag as necessary */ + if (!(c->cache_flags & UMF_HASH)) + do_bufctl = FALSE; /* no bufctls to walk */ + else if (c->cache_flags & UMF_AUDIT) + do_bufctl = TRUE; /* we always want debugging info */ + else + do_bufctl = ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0); + + if (do_bufctl) { + walk = "bufctl"; + freewalk = "freectl"; + func = (mdb_walk_cb_t)whatis_walk_bufctl; + } else { + walk = "umem"; + freewalk = "freemem"; + func = (mdb_walk_cb_t)whatis_walk_umem; + } + + wi->wi_cache = c; + + if (mdb_whatis_flags(w) & WHATIS_VERBOSE) + mdb_printf("Searching %s...\n", c->cache_name); + + /* + * If more then two buffers live on each slab, figure out if we're + * interested in anything in any slab before doing the more expensive + * umem/freemem (bufctl/freectl) walkers. + */ + wi->wi_slab_size = c->cache_slabsize - c->cache_maxcolor; + if (!(c->cache_flags & UMF_HASH)) + wi->wi_slab_size -= sizeof (umem_slab_t); + + if ((wi->wi_slab_size / c->cache_chunksize) > 2) { + wi->wi_slab_found = 0; + if (mdb_pwalk("umem_slab", (mdb_walk_cb_t)whatis_walk_slab, wi, + addr) == -1) { + mdb_warn("can't find umem_slab walker"); + return (WALK_DONE); + } + if (wi->wi_slab_found == 0) + return (WALK_NEXT); + } + + wi->wi_freemem = FALSE; + if (mdb_pwalk(walk, func, wi, addr) == -1) { + mdb_warn("can't find %s walker", walk); + return (WALK_DONE); + } + + if (mdb_whatis_done(w)) + return (WALK_DONE); + + /* + * We have searched for allocated memory; now search for freed memory. 
+ */ + if (mdb_whatis_flags(w) & WHATIS_VERBOSE) + mdb_printf("Searching %s for free memory...\n", c->cache_name); + + wi->wi_freemem = TRUE; + + if (mdb_pwalk(freewalk, func, wi, addr) == -1) { + mdb_warn("can't find %s walker", freewalk); + return (WALK_DONE); + } + + return (WHATIS_WALKRET(w)); +} + +static int +whatis_walk_touch(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi) +{ + if (c->cache_arena == wi->wi_msb_arena || + (c->cache_cflags & UMC_NOTOUCH)) + return (WALK_NEXT); + + return (whatis_walk_cache(addr, c, wi)); +} + +static int +whatis_walk_metadata(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi) +{ + if (c->cache_arena != wi->wi_msb_arena) + return (WALK_NEXT); + + return (whatis_walk_cache(addr, c, wi)); +} + +static int +whatis_walk_notouch(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi) +{ + if (c->cache_arena == wi->wi_msb_arena || + !(c->cache_cflags & UMC_NOTOUCH)) + return (WALK_NEXT); + + return (whatis_walk_cache(addr, c, wi)); +} + +/*ARGSUSED*/ +static int +whatis_run_umem(mdb_whatis_t *w, void *ignored) +{ + whatis_info_t wi; + + bzero(&wi, sizeof (wi)); + wi.wi_w = w; + + /* umem's metadata is allocated from the umem_internal_arena */ + if (umem_readvar(&wi.wi_msb_arena, "umem_internal_arena") == -1) + mdb_warn("unable to readvar \"umem_internal_arena\""); + + /* + * We process umem caches in the following order: + * + * non-UMC_NOTOUCH, non-metadata (typically the most interesting) + * metadata (can be huge with UMF_AUDIT) + * UMC_NOTOUCH, non-metadata (see umem_walk_all()) + */ + if (mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_touch, + &wi) == -1 || + mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_metadata, + &wi) == -1 || + mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_notouch, + &wi) == -1) { + mdb_warn("couldn't find umem_cache walker"); + return (1); + } + return (0); +} + +/*ARGSUSED*/ +static int +whatis_run_vmem(mdb_whatis_t *w, void *ignored) +{ + whatis_info_t wi; + + bzero(&wi, sizeof (wi)); + wi.wi_w = w; + + if (mdb_walk("vmem_postfix", + (mdb_walk_cb_t)whatis_walk_vmem, &wi) == -1) { + mdb_warn("couldn't find vmem_postfix walker"); + return (1); + } + return (0); +} + +int +umem_init(void) +{ + mdb_walker_t w = { + "umem_cache", "walk list of umem caches", umem_cache_walk_init, + umem_cache_walk_step, umem_cache_walk_fini + }; + + if (mdb_add_walker(&w) == -1) { + mdb_warn("failed to add umem_cache walker"); + return (-1); + } + + if (umem_update_variables() == -1) + return (-1); + + /* install a callback so that our variables are always up-to-date */ + (void) mdb_callback_add(MDB_CALLBACK_STCHG, umem_statechange_cb, NULL); + umem_statechange_cb(NULL); + + /* + * Register our ::whatis callbacks. 
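+ * As an illustrative (hypothetical) example of what these callbacks + * provide, a user chasing a stray pointer would run something like: + * + *   > 0x8070b10::whatis + *   8070b10 is allocated from umem_alloc_80 + * + * where the address, cache name, and exact output text depend on the + * target; the report line is built by whatis_print_umem() above.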
+ */ + mdb_whatis_register("umem", whatis_run_umem, NULL, + WHATIS_PRIO_ALLOCATOR, WHATIS_REG_NO_ID); + mdb_whatis_register("vmem", whatis_run_vmem, NULL, + WHATIS_PRIO_ALLOCATOR, WHATIS_REG_NO_ID); + + return (0); +} + +typedef struct umem_log_cpu { + uintptr_t umc_low; + uintptr_t umc_high; +} umem_log_cpu_t; + +int +umem_log_walk(uintptr_t addr, const umem_bufctl_audit_t *b, umem_log_cpu_t *umc) +{ + int i; + + for (i = 0; i < umem_max_ncpus; i++) { + if (addr >= umc[i].umc_low && addr < umc[i].umc_high) + break; + } + + if (i == umem_max_ncpus) + mdb_printf(" "); + else + mdb_printf("%3d", i); + + mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr, + b->bc_timestamp, b->bc_thread); + + return (WALK_NEXT); +} + +/*ARGSUSED*/ +int +umem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + umem_log_header_t lh; + umem_cpu_log_header_t clh; + uintptr_t lhp, clhp; + umem_log_cpu_t *umc; + int i; + + if (umem_readvar(&lhp, "umem_transaction_log") == -1) { + mdb_warn("failed to read 'umem_transaction_log'"); + return (DCMD_ERR); + } + + if (lhp == NULL) { + mdb_warn("no umem transaction log\n"); + return (DCMD_ERR); + } + + if (mdb_vread(&lh, sizeof (umem_log_header_t), lhp) == -1) { + mdb_warn("failed to read log header at %p", lhp); + return (DCMD_ERR); + } + + clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh); + + umc = mdb_zalloc(sizeof (umem_log_cpu_t) * umem_max_ncpus, + UM_SLEEP | UM_GC); + + for (i = 0; i < umem_max_ncpus; i++) { + if (mdb_vread(&clh, sizeof (clh), clhp) == -1) { + mdb_warn("cannot read cpu %d's log header at %p", + i, clhp); + return (DCMD_ERR); + } + + umc[i].umc_low = clh.clh_chunk * lh.lh_chunksize + + (uintptr_t)lh.lh_base; + umc[i].umc_high = (uintptr_t)clh.clh_current; + + clhp += sizeof (umem_cpu_log_header_t); + } + + if (DCMD_HDRSPEC(flags)) { + mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", + "BUFADDR", "TIMESTAMP", "THREAD"); + } + + /* + * If we have been passed an address, we'll just print out that + * log entry. + */ + if (flags & DCMD_ADDRSPEC) { + umem_bufctl_audit_t *bp; + UMEM_LOCAL_BUFCTL_AUDIT(&bp); + + if (mdb_vread(bp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) { + mdb_warn("failed to read bufctl at %p", addr); + return (DCMD_ERR); + } + + (void) umem_log_walk(addr, bp, umc); + + return (DCMD_OK); + } + + if (mdb_walk("umem_log", (mdb_walk_cb_t)umem_log_walk, umc) == -1) { + mdb_warn("can't find umem log walker"); + return (DCMD_ERR); + } + + return (DCMD_OK); +} + +typedef struct bufctl_history_cb { + int bhc_flags; + int bhc_argc; + const mdb_arg_t *bhc_argv; + int bhc_ret; +} bufctl_history_cb_t; + +/*ARGSUSED*/ +static int +bufctl_history_callback(uintptr_t addr, const void *ign, void *arg) +{ + bufctl_history_cb_t *bhc = arg; + + bhc->bhc_ret = + bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv); + + bhc->bhc_flags &= ~DCMD_LOOPFIRST; + + return ((bhc->bhc_ret == DCMD_OK)? 
WALK_NEXT : WALK_DONE); +} + +void +bufctl_help(void) +{ + mdb_printf("%s\n", +"Display the contents of umem_bufctl_audit_ts, with optional filtering.\n"); + mdb_dec_indent(2); + mdb_printf("%<b>OPTIONS%</b>\n"); + mdb_inc_indent(2); + mdb_printf("%s", +" -v Display the full content of the bufctl, including its stack trace\n" +" -h retrieve the bufctl's transaction history, if available\n" +" -a addr\n" +" filter out bufctls not involving the buffer at addr\n" +" -c caller\n" +" filter out bufctls without the function/PC in their stack trace\n" +" -e earliest\n" +" filter out bufctls timestamped before earliest\n" +" -l latest\n" +" filter out bufctls timestamped after latest\n" +" -t thread\n" +" filter out bufctls not involving thread\n"); +} + +int +bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + uint_t verbose = FALSE; + uint_t history = FALSE; + uint_t in_history = FALSE; + uintptr_t caller = NULL, thread = NULL; + uintptr_t laddr, haddr, baddr = NULL; + hrtime_t earliest = 0, latest = 0; + int i, depth; + char c[MDB_SYM_NAMLEN]; + GElf_Sym sym; + umem_bufctl_audit_t *bcp; + UMEM_LOCAL_BUFCTL_AUDIT(&bcp); + + if (mdb_getopts(argc, argv, + 'v', MDB_OPT_SETBITS, TRUE, &verbose, + 'h', MDB_OPT_SETBITS, TRUE, &history, + 'H', MDB_OPT_SETBITS, TRUE, &in_history, /* internal */ + 'c', MDB_OPT_UINTPTR, &caller, + 't', MDB_OPT_UINTPTR, &thread, + 'e', MDB_OPT_UINT64, &earliest, + 'l', MDB_OPT_UINT64, &latest, + 'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc) + return (DCMD_USAGE); + + if (!(flags & DCMD_ADDRSPEC)) + return (DCMD_USAGE); + + if (in_history && !history) + return (DCMD_USAGE); + + if (history && !in_history) { + mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1), + UM_SLEEP | UM_GC); + bufctl_history_cb_t bhc; + + nargv[0].a_type = MDB_TYPE_STRING; + nargv[0].a_un.a_str = "-H"; /* prevent recursion */ + + for (i = 0; i < argc; i++) + nargv[i + 1] = argv[i]; + + /* + * When in history mode, we treat each element as if it + * were in a separate loop, so that the headers group + * bufctls with similar histories. + */ + bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST; + bhc.bhc_argc = argc + 1; + bhc.bhc_argv = nargv; + bhc.bhc_ret = DCMD_OK; + + if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc, + addr) == -1) { + mdb_warn("unable to walk bufctl_history"); + return (DCMD_ERR); + } + + if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT)) + mdb_printf("\n"); + + return (bhc.bhc_ret); + } + + if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) { + if (verbose) { + mdb_printf("%16s %16s %16s %16s\n" + "%<u>%16s %16s %16s %16s%</u>\n", + "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", + "", "CACHE", "LASTLOG", "CONTENTS"); + } else { + mdb_printf("%<u>%-?s %-?s %-12s %5s %s%</u>\n", + "ADDR", "BUFADDR", "TIMESTAMP", "THRD", "CALLER"); + } + } + + if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) { + mdb_warn("couldn't read bufctl at %p", addr); + return (DCMD_ERR); + } + + /* + * Guard against bogus bc_depth in case the bufctl is corrupt or + * the address does not really refer to a bufctl. + */ + depth = MIN(bcp->bc_depth, umem_stack_depth); + + if (caller != NULL) { + laddr = caller; + haddr = caller + sizeof (caller); + + if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c), + &sym) != -1 && caller == (uintptr_t)sym.st_value) { + /* + * We were provided an exact symbol value; any + * address in the function is valid. 
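+ * For example (symbol name hypothetical): "addr::bufctl -c printf" + * accepts a bufctl whose stack contains any PC within printf(), while + * an inexact value such as "printf+0x2c" is only matched against a + * pointer-sized window, per the initial laddr/haddr above.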
+ */ + laddr = (uintptr_t)sym.st_value; + haddr = (uintptr_t)sym.st_value + sym.st_size; + } + + for (i = 0; i < depth; i++) + if (bcp->bc_stack[i] >= laddr && + bcp->bc_stack[i] < haddr) + break; + + if (i == depth) + return (DCMD_OK); + } + + if (thread != NULL && (uintptr_t)bcp->bc_thread != thread) + return (DCMD_OK); + + if (earliest != 0 && bcp->bc_timestamp < earliest) + return (DCMD_OK); + + if (latest != 0 && bcp->bc_timestamp > latest) + return (DCMD_OK); + + if (baddr != 0 && (uintptr_t)bcp->bc_addr != baddr) + return (DCMD_OK); + + if (flags & DCMD_PIPE_OUT) { + mdb_printf("%#r\n", addr); + return (DCMD_OK); + } + + if (verbose) { + mdb_printf( + "%<b>%16p%</b> %16p %16llx %16d\n" + "%16s %16p %16p %16p\n", + addr, bcp->bc_addr, bcp->bc_timestamp, bcp->bc_thread, + "", bcp->bc_cache, bcp->bc_lastlog, bcp->bc_contents); + + mdb_inc_indent(17); + for (i = 0; i < depth; i++) + mdb_printf("%a\n", bcp->bc_stack[i]); + mdb_dec_indent(17); + mdb_printf("\n"); + } else { + mdb_printf("%0?p %0?p %12llx %5d", addr, bcp->bc_addr, + bcp->bc_timestamp, bcp->bc_thread); + + for (i = 0; i < depth; i++) { + if (mdb_lookup_by_addr(bcp->bc_stack[i], + MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1) + continue; + if (is_umem_sym(c, "umem_")) + continue; + mdb_printf(" %a\n", bcp->bc_stack[i]); + break; + } + + if (i >= depth) + mdb_printf("\n"); + } + + return (DCMD_OK); +} + +/*ARGSUSED*/ +int +bufctl_audit(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + mdb_arg_t a; + + if (!(flags & DCMD_ADDRSPEC)) + return (DCMD_USAGE); + + if (argc != 0) + return (DCMD_USAGE); + + a.a_type = MDB_TYPE_STRING; + a.a_un.a_str = "-v"; + + return (bufctl(addr, flags, 1, &a)); +} + +typedef struct umem_verify { + uint64_t *umv_buf; /* buffer to read cache contents into */ + size_t umv_size; /* number of bytes in umv_buf */ + int umv_corruption; /* > 0 if corruption found. */ + int umv_besilent; /* report actual corruption sites */ + struct umem_cache umv_cache; /* the cache we're operating on */ +} umem_verify_t; + +/* + * verify_pattern() + * verify that buf is filled with the pattern pat. + */ +static int64_t +verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat) +{ + /*LINTED*/ + uint64_t *bufend = (uint64_t *)((char *)buf_arg + size); + uint64_t *buf; + + for (buf = buf_arg; buf < bufend; buf++) + if (*buf != pat) + return ((uintptr_t)buf - (uintptr_t)buf_arg); + return (-1); +} + +/* + * verify_buftag() + * verify that btp->bt_bxstat == (bcp ^ pat) + */ +static int +verify_buftag(umem_buftag_t *btp, uintptr_t pat) +{ + return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1); +} + +/* + * verify_free() + * verify the integrity of a free block of memory by checking + * that it is filled with 0xdeadbeef and that its buftag is sane. + */ +/*ARGSUSED1*/ +static int +verify_free(uintptr_t addr, const void *data, void *private) +{ + umem_verify_t *umv = (umem_verify_t *)private; + uint64_t *buf = umv->umv_buf; /* buf to validate */ + int64_t corrupt; /* corruption offset */ + umem_buftag_t *buftagp; /* ptr to buftag */ + umem_cache_t *cp = &umv->umv_cache; + int besilent = umv->umv_besilent; + + /*LINTED*/ + buftagp = UMEM_BUFTAG(cp, buf); + + /* + * Read the buffer to check. 
+ */ + if (mdb_vread(buf, umv->umv_size, addr) == -1) { + if (!besilent) + mdb_warn("couldn't read %p", addr); + return (WALK_NEXT); + } + + if ((corrupt = verify_pattern(buf, cp->cache_verify, + UMEM_FREE_PATTERN)) >= 0) { + if (!besilent) + mdb_printf("buffer %p (free) seems corrupted, at %p\n", + addr, (uintptr_t)addr + corrupt); + goto corrupt; + } + + if ((cp->cache_flags & UMF_HASH) && + buftagp->bt_redzone != UMEM_REDZONE_PATTERN) { + if (!besilent) + mdb_printf("buffer %p (free) seems to " + "have a corrupt redzone pattern\n", addr); + goto corrupt; + } + + /* + * confirm bufctl pointer integrity. + */ + if (verify_buftag(buftagp, UMEM_BUFTAG_FREE) == -1) { + if (!besilent) + mdb_printf("buffer %p (free) has a corrupt " + "buftag\n", addr); + goto corrupt; + } + + return (WALK_NEXT); +corrupt: + umv->umv_corruption++; + return (WALK_NEXT); +} + +/* + * verify_alloc() + * Verify that the buftag of an allocated buffer makes sense with respect + * to the buffer. + */ +/*ARGSUSED1*/ +static int +verify_alloc(uintptr_t addr, const void *data, void *private) +{ + umem_verify_t *umv = (umem_verify_t *)private; + umem_cache_t *cp = &umv->umv_cache; + uint64_t *buf = umv->umv_buf; /* buf to validate */ + /*LINTED*/ + umem_buftag_t *buftagp = UMEM_BUFTAG(cp, buf); + uint32_t *ip = (uint32_t *)buftagp; + uint8_t *bp = (uint8_t *)buf; + int looks_ok = 0, size_ok = 1; /* flags for finding corruption */ + int besilent = umv->umv_besilent; + + /* + * Read the buffer to check. + */ + if (mdb_vread(buf, umv->umv_size, addr) == -1) { + if (!besilent) + mdb_warn("couldn't read %p", addr); + return (WALK_NEXT); + } + + /* + * There are two cases to handle: + * 1. If the buf was alloc'd using umem_cache_alloc, it will have + * 0xfeedfacefeedface at the end of it + * 2. If the buf was alloc'd using umem_alloc, it will have + * 0xbb just past the end of the region in use. At the buftag, + * it will have 0xfeedface (or, if the whole buffer is in use, + * 0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on + * endianness), followed by 32 bits containing the offset of the + * 0xbb byte in the buffer. 
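+ * As a hypothetical example of case 2: for umem_alloc(10) from a + * 64-byte cache, bp[10] should hold UMEM_REDZONE_BYTE (the 0xbb byte) + * and ip[1] should hold UMEM_SIZE_ENCODE(10); the checks below undo + * that encoding with UMEM_SIZE_VALID()/UMEM_SIZE_DECODE().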
+ * + * Finally, the two 32-bit words that comprise the second half of the + * buftag should xor to UMEM_BUFTAG_ALLOC + */ + + if (buftagp->bt_redzone == UMEM_REDZONE_PATTERN) + looks_ok = 1; + else if (!UMEM_SIZE_VALID(ip[1])) + size_ok = 0; + else if (bp[UMEM_SIZE_DECODE(ip[1])] == UMEM_REDZONE_BYTE) + looks_ok = 1; + else + size_ok = 0; + + if (!size_ok) { + if (!besilent) + mdb_printf("buffer %p (allocated) has a corrupt " + "redzone size encoding\n", addr); + goto corrupt; + } + + if (!looks_ok) { + if (!besilent) + mdb_printf("buffer %p (allocated) has a corrupt " + "redzone signature\n", addr); + goto corrupt; + } + + if (verify_buftag(buftagp, UMEM_BUFTAG_ALLOC) == -1) { + if (!besilent) + mdb_printf("buffer %p (allocated) has a " + "corrupt buftag\n", addr); + goto corrupt; + } + + return (WALK_NEXT); +corrupt: + umv->umv_corruption++; + return (WALK_NEXT); +} + +/*ARGSUSED2*/ +int +umem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + if (flags & DCMD_ADDRSPEC) { + int check_alloc = 0, check_free = 0; + umem_verify_t umv; + + if (mdb_vread(&umv.umv_cache, sizeof (umv.umv_cache), + addr) == -1) { + mdb_warn("couldn't read umem_cache %p", addr); + return (DCMD_ERR); + } + + umv.umv_size = umv.umv_cache.cache_buftag + + sizeof (umem_buftag_t); + umv.umv_buf = mdb_alloc(umv.umv_size, UM_SLEEP | UM_GC); + umv.umv_corruption = 0; + + if ((umv.umv_cache.cache_flags & UMF_REDZONE)) { + check_alloc = 1; + if (umv.umv_cache.cache_flags & UMF_DEADBEEF) + check_free = 1; + } else { + if (!(flags & DCMD_LOOP)) { + mdb_warn("cache %p (%s) does not have " + "redzone checking enabled\n", addr, + umv.umv_cache.cache_name); + } + return (DCMD_ERR); + } + + if (flags & DCMD_LOOP) { + /* + * table mode, don't print out every corrupt buffer + */ + umv.umv_besilent = 1; + } else { + mdb_printf("Summary for cache '%s'\n", + umv.umv_cache.cache_name); + mdb_inc_indent(2); + umv.umv_besilent = 0; + } + + if (check_alloc) + (void) mdb_pwalk("umem", verify_alloc, &umv, addr); + if (check_free) + (void) mdb_pwalk("freemem", verify_free, &umv, addr); + + if (flags & DCMD_LOOP) { + if (umv.umv_corruption == 0) { + mdb_printf("%-*s %?p clean\n", + UMEM_CACHE_NAMELEN, + umv.umv_cache.cache_name, addr); + } else { + char *s = ""; /* optional s in "buffer[s]" */ + if (umv.umv_corruption > 1) + s = "s"; + + mdb_printf("%-*s %?p %d corrupt buffer%s\n", + UMEM_CACHE_NAMELEN, + umv.umv_cache.cache_name, addr, + umv.umv_corruption, s); + } + } else { + /* + * This is the more verbose mode, when the user has + * typed addr::umem_verify. If the cache was clean, + * nothing will have yet been printed. So say something. + */ + if (umv.umv_corruption == 0) + mdb_printf("clean\n"); + + mdb_dec_indent(2); + } + } else { + /* + * If the user didn't specify a cache to verify, we'll walk all + * umem_cache's, specifying ourself as a callback for each... 
+ * this is the equivalent of '::walk umem_cache .::umem_verify' + */ + mdb_printf("%<ul>%-*s %-?s %-20s%</ul>\n", UMEM_CACHE_NAMELEN, + "Cache Name", "Addr", "Cache Integrity"); + (void) (mdb_walk_dcmd("umem_cache", "umem_verify", 0, NULL)); + } + + return (DCMD_OK); +} + +typedef struct vmem_node { + struct vmem_node *vn_next; + struct vmem_node *vn_parent; + struct vmem_node *vn_sibling; + struct vmem_node *vn_children; + uintptr_t vn_addr; + int vn_marked; + vmem_t vn_vmem; +} vmem_node_t; + +typedef struct vmem_walk { + vmem_node_t *vw_root; + vmem_node_t *vw_current; +} vmem_walk_t; + +int +vmem_walk_init(mdb_walk_state_t *wsp) +{ + uintptr_t vaddr, paddr; + vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp; + vmem_walk_t *vw; + + if (umem_readvar(&vaddr, "vmem_list") == -1) { + mdb_warn("couldn't read 'vmem_list'"); + return (WALK_ERR); + } + + while (vaddr != NULL) { + vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP); + vp->vn_addr = vaddr; + vp->vn_next = head; + head = vp; + + if (vaddr == wsp->walk_addr) + current = vp; + + if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) { + mdb_warn("couldn't read vmem_t at %p", vaddr); + goto err; + } + + vaddr = (uintptr_t)vp->vn_vmem.vm_next; + } + + for (vp = head; vp != NULL; vp = vp->vn_next) { + + if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) { + vp->vn_sibling = root; + root = vp; + continue; + } + + for (parent = head; parent != NULL; parent = parent->vn_next) { + if (parent->vn_addr != paddr) + continue; + vp->vn_sibling = parent->vn_children; + parent->vn_children = vp; + vp->vn_parent = parent; + break; + } + + if (parent == NULL) { + mdb_warn("couldn't find %p's parent (%p)\n", + vp->vn_addr, paddr); + goto err; + } + } + + vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP); + vw->vw_root = root; + + if (current != NULL) + vw->vw_current = current; + else + vw->vw_current = root; + + wsp->walk_data = vw; + return (WALK_NEXT); +err: + for (vp = head; head != NULL; vp = head) { + head = vp->vn_next; + mdb_free(vp, sizeof (vmem_node_t)); + } + + return (WALK_ERR); +} + +int +vmem_walk_step(mdb_walk_state_t *wsp) +{ + vmem_walk_t *vw = wsp->walk_data; + vmem_node_t *vp; + int rval; + + if ((vp = vw->vw_current) == NULL) + return (WALK_DONE); + + rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata); + + if (vp->vn_children != NULL) { + vw->vw_current = vp->vn_children; + return (rval); + } + + do { + vw->vw_current = vp->vn_sibling; + vp = vp->vn_parent; + } while (vw->vw_current == NULL && vp != NULL); + + return (rval); +} + +/* + * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all + * children are visited before their parent. We perform the postfix walk + * iteratively (rather than recursively) to allow mdb to regain control + * after each callback. + */ +int +vmem_postfix_walk_step(mdb_walk_state_t *wsp) +{ + vmem_walk_t *vw = wsp->walk_data; + vmem_node_t *vp = vw->vw_current; + int rval; + + /* + * If this node is marked, then we know that we have already visited + * all of its children. If the node has any siblings, they need to + * be visited next; otherwise, we need to visit the parent. Note + * that vp->vn_marked will only be zero on the first invocation of + * the step function. + */ + if (vp->vn_marked) { + if (vp->vn_sibling != NULL) + vp = vp->vn_sibling; + else if (vp->vn_parent != NULL) + vp = vp->vn_parent; + else { + /* + * We have neither a parent, nor a sibling, and we + * have already been visited; we're done. 
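+ * For example, with a hypothetical arena tree in which "heap" is the + * parent of both "umem_internal" and "umem_default", the postfix + * order is: umem_internal, umem_default, heap; consumers such as the + * ::whatis vmem search therefore visit leaf arenas first.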
+ */ + return (WALK_DONE); + } + } + + /* + * Before we visit this node, visit its children. + */ + while (vp->vn_children != NULL && !vp->vn_children->vn_marked) + vp = vp->vn_children; + + vp->vn_marked = 1; + vw->vw_current = vp; + rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata); + + return (rval); +} + +void +vmem_walk_fini(mdb_walk_state_t *wsp) +{ + vmem_walk_t *vw = wsp->walk_data; + vmem_node_t *root = vw->vw_root; + int done; + + if (root == NULL) + return; + + if ((vw->vw_root = root->vn_children) != NULL) + vmem_walk_fini(wsp); + + vw->vw_root = root->vn_sibling; + done = (root->vn_sibling == NULL && root->vn_parent == NULL); + mdb_free(root, sizeof (vmem_node_t)); + + if (done) { + mdb_free(vw, sizeof (vmem_walk_t)); + } else { + vmem_walk_fini(wsp); + } +} + +typedef struct vmem_seg_walk { + uint8_t vsw_type; + uintptr_t vsw_start; + uintptr_t vsw_current; +} vmem_seg_walk_t; + +/*ARGSUSED*/ +int +vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name) +{ + vmem_seg_walk_t *vsw; + + if (wsp->walk_addr == NULL) { + mdb_warn("vmem_%s does not support global walks\n", name); + return (WALK_ERR); + } + + wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP); + + vsw->vsw_type = type; + vsw->vsw_start = wsp->walk_addr + OFFSETOF(vmem_t, vm_seg0); + vsw->vsw_current = vsw->vsw_start; + + return (WALK_NEXT); +} + +/* + * vmem segments can't have type 0 (this should be added to vmem_impl.h). + */ +#define VMEM_NONE 0 + +int +vmem_alloc_walk_init(mdb_walk_state_t *wsp) +{ + return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc")); +} + +int +vmem_free_walk_init(mdb_walk_state_t *wsp) +{ + return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free")); +} + +int +vmem_span_walk_init(mdb_walk_state_t *wsp) +{ + return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span")); +} + +int +vmem_seg_walk_init(mdb_walk_state_t *wsp) +{ + return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg")); +} + +int +vmem_seg_walk_step(mdb_walk_state_t *wsp) +{ + vmem_seg_t seg; + vmem_seg_walk_t *vsw = wsp->walk_data; + uintptr_t addr = vsw->vsw_current; + static size_t seg_size = 0; + int rval; + + if (!seg_size) { + if (umem_readvar(&seg_size, "vmem_seg_size") == -1) { + mdb_warn("failed to read 'vmem_seg_size'"); + seg_size = sizeof (vmem_seg_t); + } + } + + if (seg_size < sizeof (seg)) + bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size); + + if (mdb_vread(&seg, seg_size, addr) == -1) { + mdb_warn("couldn't read vmem_seg at %p", addr); + return (WALK_ERR); + } + + vsw->vsw_current = (uintptr_t)seg.vs_anext; + if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) { + rval = WALK_NEXT; + } else { + rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata); + } + + if (vsw->vsw_current == vsw->vsw_start) + return (WALK_DONE); + + return (rval); +} + +void +vmem_seg_walk_fini(mdb_walk_state_t *wsp) +{ + vmem_seg_walk_t *vsw = wsp->walk_data; + + mdb_free(vsw, sizeof (vmem_seg_walk_t)); +} + +#define VMEM_NAMEWIDTH 22 + +int +vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + vmem_t v, parent; + uintptr_t paddr; + int ident = 0; + char c[VMEM_NAMEWIDTH]; + + if (!(flags & DCMD_ADDRSPEC)) { + if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) { + mdb_warn("can't walk vmem"); + return (DCMD_ERR); + } + return (DCMD_OK); + } + + if (DCMD_HDRSPEC(flags)) + mdb_printf("%-?s %-*s %10s %12s %9s %5s\n", + "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE", + "TOTAL", "SUCCEED", "FAIL"); + + if (mdb_vread(&v, sizeof (v), addr) == -1) { 
+ mdb_warn("couldn't read vmem at %p", addr); + return (DCMD_ERR); + } + + for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) { + if (mdb_vread(&parent, sizeof (parent), paddr) == -1) { + mdb_warn("couldn't trace %p's ancestry", addr); + ident = 0; + break; + } + paddr = (uintptr_t)parent.vm_source; + } + + (void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name); + + mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n", + addr, VMEM_NAMEWIDTH, c, + v.vm_kstat.vk_mem_inuse, v.vm_kstat.vk_mem_total, + v.vm_kstat.vk_alloc, v.vm_kstat.vk_fail); + + return (DCMD_OK); +} + +void +vmem_seg_help(void) +{ + mdb_printf("%s\n", +"Display the contents of vmem_seg_ts, with optional filtering.\n" +"\n" +"A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n" +"representing a single chunk of data. Only ALLOC segments have debugging\n" +"information.\n"); + mdb_dec_indent(2); + mdb_printf("%OPTIONS%\n"); + mdb_inc_indent(2); + mdb_printf("%s", +" -v Display the full content of the vmem_seg, including its stack trace\n" +" -s report the size of the segment, instead of the end address\n" +" -c caller\n" +" filter out segments without the function/PC in their stack trace\n" +" -e earliest\n" +" filter out segments timestamped before earliest\n" +" -l latest\n" +" filter out segments timestamped after latest\n" +" -m minsize\n" +" filer out segments smaller than minsize\n" +" -M maxsize\n" +" filer out segments larger than maxsize\n" +" -t thread\n" +" filter out segments not involving thread\n" +" -T type\n" +" filter out segments not of type 'type'\n" +" type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n"); +} + + +/*ARGSUSED*/ +int +vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + vmem_seg_t vs; + uintptr_t *stk = vs.vs_stack; + uintptr_t sz; + uint8_t t; + const char *type = NULL; + GElf_Sym sym; + char c[MDB_SYM_NAMLEN]; + int no_debug; + int i; + int depth; + uintptr_t laddr, haddr; + + uintptr_t caller = NULL, thread = NULL; + uintptr_t minsize = 0, maxsize = 0; + + hrtime_t earliest = 0, latest = 0; + + uint_t size = 0; + uint_t verbose = 0; + + if (!(flags & DCMD_ADDRSPEC)) + return (DCMD_USAGE); + + if (mdb_getopts(argc, argv, + 'c', MDB_OPT_UINTPTR, &caller, + 'e', MDB_OPT_UINT64, &earliest, + 'l', MDB_OPT_UINT64, &latest, + 's', MDB_OPT_SETBITS, TRUE, &size, + 'm', MDB_OPT_UINTPTR, &minsize, + 'M', MDB_OPT_UINTPTR, &maxsize, + 't', MDB_OPT_UINTPTR, &thread, + 'T', MDB_OPT_STR, &type, + 'v', MDB_OPT_SETBITS, TRUE, &verbose, + NULL) != argc) + return (DCMD_USAGE); + + if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) { + if (verbose) { + mdb_printf("%16s %4s %16s %16s %16s\n" + "%%16s %4s %16s %16s %16s%\n", + "ADDR", "TYPE", "START", "END", "SIZE", + "", "", "THREAD", "TIMESTAMP", ""); + } else { + mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE", + "START", size? 
"SIZE" : "END", "WHO"); + } + } + + if (mdb_vread(&vs, sizeof (vs), addr) == -1) { + mdb_warn("couldn't read vmem_seg at %p", addr); + return (DCMD_ERR); + } + + if (type != NULL) { + if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0) + t = VMEM_ALLOC; + else if (strcmp(type, "FREE") == 0) + t = VMEM_FREE; + else if (strcmp(type, "SPAN") == 0) + t = VMEM_SPAN; + else if (strcmp(type, "ROTR") == 0 || + strcmp(type, "ROTOR") == 0) + t = VMEM_ROTOR; + else if (strcmp(type, "WLKR") == 0 || + strcmp(type, "WALKER") == 0) + t = VMEM_WALKER; + else { + mdb_warn("\"%s\" is not a recognized vmem_seg type\n", + type); + return (DCMD_ERR); + } + + if (vs.vs_type != t) + return (DCMD_OK); + } + + sz = vs.vs_end - vs.vs_start; + + if (minsize != 0 && sz < minsize) + return (DCMD_OK); + + if (maxsize != 0 && sz > maxsize) + return (DCMD_OK); + + t = vs.vs_type; + depth = vs.vs_depth; + + /* + * debug info, when present, is only accurate for VMEM_ALLOC segments + */ + no_debug = (t != VMEM_ALLOC) || + (depth == 0 || depth > VMEM_STACK_DEPTH); + + if (no_debug) { + if (caller != NULL || thread != NULL || earliest != 0 || + latest != 0) + return (DCMD_OK); /* not enough info */ + } else { + if (caller != NULL) { + laddr = caller; + haddr = caller + sizeof (caller); + + if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, + sizeof (c), &sym) != -1 && + caller == (uintptr_t)sym.st_value) { + /* + * We were provided an exact symbol value; any + * address in the function is valid. + */ + laddr = (uintptr_t)sym.st_value; + haddr = (uintptr_t)sym.st_value + sym.st_size; + } + + for (i = 0; i < depth; i++) + if (vs.vs_stack[i] >= laddr && + vs.vs_stack[i] < haddr) + break; + + if (i == depth) + return (DCMD_OK); + } + + if (thread != NULL && (uintptr_t)vs.vs_thread != thread) + return (DCMD_OK); + + if (earliest != 0 && vs.vs_timestamp < earliest) + return (DCMD_OK); + + if (latest != 0 && vs.vs_timestamp > latest) + return (DCMD_OK); + } + + type = (t == VMEM_ALLOC ? "ALLC" : + t == VMEM_FREE ? "FREE" : + t == VMEM_SPAN ? "SPAN" : + t == VMEM_ROTOR ? "ROTR" : + t == VMEM_WALKER ? "WLKR" : + "????"); + + if (flags & DCMD_PIPE_OUT) { + mdb_printf("%#r\n", addr); + return (DCMD_OK); + } + + if (verbose) { + mdb_printf("%%16p% %4s %16p %16p %16d\n", + addr, type, vs.vs_start, vs.vs_end, sz); + + if (no_debug) + return (DCMD_OK); + + mdb_printf("%16s %4s %16d %16llx\n", + "", "", vs.vs_thread, vs.vs_timestamp); + + mdb_inc_indent(17); + for (i = 0; i < depth; i++) { + mdb_printf("%a\n", stk[i]); + } + mdb_dec_indent(17); + mdb_printf("\n"); + } else { + mdb_printf("%0?p %4s %0?p %0?p", addr, type, + vs.vs_start, size? 
sz : vs.vs_end); + + if (no_debug) { + mdb_printf("\n"); + return (DCMD_OK); + } + + for (i = 0; i < depth; i++) { + if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY, + c, sizeof (c), &sym) == -1) + continue; + if (is_umem_sym(c, "vmem_")) + continue; + break; + } + mdb_printf(" %a\n", stk[i]); + } + return (DCMD_OK); +} + +/*ARGSUSED*/ +static int +showbc(uintptr_t addr, const umem_bufctl_audit_t *bcp, hrtime_t *newest) +{ + char name[UMEM_CACHE_NAMELEN + 1]; + hrtime_t delta; + int i, depth; + + if (bcp->bc_timestamp == 0) + return (WALK_DONE); + + if (*newest == 0) + *newest = bcp->bc_timestamp; + + delta = *newest - bcp->bc_timestamp; + depth = MIN(bcp->bc_depth, umem_stack_depth); + + if (mdb_readstr(name, sizeof (name), (uintptr_t) + &bcp->bc_cache->cache_name) <= 0) + (void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache); + + mdb_printf("\nT-%lld.%09lld addr=%p %s\n", + delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name); + + for (i = 0; i < depth; i++) + mdb_printf("\t %a\n", bcp->bc_stack[i]); + + return (WALK_NEXT); +} + +int +umalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + const char *logname = "umem_transaction_log"; + hrtime_t newest = 0; + + if ((flags & DCMD_ADDRSPEC) || argc > 1) + return (DCMD_USAGE); + + if (argc > 0) { + if (argv->a_type != MDB_TYPE_STRING) + return (DCMD_USAGE); + if (strcmp(argv->a_un.a_str, "fail") == 0) + logname = "umem_failure_log"; + else if (strcmp(argv->a_un.a_str, "slab") == 0) + logname = "umem_slab_log"; + else + return (DCMD_USAGE); + } + + if (umem_readvar(&addr, logname) == -1) { + mdb_warn("failed to read %s log header pointer", logname); + return (DCMD_ERR); + } + + if (mdb_pwalk("umem_log", (mdb_walk_cb_t)showbc, &newest, addr) == -1) { + mdb_warn("failed to walk umem log"); + return (DCMD_ERR); + } + + return (DCMD_OK); +} + +/* + * As the final lure for die-hard crash(1M) users, we provide ::umausers here. + * The first piece is a structure which we use to accumulate umem_cache_t + * addresses of interest. The umc_add is used as a callback for the umem_cache + * walker; we either add all caches, or ones named explicitly as arguments. + */ + +typedef struct umclist { + const char *umc_name; /* Name to match (or NULL) */ + uintptr_t *umc_caches; /* List of umem_cache_t addrs */ + int umc_nelems; /* Num entries in umc_caches */ + int umc_size; /* Size of umc_caches array */ +} umclist_t; + +static int +umc_add(uintptr_t addr, const umem_cache_t *cp, umclist_t *umc) +{ + void *p; + int s; + + if (umc->umc_name == NULL || + strcmp(cp->cache_name, umc->umc_name) == 0) { + /* + * If we have a match, grow our array (if necessary), and then + * add the virtual address of the matching cache to our list. + */ + if (umc->umc_nelems >= umc->umc_size) { + s = umc->umc_size ? umc->umc_size * 2 : 256; + p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC); + + bcopy(umc->umc_caches, p, + sizeof (uintptr_t) * umc->umc_size); + + umc->umc_caches = p; + umc->umc_size = s; + } + + umc->umc_caches[umc->umc_nelems++] = addr; + return (umc->umc_name ? WALK_DONE : WALK_NEXT); + } + + return (WALK_NEXT); +} + +/* + * The second piece of ::umausers is a hash table of allocations. Each + * allocation owner is identified by its stack trace and data_size. We then + * track the total bytes of all such allocations, and the number of allocations + * to report at the end. Once we have a list of caches, we walk through the + * allocated bufctls of each, and update our hash table accordingly. 
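+ * Illustrative usage (cache name hypothetical): "::umausers" reports + * the large allocation owners in every UMF_AUDIT cache, while + * "::umausers -e -f umem_alloc_256" drops the size/count thresholds + * (-e), prints each matching bufctl (-f), and restricts the search to + * the named cache.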
+ */ + +typedef struct umowner { + struct umowner *umo_head; /* First hash elt in bucket */ + struct umowner *umo_next; /* Next hash elt in chain */ + size_t umo_signature; /* Hash table signature */ + uint_t umo_num; /* Number of allocations */ + size_t umo_data_size; /* Size of each allocation */ + size_t umo_total_size; /* Total bytes of allocation */ + int umo_depth; /* Depth of stack trace */ + uintptr_t *umo_stack; /* Stack trace */ +} umowner_t; + +typedef struct umusers { + const umem_cache_t *umu_cache; /* Current umem cache */ + umowner_t *umu_hash; /* Hash table of owners */ + uintptr_t *umu_stacks; /* stacks for owners */ + int umu_nelems; /* Number of entries in use */ + int umu_size; /* Total number of entries */ +} umusers_t; + +static void +umu_add(umusers_t *umu, const umem_bufctl_audit_t *bcp, + size_t size, size_t data_size) +{ + int i, depth = MIN(bcp->bc_depth, umem_stack_depth); + size_t bucket, signature = data_size; + umowner_t *umo, *umoend; + + /* + * If the hash table is full, double its size and rehash everything. + */ + if (umu->umu_nelems >= umu->umu_size) { + int s = umu->umu_size ? umu->umu_size * 2 : 1024; + size_t umowner_size = sizeof (umowner_t); + size_t trace_size = umem_stack_depth * sizeof (uintptr_t); + uintptr_t *new_stacks; + + umo = mdb_alloc(umowner_size * s, UM_SLEEP | UM_GC); + new_stacks = mdb_alloc(trace_size * s, UM_SLEEP | UM_GC); + + bcopy(umu->umu_hash, umo, umowner_size * umu->umu_size); + bcopy(umu->umu_stacks, new_stacks, trace_size * umu->umu_size); + umu->umu_hash = umo; + umu->umu_stacks = new_stacks; + umu->umu_size = s; + + umoend = umu->umu_hash + umu->umu_size; + for (umo = umu->umu_hash; umo < umoend; umo++) { + umo->umo_head = NULL; + umo->umo_stack = &umu->umu_stacks[ + umem_stack_depth * (umo - umu->umu_hash)]; + } + + umoend = umu->umu_hash + umu->umu_nelems; + for (umo = umu->umu_hash; umo < umoend; umo++) { + bucket = umo->umo_signature & (umu->umu_size - 1); + umo->umo_next = umu->umu_hash[bucket].umo_head; + umu->umu_hash[bucket].umo_head = umo; + } + } + + /* + * Finish computing the hash signature from the stack trace, and then + * see if the owner is in the hash table. If so, update our stats. + */ + for (i = 0; i < depth; i++) + signature += bcp->bc_stack[i]; + + bucket = signature & (umu->umu_size - 1); + + for (umo = umu->umu_hash[bucket].umo_head; umo; umo = umo->umo_next) { + if (umo->umo_signature == signature) { + size_t difference = 0; + + difference |= umo->umo_data_size - data_size; + difference |= umo->umo_depth - depth; + + for (i = 0; i < depth; i++) { + difference |= umo->umo_stack[i] - + bcp->bc_stack[i]; + } + + if (difference == 0) { + umo->umo_total_size += size; + umo->umo_num++; + return; + } + } + } + + /* + * If the owner is not yet hashed, grab the next element and fill it + * in based on the allocation information. + */ + umo = &umu->umu_hash[umu->umu_nelems++]; + umo->umo_next = umu->umu_hash[bucket].umo_head; + umu->umu_hash[bucket].umo_head = umo; + + umo->umo_signature = signature; + umo->umo_num = 1; + umo->umo_data_size = data_size; + umo->umo_total_size = size; + umo->umo_depth = depth; + + for (i = 0; i < depth; i++) + umo->umo_stack[i] = bcp->bc_stack[i]; +} + +/* + * When ::umausers is invoked without the -f flag, we simply update our hash + * table with the information from each allocated bufctl. 
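+ * Since the requested allocation size is not recorded in the bufctl, + * umause1() below passes cp->cache_bufsize for both the per-allocation + * data size and the amount added to the owner's running total.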
+ */ +/*ARGSUSED*/ +static int +umause1(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu) +{ + const umem_cache_t *cp = umu->umu_cache; + + umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize); + return (WALK_NEXT); +} + +/* + * When ::umausers is invoked with the -f flag, we print out the information + * for each bufctl as well as updating the hash table. + */ +static int +umause2(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu) +{ + int i, depth = MIN(bcp->bc_depth, umem_stack_depth); + const umem_cache_t *cp = umu->umu_cache; + + mdb_printf("size %d, addr %p, thread %p, cache %s\n", + cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name); + + for (i = 0; i < depth; i++) + mdb_printf("\t %a\n", bcp->bc_stack[i]); + + umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize); + return (WALK_NEXT); +} + +/* + * We sort our results by allocation size before printing them. + */ +static int +umownercmp(const void *lp, const void *rp) +{ + const umowner_t *lhs = lp; + const umowner_t *rhs = rp; + + return (rhs->umo_total_size - lhs->umo_total_size); +} + +/* + * The main engine of ::umausers is relatively straightforward: First we + * accumulate our list of umem_cache_t addresses into the umclist_t. Next we + * iterate over the allocated bufctls of each cache in the list. Finally, + * we sort and print our results. + */ +/*ARGSUSED*/ +int +umausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + int mem_threshold = 8192; /* Minimum # bytes for printing */ + int cnt_threshold = 100; /* Minimum # blocks for printing */ + int audited_caches = 0; /* Number of UMF_AUDIT caches found */ + int do_all_caches = 1; /* Do all caches (no arguments) */ + int opt_e = FALSE; /* Include "small" users */ + int opt_f = FALSE; /* Print stack traces */ + + mdb_walk_cb_t callback = (mdb_walk_cb_t)umause1; + umowner_t *umo, *umoend; + int i, oelems; + + umclist_t umc; + umusers_t umu; + + if (flags & DCMD_ADDRSPEC) + return (DCMD_USAGE); + + bzero(&umc, sizeof (umc)); + bzero(&umu, sizeof (umu)); + + while ((i = mdb_getopts(argc, argv, + 'e', MDB_OPT_SETBITS, TRUE, &opt_e, + 'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) { + + argv += i; /* skip past options we just processed */ + argc -= i; /* adjust argc */ + + if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-') + return (DCMD_USAGE); + + oelems = umc.umc_nelems; + umc.umc_name = argv->a_un.a_str; + (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc); + + if (umc.umc_nelems == oelems) { + mdb_warn("unknown umem cache: %s\n", umc.umc_name); + return (DCMD_ERR); + } + + do_all_caches = 0; + argv++; + argc--; + } + + if (opt_e) + mem_threshold = cnt_threshold = 0; + + if (opt_f) + callback = (mdb_walk_cb_t)umause2; + + if (do_all_caches) { + umc.umc_name = NULL; /* match all cache names */ + (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc); + } + + for (i = 0; i < umc.umc_nelems; i++) { + uintptr_t cp = umc.umc_caches[i]; + umem_cache_t c; + + if (mdb_vread(&c, sizeof (c), cp) == -1) { + mdb_warn("failed to read cache at %p", cp); + continue; + } + + if (!(c.cache_flags & UMF_AUDIT)) { + if (!do_all_caches) { + mdb_warn("UMF_AUDIT is not enabled for %s\n", + c.cache_name); + } + continue; + } + + umu.umu_cache = &c; + (void) mdb_pwalk("bufctl", callback, &umu, cp); + audited_caches++; + } + + if (audited_caches == 0 && do_all_caches) { + mdb_warn("UMF_AUDIT is not enabled for any caches\n"); + return (DCMD_ERR); + } + + qsort(umu.umu_hash, umu.umu_nelems, sizeof (umowner_t), 
umownercmp); + umoend = umu.umu_hash + umu.umu_nelems; + + for (umo = umu.umu_hash; umo < umoend; umo++) { + if (umo->umo_total_size < mem_threshold && + umo->umo_num < cnt_threshold) + continue; + mdb_printf("%lu bytes for %u allocations with data size %lu:\n", + umo->umo_total_size, umo->umo_num, umo->umo_data_size); + for (i = 0; i < umo->umo_depth; i++) + mdb_printf("\t %a\n", umo->umo_stack[i]); + } + + return (DCMD_OK); +} + +struct malloc_data { + uint32_t malloc_size; + uint32_t malloc_stat; /* == UMEM_MALLOC_ENCODE(state, malloc_size) */ +}; + +#ifdef _LP64 +#define UMI_MAX_BUCKET (UMEM_MAXBUF - 2*sizeof (struct malloc_data)) +#else +#define UMI_MAX_BUCKET (UMEM_MAXBUF - sizeof (struct malloc_data)) +#endif + +typedef struct umem_malloc_info { + size_t um_total; /* total allocated buffers */ + size_t um_malloc; /* malloc buffers */ + size_t um_malloc_size; /* sum of malloc buffer sizes */ + size_t um_malloc_overhead; /* sum of in-chunk overheads */ + + umem_cache_t *um_cp; + + uint_t *um_bucket; +} umem_malloc_info_t; + +static void +umem_malloc_print_dist(uint_t *um_bucket, size_t minmalloc, size_t maxmalloc, + size_t maxbuckets, size_t minbucketsize, int geometric) +{ + uint64_t um_malloc; + int minb = -1; + int maxb = -1; + int buckets; + int nbucks; + int i; + int b; + const int *distarray; + + minb = (int)minmalloc; + maxb = (int)maxmalloc; + + nbucks = buckets = maxb - minb + 1; + + um_malloc = 0; + for (b = minb; b <= maxb; b++) + um_malloc += um_bucket[b]; + + if (maxbuckets != 0) + buckets = MIN(buckets, maxbuckets); + + if (minbucketsize > 1) { + buckets = MIN(buckets, nbucks/minbucketsize); + if (buckets == 0) { + buckets = 1; + minbucketsize = nbucks; + } + } + + if (geometric) + distarray = dist_geometric(buckets, minb, maxb, minbucketsize); + else + distarray = dist_linear(buckets, minb, maxb); + + dist_print_header("malloc size", 11, "count"); + for (i = 0; i < buckets; i++) { + dist_print_bucket(distarray, i, um_bucket, um_malloc, 11); + } + mdb_printf("\n"); +} + +/* + * A malloc()ed buffer looks like: + * + * <----------- mi.malloc_size ---> + * <----------- cp.cache_bufsize ------------------> + * <----------- cp.cache_chunksize --------------------------------> + * +-------+-----------------------+---------------+---------------+ + * |/tag///| mallocsz |/round-off/////|/debug info////| + * +-------+---------------------------------------+---------------+ + * <-- usable space ------> + * + * mallocsz is the argument to malloc(3C). + * mi.malloc_size is the actual size passed to umem_alloc(), which + * is rounded up to the smallest available cache size, which is + * cache_bufsize. If there is debugging or alignment overhead in + * the cache, that is reflected in a larger cache_chunksize. + * + * The tag at the beginning of the buffer is either 8-bytes or 16-bytes, + * depending upon the ISA's alignment requirements. For 32-bit allocations, + * it is always an 8-byte tag. For 64-bit allocations larger than 8 bytes, + * the tag has 8 bytes of padding before it. + * + * 32-bit, 64-bit buffers <= 8 bytes: + * +-------+-------+--------- ... + * |/size//|/stat//| mallocsz ... + * +-------+-------+--------- ... + * ^ + * pointer returned from malloc(3C) + * + * 64-bit buffers > 8 bytes: + * +---------------+-------+-------+--------- ... + * |/padding///////|/size//|/stat//| mallocsz ... + * +---------------+-------+-------+--------- ... + * ^ + * pointer returned from malloc(3C) + * + * The "size" field is "malloc_size", which is mallocsz + the padding. 
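+ * As a worked (illustrative) example on a 64-bit process, assuming + * UMEM_SECOND_ALIGN is 16 and the default umem_alloc_sizes table: + * malloc(100) prepends a 16-byte tag (8 bytes of padding plus the + * 8-byte malloc_data), giving malloc_size = 116, which umem_alloc() + * satisfies from the 128-byte cache, so cache_bufsize is 128.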
+ * The "stat" field is derived from malloc_size, and functions as a + * validation that this buffer is actually from malloc(3C). + */ +/*ARGSUSED*/ +static int +um_umem_buffer_cb(uintptr_t addr, void *buf, umem_malloc_info_t *ump) +{ + struct malloc_data md; + size_t m_addr = addr; + size_t overhead = sizeof (md); + size_t mallocsz; + + ump->um_total++; + +#ifdef _LP64 + if (ump->um_cp->cache_bufsize > UMEM_SECOND_ALIGN) { + m_addr += overhead; + overhead += sizeof (md); + } +#endif + + if (mdb_vread(&md, sizeof (md), m_addr) == -1) { + mdb_warn("unable to read malloc header at %p", m_addr); + return (WALK_NEXT); + } + + switch (UMEM_MALLOC_DECODE(md.malloc_stat, md.malloc_size)) { + case MALLOC_MAGIC: +#ifdef _LP64 + case MALLOC_SECOND_MAGIC: +#endif + mallocsz = md.malloc_size - overhead; + + ump->um_malloc++; + ump->um_malloc_size += mallocsz; + ump->um_malloc_overhead += overhead; + + /* include round-off and debug overhead */ + ump->um_malloc_overhead += + ump->um_cp->cache_chunksize - md.malloc_size; + + if (ump->um_bucket != NULL && mallocsz <= UMI_MAX_BUCKET) + ump->um_bucket[mallocsz]++; + + break; + default: + break; + } + + return (WALK_NEXT); +} + +int +get_umem_alloc_sizes(int **out, size_t *out_num) +{ + GElf_Sym sym; + + if (umem_lookup_by_name("umem_alloc_sizes", &sym) == -1) { + mdb_warn("unable to look up umem_alloc_sizes"); + return (-1); + } + + *out = mdb_alloc(sym.st_size, UM_SLEEP | UM_GC); + *out_num = sym.st_size / sizeof (int); + + if (mdb_vread(*out, sym.st_size, sym.st_value) == -1) { + mdb_warn("unable to read umem_alloc_sizes (%p)", sym.st_value); + *out = NULL; + return (-1); + } + + return (0); +} + + +static int +um_umem_cache_cb(uintptr_t addr, umem_cache_t *cp, umem_malloc_info_t *ump) +{ + if (strncmp(cp->cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0) + return (WALK_NEXT); + + ump->um_cp = cp; + + if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, ump, addr) == + -1) { + mdb_warn("can't walk 'umem' for cache %p", addr); + return (WALK_ERR); + } + + return (WALK_NEXT); +} + +void +umem_malloc_dist_help(void) +{ + mdb_printf("%s\n", + "report distribution of outstanding malloc()s"); + mdb_dec_indent(2); + mdb_printf("%OPTIONS%\n"); + mdb_inc_indent(2); + mdb_printf("%s", +" -b maxbins\n" +" Use at most maxbins bins for the data\n" +" -B minbinsize\n" +" Make the bins at least minbinsize bytes apart\n" +" -d dump the raw data out, without binning\n" +" -g use geometric binning instead of linear binning\n"); +} + +/*ARGSUSED*/ +int +umem_malloc_dist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + umem_malloc_info_t mi; + uint_t geometric = 0; + uint_t dump = 0; + size_t maxbuckets = 0; + size_t minbucketsize = 0; + + size_t minalloc = 0; + size_t maxalloc = UMI_MAX_BUCKET; + + if (flags & DCMD_ADDRSPEC) + return (DCMD_USAGE); + + if (mdb_getopts(argc, argv, + 'd', MDB_OPT_SETBITS, TRUE, &dump, + 'g', MDB_OPT_SETBITS, TRUE, &geometric, + 'b', MDB_OPT_UINTPTR, &maxbuckets, + 'B', MDB_OPT_UINTPTR, &minbucketsize, + 0) != argc) + return (DCMD_USAGE); + + bzero(&mi, sizeof (mi)); + mi.um_bucket = mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket), + UM_SLEEP | UM_GC); + + if (mdb_walk("umem_cache", (mdb_walk_cb_t)um_umem_cache_cb, + &mi) == -1) { + mdb_warn("unable to walk 'umem_cache'"); + return (DCMD_ERR); + } + + if (dump) { + int i; + for (i = minalloc; i <= maxalloc; i++) + mdb_printf("%d\t%d\n", i, mi.um_bucket[i]); + + return (DCMD_OK); + } + + umem_malloc_print_dist(mi.um_bucket, minalloc, maxalloc, + maxbuckets, 
minbucketsize, geometric); + + return (DCMD_OK); +} + +void +umem_malloc_info_help(void) +{ + mdb_printf("%s\n", + "report information about malloc()s by cache. "); + mdb_dec_indent(2); + mdb_printf("%<b>OPTIONS%</b>\n"); + mdb_inc_indent(2); + mdb_printf("%s", +" -b maxbins\n" +" Use at most maxbins bins for the data\n" +" -B minbinsize\n" +" Make the bins at least minbinsize bytes apart\n" +" -d dump the raw distribution data without binning\n" +#ifndef _KMDB +" -g use geometric binning instead of linear binning\n" +#endif + ""); +} +int +umem_malloc_info(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + umem_cache_t c; + umem_malloc_info_t mi; + + int skip = 0; + + size_t maxmalloc; + size_t overhead; + size_t allocated; + size_t avg_malloc; + size_t overhead_pct; /* 1000 * overhead_percent */ + + uint_t verbose = 0; + uint_t dump = 0; + uint_t geometric = 0; + size_t maxbuckets = 0; + size_t minbucketsize = 0; + + int *alloc_sizes; + int idx; + size_t num; + size_t minmalloc; + + if (mdb_getopts(argc, argv, + 'd', MDB_OPT_SETBITS, TRUE, &dump, + 'g', MDB_OPT_SETBITS, TRUE, &geometric, + 'b', MDB_OPT_UINTPTR, &maxbuckets, + 'B', MDB_OPT_UINTPTR, &minbucketsize, + 0) != argc) + return (DCMD_USAGE); + + if (dump || geometric || (maxbuckets != 0) || (minbucketsize != 0)) + verbose = 1; + + if (!(flags & DCMD_ADDRSPEC)) { + if (mdb_walk_dcmd("umem_cache", "umem_malloc_info", + argc, argv) == -1) { + mdb_warn("can't walk umem_cache"); + return (DCMD_ERR); + } + return (DCMD_OK); + } + + if (mdb_vread(&c, sizeof (c), addr) == -1) { + mdb_warn("unable to read cache at %p", addr); + return (DCMD_ERR); + } + + if (strncmp(c.cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0) { + if (!(flags & DCMD_LOOP)) + mdb_warn("umem_malloc_info: cache \"%s\" is not used " + "by malloc()\n", c.cache_name); + skip = 1; + } + + /* + * normally, print the header only the first time. In verbose mode, + * print the header on every non-skipped buffer + */ + if ((!verbose && DCMD_HDRSPEC(flags)) || (verbose && !skip)) + mdb_printf("%
+
+
+static int
+um_umem_cache_cb(uintptr_t addr, umem_cache_t *cp, umem_malloc_info_t *ump)
+{
+	if (strncmp(cp->cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0)
+		return (WALK_NEXT);
+
+	ump->um_cp = cp;
+
+	if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, ump, addr) ==
+	    -1) {
+		mdb_warn("can't walk 'umem' for cache %p", addr);
+		return (WALK_ERR);
+	}
+
+	return (WALK_NEXT);
+}
+
+void
+umem_malloc_dist_help(void)
+{
+	mdb_printf("%s\n",
+	    "report distribution of outstanding malloc()s");
+	mdb_dec_indent(2);
+	mdb_printf("%<b>OPTIONS%</b>\n");
+	mdb_inc_indent(2);
+	mdb_printf("%s",
+"  -b maxbins\n"
+"        Use at most maxbins bins for the data\n"
+"  -B minbinsize\n"
+"        Make the bins at least minbinsize bytes apart\n"
+"  -d    dump the raw data out, without binning\n"
+"  -g    use geometric binning instead of linear binning\n");
+}
+
+/*ARGSUSED*/
+int
+umem_malloc_dist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+	umem_malloc_info_t mi;
+	uint_t geometric = 0;
+	uint_t dump = 0;
+	size_t maxbuckets = 0;
+	size_t minbucketsize = 0;
+
+	size_t minalloc = 0;
+	size_t maxalloc = UMI_MAX_BUCKET;
+
+	if (flags & DCMD_ADDRSPEC)
+		return (DCMD_USAGE);
+
+	if (mdb_getopts(argc, argv,
+	    'd', MDB_OPT_SETBITS, TRUE, &dump,
+	    'g', MDB_OPT_SETBITS, TRUE, &geometric,
+	    'b', MDB_OPT_UINTPTR, &maxbuckets,
+	    'B', MDB_OPT_UINTPTR, &minbucketsize,
+	    0) != argc)
+		return (DCMD_USAGE);
+
+	bzero(&mi, sizeof (mi));
+	mi.um_bucket = mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket),
+	    UM_SLEEP | UM_GC);
+
+	if (mdb_walk("umem_cache", (mdb_walk_cb_t)um_umem_cache_cb,
+	    &mi) == -1) {
+		mdb_warn("unable to walk 'umem_cache'");
+		return (DCMD_ERR);
+	}
+
+	if (dump) {
+		int i;
+		for (i = minalloc; i <= maxalloc; i++)
+			mdb_printf("%d\t%d\n", i, mi.um_bucket[i]);
+
+		return (DCMD_OK);
+	}
+
+	umem_malloc_print_dist(mi.um_bucket, minalloc, maxalloc,
+	    maxbuckets, minbucketsize, geometric);
+
+	return (DCMD_OK);
+}
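[Editor's aside: the -b and -B options only constrain how
umem_malloc_print_dist() buckets the counts; that function is not part of this
diff, so the linear-binning sketch below is illustrative only, with a
hypothetical helper name:]

	/* pick a bin width honoring "at most maxbins" and "at least minbinsize" */
	static size_t
	bin_width(size_t minalloc, size_t maxalloc, size_t maxbins,
	    size_t minbinsize)
	{
		size_t span = maxalloc - minalloc + 1;
		size_t width = 1;

		if (maxbins != 0 && span > maxbins)
			width = (span + maxbins - 1) / maxbins;
		if (width < minbinsize)
			width = minbinsize;
		return (width);
	}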
\n", + "CACHE", "BUFSZ", "MAXMAL", + "BUFMALLC", "AVG_MAL", "MALLOCED", "OVERHEAD", "%OVER"); + + if (skip) + return (DCMD_OK); + + maxmalloc = c.cache_bufsize - sizeof (struct malloc_data); +#ifdef _LP64 + if (c.cache_bufsize > UMEM_SECOND_ALIGN) + maxmalloc -= sizeof (struct malloc_data); +#endif + + bzero(&mi, sizeof (mi)); + mi.um_cp = &c; + if (verbose) + mi.um_bucket = + mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket), + UM_SLEEP | UM_GC); + + if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, &mi, addr) == + -1) { + mdb_warn("can't walk 'umem'"); + return (DCMD_ERR); + } + + overhead = mi.um_malloc_overhead; + allocated = mi.um_malloc_size; + + /* do integer round off for the average */ + if (mi.um_malloc != 0) + avg_malloc = (allocated + (mi.um_malloc - 1)/2) / mi.um_malloc; + else + avg_malloc = 0; + + /* + * include per-slab overhead + * + * Each slab in a given cache is the same size, and has the same + * number of chunks in it; we read in the first slab on the + * slab list to get the number of chunks for all slabs. To + * compute the per-slab overhead, we just subtract the chunk usage + * from the slabsize: + * + * +------------+-------+-------+ ... --+-------+-------+-------+ + * |////////////| | | ... | |///////|///////| + * |////color///| chunk | chunk | ... | chunk |/color/|/slab//| + * |////////////| | | ... | |///////|///////| + * +------------+-------+-------+ ... --+-------+-------+-------+ + * | \_______chunksize * chunks_____/ | + * \__________________________slabsize__________________________/ + * + * For UMF_HASH caches, there is an additional source of overhead; + * the external umem_slab_t and per-chunk bufctl structures. We + * include those in our per-slab overhead. + * + * Once we have a number for the per-slab overhead, we estimate + * the actual overhead by treating the malloc()ed buffers as if + * they were densely packed: + * + * additional overhead = (# mallocs) * (per-slab) / (chunks); + * + * carefully ordering the multiply before the divide, to avoid + * round-off error. + */ + if (mi.um_malloc != 0) { + umem_slab_t slab; + uintptr_t saddr = (uintptr_t)c.cache_nullslab.slab_next; + + if (mdb_vread(&slab, sizeof (slab), saddr) == -1) { + mdb_warn("unable to read slab at %p\n", saddr); + } else { + long chunks = slab.slab_chunks; + if (chunks != 0 && c.cache_chunksize != 0 && + chunks <= c.cache_slabsize / c.cache_chunksize) { + uintmax_t perslab = + c.cache_slabsize - + (c.cache_chunksize * chunks); + + if (c.cache_flags & UMF_HASH) { + perslab += sizeof (umem_slab_t) + + chunks * + ((c.cache_flags & UMF_AUDIT) ? 
diff --git a/umemdbg/mdb/common/umem.h b/umemdbg/mdb/common/umem.h
new file mode 100644
index 0000000..f282b7c
--- /dev/null
+++ b/umemdbg/mdb/common/umem.h
@@ -0,0 +1,140 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _MDBMOD_UMEM_H
+#define	_MDBMOD_UMEM_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <mdb/mdb_modapi.h>
+#include <umem_impl.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int umem_ready;
+extern uint32_t umem_stack_depth;
+
+extern int umem_cache_walk_init(mdb_walk_state_t *);
+extern int umem_cache_walk_step(mdb_walk_state_t *);
+extern void umem_cache_walk_fini(mdb_walk_state_t *);
+
+extern int umem_cpu_walk_init(mdb_walk_state_t *);
+extern int umem_cpu_walk_step(mdb_walk_state_t *);
+extern void umem_cpu_walk_fini(mdb_walk_state_t *);
+
+extern int umem_cpu_cache_walk_init(mdb_walk_state_t *);
+extern int umem_cpu_cache_walk_step(mdb_walk_state_t *);
+
+extern int umem_slab_walk_init(mdb_walk_state_t *);
+extern int umem_slab_walk_partial_init(mdb_walk_state_t *);
+extern int umem_slab_walk_step(mdb_walk_state_t *);
+
+extern int umem_hash_walk_init(mdb_walk_state_t *wsp);
+extern int umem_hash_walk_step(mdb_walk_state_t *wsp);
+extern void umem_hash_walk_fini(mdb_walk_state_t *wsp);
+
+extern int umem_walk_init(mdb_walk_state_t *);
+extern int bufctl_walk_init(mdb_walk_state_t *);
+extern int freemem_walk_init(mdb_walk_state_t *);
+extern int freectl_walk_init(mdb_walk_state_t *);
+
+extern int umem_walk_step(mdb_walk_state_t *);
+extern void umem_walk_fini(mdb_walk_state_t *);
+
+extern int bufctl_history_walk_init(mdb_walk_state_t *);
+extern int bufctl_history_walk_step(mdb_walk_state_t *);
+extern void bufctl_history_walk_fini(mdb_walk_state_t *);
+
+extern int allocdby_walk_init(mdb_walk_state_t *);
+extern int allocdby_walk_step(mdb_walk_state_t *);
+extern void allocdby_walk_fini(mdb_walk_state_t *);
+
+extern int freedby_walk_init(mdb_walk_state_t *);
+extern int freedby_walk_step(mdb_walk_state_t *);
+extern void freedby_walk_fini(mdb_walk_state_t *);
+
+extern int umem_log_walk_init(mdb_walk_state_t *);
+extern int umem_log_walk_step(mdb_walk_state_t *);
+extern void umem_log_walk_fini(mdb_walk_state_t *);
+
+extern int vmem_walk_init(mdb_walk_state_t *);
+extern int vmem_walk_step(mdb_walk_state_t *);
+extern void vmem_walk_fini(mdb_walk_state_t *);
+
+extern int vmem_postfix_walk_step(mdb_walk_state_t *);
+
+extern int vmem_seg_walk_init(mdb_walk_state_t *);
+extern int vmem_seg_walk_step(mdb_walk_state_t *);
+extern void vmem_seg_walk_fini(mdb_walk_state_t *);
+
+extern int vmem_span_walk_init(mdb_walk_state_t *);
+extern int vmem_alloc_walk_init(mdb_walk_state_t *);
+extern int vmem_free_walk_init(mdb_walk_state_t *);
+
+extern int allocdby(uintptr_t, uint_t, int, const mdb_arg_t *);
+extern int bufctl(uintptr_t, uint_t, int, const mdb_arg_t *);
+extern int bufctl_audit(uintptr_t, uint_t, int, const mdb_arg_t *);
+extern int freedby(uintptr_t, uint_t, int, const mdb_arg_t *);
+extern int umalog(uintptr_t, uint_t, int, const mdb_arg_t *);
+extern int umausers(uintptr_t, uint_t, int, const mdb_arg_t *);
+extern int umem_cache(uintptr_t, uint_t, int, const mdb_arg_t *);
+extern int umem_log(uintptr_t, uint_t, int, const mdb_arg_t *);
+extern int umem_malloc_dist(uintptr_t, uint_t, int, const mdb_arg_t *);
+extern int umem_malloc_info(uintptr_t, uint_t, int, const mdb_arg_t *);
+extern int umem_status(uintptr_t, uint_t, int, const mdb_arg_t *);
+extern int umem_verify(uintptr_t, uint_t, int, const mdb_arg_t *);
+extern int umem_verify_alloc(uintptr_t, uint_t, int, const mdb_arg_t *);
+extern int umem_verify_free(uintptr_t, uint_t, int, const mdb_arg_t *);
+extern int vmem(uintptr_t, uint_t, int, const mdb_arg_t *);
+extern int vmem_seg(uintptr_t, uint_t, int, const mdb_arg_t *);
+extern int whatis(uintptr_t, uint_t, int, const mdb_arg_t *);
+
+extern void bufctl_help(void);
+extern void umem_malloc_dist_help(void);
+extern void umem_malloc_info_help(void);
+extern void vmem_seg_help(void);
+
+/*
+ * utility functions for the rest of the module
+ */
+extern int umem_init(void);
+extern int umem_get_magsize(const umem_cache_t *);
+extern size_t umem_estimate_allocated(uintptr_t, const umem_cache_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _MDBMOD_UMEM_H */
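[Editor's aside: the dcmds and walkers declared above still have to be handed
to mdb. The registration table is not part of this diff, but with the standard
mdb module API it would be wired up roughly as below; the table contents are
abbreviated and hypothetical:]

	#include <mdb/mdb_modapi.h>
	#include "umem.h"

	static const mdb_dcmd_t dcmds[] = {
		{ "umem_malloc_dist", "[-dg] [-b maxbins] [-B minbinsize]",
		    "report distribution of outstanding malloc()s",
		    umem_malloc_dist, umem_malloc_dist_help },
		{ "umem_malloc_info", "?[-dg] [-b maxbins] [-B minbinsize]",
		    "report information about malloc()s by cache",
		    umem_malloc_info, umem_malloc_info_help },
		{ NULL }
	};

	static const mdb_walker_t walkers[] = {
		{ "umem_cache", "walk list of umem caches",
		    umem_cache_walk_init, umem_cache_walk_step,
		    umem_cache_walk_fini },
		{ NULL }
	};

	static const mdb_modinfo_t modinfo = { MDB_API_VERSION, dcmds, walkers };

	const mdb_modinfo_t *
	_mdb_init(void)
	{
		return (&modinfo);
	}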
diff --git a/umemdbg/mdb/common/umem_pagesize.h b/umemdbg/mdb/common/umem_pagesize.h
new file mode 100644
index 0000000..10032f3
--- /dev/null
+++ b/umemdbg/mdb/common/umem_pagesize.h
@@ -0,0 +1,44 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2002 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _UMEM_PAGESIZE_H
+#define	_UMEM_PAGESIZE_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern size_t umem_pagesize;
+#undef PAGESIZE
+#define	PAGESIZE	(umem_pagesize)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _UMEM_PAGESIZE_H */
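[Editor's aside: redefining PAGESIZE this way matters because the debugger
need not share a page size with the target it is examining; umem_pagesize is
read from the target at runtime, so page-rounding code in the module picks up
the target's value transparently. An illustrative use:]

	/* pages needed to cover len bytes of the *target's* address space */
	size_t
	pages_needed(size_t len)
	{
		return ((len + PAGESIZE - 1) / PAGESIZE); /* (umem_pagesize) */
	}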
diff --git a/vmem.c b/vmem.c
index 963a033..49d0b42 100644
--- a/vmem.c
+++ b/vmem.c
@@ -23,7 +23,7 @@
  * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
  * Use is subject to license terms.
  *
- * Portions Copyright 2012 Joyent, Inc. All rights reserved.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
  */
 
 /* #pragma ident	"@(#)vmem.c	1.10	05/06/08 SMI" */
diff --git a/vmem_base.c b/vmem_base.c
index 34ebe18..9cf3883 100644
--- a/vmem_base.c
+++ b/vmem_base.c
@@ -23,7 +23,7 @@
  * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
  * Use is subject to license terms.
  *
- * Portions Copyright 2012 Joyent, Inc. All rights reserved.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
  */
 
 /* #pragma ident	"@(#)vmem_base.c	1.6	05/06/08 SMI" */
diff --git a/vmem_base.h b/vmem_base.h
index f2a56c9..dc8d8e3 100644
--- a/vmem_base.h
+++ b/vmem_base.h
@@ -2,8 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, (the "License").
- You may not use this file except in compliance with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -22,7 +22,7 @@
  * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
  * Use is subject to license terms.
  *
- * Portions Copyright 2012 Joyent, Inc. All rights reserved.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
  */
 
 #ifndef _VMEM_BASE_H
diff --git a/vmem_mmap.c b/vmem_mmap.c
index 6cf0d07..f762c5b 100644
--- a/vmem_mmap.c
+++ b/vmem_mmap.c
@@ -23,7 +23,7 @@
  * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
  * Use is subject to license terms.
  *
- * Portions Copyright 2006-2008 Message Systems, Inc. All rights reserved.
+ * Copyright 2006-2008 Message Systems, Inc. All rights reserved.
  */
 
 /* #pragma ident	"@(#)vmem_mmap.c	1.2	05/06/08 SMI" */
diff --git a/vmem_sbrk.c b/vmem_sbrk.c
index 9bf17b8..d7bbab1 100644
--- a/vmem_sbrk.c
+++ b/vmem_sbrk.c
@@ -23,7 +23,7 @@
  * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
  * Use is subject to license terms.
  *
- * Portions Copyright 2006-2008 Message Systems, Inc. All rights reserved.
+ * Copyright 2006-2008 Message Systems, Inc. All rights reserved.
  */
 
 /* #pragma ident	"@(#)vmem_sbrk.c	1.4	05/06/08 SMI" */