/*
 * This file is a part of Pcompress, a chunked parallel multi-
 * algorithm lossless compression and decompression program.
 *
 * Copyright (C) 2012 Moinak Ghosh. All rights reserved.
 * Use is subject to license terms.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * moinakg@belenix.org, http://moinakg.wordpress.com/
 *
 * This program includes partly-modified public domain source
 * code from the LZMA SDK: http://www.7-zip.org/sdk.html
 */

/*
 * A basic slab allocator that uses power of 2 and fixed-interval
 * slab sizes and integer hashing to track pointers. It uses
 * per-slab and per-hash-bucket locking for scalability. This
 * allocator is used in Pcompress because repeated compression of
 * fixed-size chunks produces repeated, predictable memory allocation
 * and freeing patterns; serving those from pre-allocated buffer
 * pools gives a significant speedup.
 *
 * There is no provision yet to reap buffers from high-usage slabs
 * and return them to the heap.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <limits.h>
#include <unistd.h>
#include <errno.h>
#include <sys/types.h>
#include <pthread.h>
#include "utils.h"
#include "allocator.h"

/*
 * Number of slabs:
 * 256 bytes to 1M in power of 2 steps: 13
 * 2M onwards in linear steps of 1M: 256
 *
 * By doing this we try to get reasonable memory usage while not
 * sacrificing performance.
 */
#define	NUM_SLABS	269
#define	NUM_POW2	13
#define	SLAB_START	256
#define	SLAB_START_POW2	8	/* 2 ^ SLAB_START_POW2 = SLAB_START. */
#define	HTABLE_SZ	16384
#define	TWOM		(2UL * 1024UL * 1024UL)
#define	ONEM		(1UL * 1024UL * 1024UL)

/* Bit masks used to compute log2 of a power of 2 value. */
static const unsigned int bv[] = {
	0xAAAAAAAA, 0xCCCCCCCC, 0xF0F0F0F0, 0xFF00FF00, 0xFFFF0000
};

struct bufentry {
	void *ptr;
	int slab_index;
	struct bufentry *next;
};

struct slabentry {
	struct bufentry *avail;
	struct bufentry *used;
	size_t sz;
	uint64_t allocs, hits;
	pthread_mutex_t slab_lock;
};

static struct slabentry slabheads[NUM_SLABS];
static struct bufentry **htable;
static pthread_mutex_t *hbucket_locks;
static pthread_mutex_t htable_lock = PTHREAD_MUTEX_INITIALIZER;
static int inited = 0;

static uint64_t total_allocs, oversize_allocs, hash_collisions, hash_entries;

/*
 * Hash function for 64-bit pointers that generates a 32-bit hash value.
 * Taken from Thomas Wang's integer hashing paper:
 * http://www.cris.com/~Ttwang/tech/inthash.htm
 */
uint32_t
hash6432shift(uint64_t key)
{
	key = (~key) + (key << 18); // key = (key << 18) - key - 1;
	key = key ^ (key >> 31);
	key = key * 21; // key = (key + (key << 2)) + (key << 4);
	key = key ^ (key >> 11);
	key = key + (key << 6);
	key = key ^ (key >> 22);
	return (uint32_t) key;
}

void
slab_init()
{
	int i;
	size_t slab_sz;

	/* Initialize the first NUM_POW2 power of 2 slots. */
	slab_sz = SLAB_START;
	for (i = 0; i < NUM_POW2; i++) {
		slabheads[i].avail = NULL;
		slabheads[i].used = NULL;
		slabheads[i].sz = slab_sz;
		slabheads[i].allocs = 0;
		slabheads[i].hits = 0;
		/*
		 * Speed up: copy from the already initialized, but not yet
		 * used, lock object instead of calling pthread_mutex_init().
		 */
		slabheads[i].slab_lock = htable_lock;
		slab_sz *= 2;
	}

	/*
	 * At this point slab_sz is 2M (SLAB_START << NUM_POW2), so the
	 * linear slots start at 2M.
	 */
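	/*
	 * For illustration, the resulting size map (derived purely from
	 * the constants above, no extra logic involved) is:
	 *   slot 0  -> 256B, slot 1 -> 512B, ..., slot 12 -> 1M,
	 *   slot 13 -> 2M,   slot 14 -> 3M,  ..., slot 268 -> 257M.
	 * A 300 byte request is thus served from the 512 byte slab.
	 */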
	for (i = NUM_POW2; i < NUM_SLABS; i++) {
		slabheads[i].avail = NULL;
		slabheads[i].used = NULL;
		slabheads[i].sz = slab_sz;
		slabheads[i].allocs = 0;
		slabheads[i].hits = 0;
		/* Speed up: copy from the already initialized, but not yet used, lock object. */
		slabheads[i].slab_lock = htable_lock;
		slab_sz += ONEM;
	}

	htable = (struct bufentry **)calloc(HTABLE_SZ, sizeof (struct bufentry *));
	hbucket_locks = (pthread_mutex_t *)malloc(HTABLE_SZ * sizeof (pthread_mutex_t));
	for (i = 0; i < HTABLE_SZ; i++)
		hbucket_locks[i] = htable_lock;

	inited = 1;
}

void
slab_cleanup(int quiet)
{
	int i;
	struct bufentry *buf, *buf1;
	uint64_t nonfreed_oversize = 0;

	if (!inited) return;

	if (!quiet) {
		fprintf(stderr, "Slab Allocation Stats\n");
		fprintf(stderr, "==================================================================\n");
		fprintf(stderr, " Slab Size           | Allocations         | Hits                |\n");
		fprintf(stderr, "==================================================================\n");
	}

	/* Release all pooled buffers back to the heap. */
	for (i = 0; i < NUM_SLABS; i++) {
		if (slabheads[i].avail) {
			if (!quiet) {
				fprintf(stderr, "%21llu %21llu %21llu\n", slabheads[i].sz,
				    slabheads[i].allocs, slabheads[i].hits);
			}
			buf = slabheads[i].avail;
			do {
				buf1 = buf->next;
				free(buf->ptr);
				free(buf);
				buf = buf1;
			} while (buf);
		}
	}

	if (!quiet) {
		fprintf(stderr, "==================================================================\n");
		fprintf(stderr, "Oversize Allocations  : %llu\n", oversize_allocs);
		fprintf(stderr, "Total Requests        : %llu\n", total_allocs);
		fprintf(stderr, "Hash collisions       : %llu\n", hash_collisions);
		fprintf(stderr, "Leaked allocations    : %llu\n", hash_entries);
	}

	/* Anything still in the hash table was never freed by the caller. */
	if (hash_entries > 0) {
		for (i = 0; i < HTABLE_SZ; i++) {
			buf = htable[i];
			while (buf) {
				if (buf->slab_index == -1) {
					nonfreed_oversize++;
				} else {
					slabheads[buf->slab_index].allocs++;
				}
				buf1 = buf->next;
				free(buf->ptr);
				free(buf);
				buf = buf1;
			}
		}
	}
	free(htable);
	free(hbucket_locks);

	if (!quiet) {
		fprintf(stderr, "==================================================================\n");
		fprintf(stderr, " Slab Size           | Allocations: leaked |\n");
		fprintf(stderr, "==================================================================\n");
		for (i = 0; i < NUM_SLABS; i++) {
			if (slabheads[i].allocs > 0) {
				fprintf(stderr, "%21llu %21llu\n", slabheads[i].sz,
				    slabheads[i].allocs);
			}
		}
		fprintf(stderr, "Non-freed oversize allocations: %llu\n", nonfreed_oversize);
		fprintf(stderr, "==================================================================\n");
	}
	inited = 0;
}

/*
 * Find the power of 2 slab slot that will hold a given size. The size
 * is rounded up to the next power of 2 and log2 of that value is then
 * computed with the bv[] bit masks (the classic parallel bit-twiddling
 * technique).
 */
static int
find_slot(size_t size)
{
	unsigned int v, r;

	/* Round up to the next power of 2. */
	v = size - 1;
	v |= v >> 1;
	v |= v >> 2;
	v |= v >> 4;
	v |= v >> 8;
	v |= v >> 16;
	v++;

	/* Compute log2(v). */
	r = (v & bv[0]) != 0;
	r |= ((v & bv[4]) != 0) << 4;
	r |= ((v & bv[3]) != 0) << 3;
	r |= ((v & bv[2]) != 0) << 2;
	r |= ((v & bv[1]) != 0) << 1;

	/* Rebase so that the smallest slab (256 bytes) is slot 0. */
	if (r > SLAB_START_POW2)
		r -= SLAB_START_POW2;
	else
		r = 0;
	return (r);
}

/*
 * Allocate a buffer of at least the requested size. The context
 * pointer p is accepted for interface compatibility and is currently
 * unused.
 */
void *
slab_alloc(void *p, size_t size)
{
	int found;
	size_t div;

	ATOMIC_ADD(total_allocs, 1);
	found = -1;

	if (size <= ONEM) {
		/* The first NUM_POW2 (13) slots are power of 2 sizes up to 1M. */
		found = find_slot(size);
	} else {
		/* The remaining slots are in intervals of 1M. */
		div = size / ONEM;
		if (size % ONEM) div++;
		/* Bounds check so we never index past the slab array. */
		if (div + NUM_POW2 < NUM_SLABS) found = div + NUM_POW2;
	}

	if (found == -1) {
		/*
		 * Oversize allocation: too big for any slab. Fall back to
		 * plain malloc but still track the pointer in the hash table.
		 */
		struct bufentry *buf = (struct bufentry *)malloc(sizeof (struct bufentry));
		uint32_t hindx;

		buf->ptr = malloc(size);
		buf->slab_index = -1;
		hindx = hash6432shift((uint64_t)(buf->ptr)) & (HTABLE_SZ - 1);

		pthread_mutex_lock(&hbucket_locks[hindx]);
		buf->next = htable[hindx];
		htable[hindx] = buf;
		pthread_mutex_unlock(&hbucket_locks[hindx]);

		ATOMIC_ADD(hash_entries, 1);
		ATOMIC_ADD(oversize_allocs, 1);
		return (buf->ptr);
	} else {
		struct bufentry *buf;
		uint32_t hindx;

		pthread_mutex_lock(&(slabheads[found].slab_lock));
		if (slabheads[found].avail == NULL) {
			/* Slow path: the pool is empty, get a fresh buffer. */
			slabheads[found].allocs++;
			pthread_mutex_unlock(&(slabheads[found].slab_lock));

			buf = (struct bufentry *)malloc(sizeof (struct bufentry));
			buf->ptr = malloc(slabheads[found].sz);
			buf->slab_index = found;
		} else {
			/* Fast path: reuse a pooled buffer. */
			buf = slabheads[found].avail;
			slabheads[found].avail = buf->next;
			slabheads[found].hits++;
			pthread_mutex_unlock(&(slabheads[found].slab_lock));
		}

		/* Track the live pointer so slab_free() can locate it. */
		hindx = hash6432shift((uint64_t)(buf->ptr)) & (HTABLE_SZ - 1);
		if (htable[hindx]) ATOMIC_ADD(hash_collisions, 1);

		pthread_mutex_lock(&hbucket_locks[hindx]);
		buf->next = htable[hindx];
		htable[hindx] = buf;
		pthread_mutex_unlock(&hbucket_locks[hindx]);

		ATOMIC_ADD(hash_entries, 1);
		return (buf->ptr);
	}
}

void
slab_free(void *p, void *address)
{
	struct bufentry *buf, *pbuf;
	int found = 0;
	uint32_t hindx;

	if (!address) return;
	hindx = hash6432shift((uint64_t)(address)) & (HTABLE_SZ - 1);
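	/*
	 * Locking note: the bucket lock is always dropped before the
	 * per-slab lock is taken below (and slab_alloc() does the same in
	 * the opposite direction), so no thread ever holds two of the
	 * allocator's locks at once and lock-ordering deadlocks are avoided.
	 */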
	pthread_mutex_lock(&hbucket_locks[hindx]);
	buf = htable[hindx];
	pbuf = NULL;

	/* Walk the bucket's chain looking for this address. */
	while (buf) {
		if (buf->ptr == address) {
			/* Unlink the entry from the chain. */
			if (pbuf)
				pbuf->next = buf->next;
			else
				htable[hindx] = buf->next;
			pthread_mutex_unlock(&hbucket_locks[hindx]);
			ATOMIC_SUB(hash_entries, 1);

			if (buf->slab_index == -1) {
				/* Oversize allocation: return it to the heap. */
				free(buf->ptr);
				free(buf);
			} else {
				/* Slab allocation: put the buffer back into its pool. */
				pthread_mutex_lock(&(slabheads[buf->slab_index].slab_lock));
				buf->next = slabheads[buf->slab_index].avail;
				slabheads[buf->slab_index].avail = buf;
				pthread_mutex_unlock(&(slabheads[buf->slab_index].slab_lock));
			}
			found = 1;
			break;
		}
		pbuf = buf;
		buf = buf->next;
	}

	if (!found) {
		pthread_mutex_unlock(&hbucket_locks[hindx]);
		free(address);
		fprintf(stderr, "Freed buf(%p) not in slab allocations!\n", address);
		fflush(stderr);
	}
}
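
/*
 * A minimal usage sketch, not part of the allocator itself: compile
 * with -DALLOCATOR_EXAMPLE to get a standalone demonstration of the
 * init -> alloc -> free -> cleanup lifecycle. The first parameter of
 * slab_alloc()/slab_free() is the unused context pointer, so NULL is
 * passed here. This assumes utils.h provides the ATOMIC_ADD/ATOMIC_SUB
 * macros used above.
 */
#ifdef ALLOCATOR_EXAMPLE
int
main(void)
{
	void *a, *b;

	slab_init();

	/* 4000 bytes rounds up to the 4096-byte power of 2 slab. */
	a = slab_alloc(NULL, 4000);
	/* 3M + 1 byte falls in the linear (1M step) slab range. */
	b = slab_alloc(NULL, 3UL * 1024UL * 1024UL + 1);

	slab_free(NULL, a);
	slab_free(NULL, b);

	/* Re-allocating the same size should now hit the buffer pool. */
	a = slab_alloc(NULL, 4000);
	slab_free(NULL, a);

	slab_cleanup(0);	/* Print allocation statistics. */
	return (0);
}
#endif	/* ALLOCATOR_EXAMPLE */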