pcompress/allocator.c
Moinak Ghosh 94563a7ecd Fix buffer size computation when allocating Rabin block array.
Reduce memory usage of Rabin block array.
Add an SSE optimization for bsdiff.
Move integer hashing function to utils file.
More updates to README.
2012-07-28 23:55:24 +05:30

513 lines
No EOL
12 KiB
C

/*
* This file is a part of Pcompress, a chunked parallel multi-
* algorithm lossless compression and decompression program.
*
* Copyright (C) 2012 Moinak Ghosh. All rights reserved.
* Use is subject to license terms.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* moinakg@belenix.org, http://moinakg.wordpress.com/
*
* This program includes partly-modified public domain source
* code from the LZMA SDK: http://www.7-zip.org/sdk.html
*/
/*
* A basic slab allocator that uses power of 2 and fixed interval
* slab sizes and uses integer hashing to track pointers. It uses
* per-slab and per-hash-bucket locking for scalability. This
* allocator is being used in Pcompress as repeated compression of
* fixed-size chunks causes repeated and predictable memory allocation
* and freeing patterns. Using pre-allocated buffer pools in this
* case causes significant speedup.
*
* There is no provision yet to reap buffers from high-usage slabs
* and return them to the heap.
*/
#include <sys/types.h>
#include <sys/param.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
#include <errno.h>
#include <stdlib.h>
#include <unistd.h>
#include <ctype.h>
#include <pthread.h>
#include <math.h>
#include "utils.h"
#include "allocator.h"
#ifndef DEBUG_NO_SLAB
/*
 * Number of slabs:
 * 256 bytes to 1M in power of 2 steps: 13
 * 2M to 129M in linear steps of 1M: 128
 * 200 dynamic slab slots: 200
 *
 * By doing this we try to get reasonable memory usage while not
 * sacrificing performance.
 */
#define	NUM_POW2	13
#define	NUM_LINEAR	128
#define	NUM_SLAB_HASH	200 /* Dynamic slabs hashtable size. */
#define	NUM_SLABS	(NUM_POW2 + NUM_LINEAR + NUM_SLAB_HASH)
#define	SLAB_POS_HASH	(NUM_POW2 + NUM_LINEAR) /* First dynamic (hashed) slot. */
#define	SLAB_START_SZ	256 /* Starting slab size in Bytes. */
#define	SLAB_START_POW2	8 /* 2 ^ SLAB_START_POW2 = SLAB_START. */
/*
 * Buckets in the pointer-tracking hashtable.  Bucket selection uses
 * "hash & (HTABLE_SZ - 1)", which requires a power of two (8192 is).
 * NOTE(review): NUM_SLAB_HASH (200) is masked the same way in
 * try_dynamic_slab()/slab_cache_add(); results stay in range (max 199)
 * but the distribution is uneven since 200 is not a power of two.
 */
#define	HTABLE_SZ	8192
#define	TWOM	(2UL * 1024UL * 1024UL)
#define	ONEM	(1UL * 1024UL * 1024UL)
/* Magic bit masks used to compute log2 of a power of 2 (see find_slot()). */
static const unsigned int bv[] = {
	0xAAAAAAAA,
	0xCCCCCCCC,
	0xF0F0F0F0,
	0xFF00FF00,
	0xFFFF0000
};
/* One slab: a free-list of equal-sized cached buffers plus usage counters. */
struct slabentry {
	struct bufentry *avail;    /* Free-list of cached buffers. */
	struct slabentry *next;    /* Chain link for dynamic slab hash buckets. */
	size_t sz;                 /* Buffer size this slab serves; 0 = unused slot. */
	uint64_t allocs, hits;     /* Buffers created vs. free-list reuses. */
	pthread_mutex_t slab_lock; /* Protects avail and the counters. */
};
/* Tracks one buffer; lives in htable while allocated, on a slab free-list after free. */
struct bufentry {
	void *ptr;                 /* The user-visible buffer. */
	struct slabentry *slab;    /* Owning slab; NULL for oversize allocations. */
	struct bufentry *next;     /* Hash-bucket / free-list link. */
};
static struct slabentry slabheads[NUM_SLABS];
static struct bufentry **htable;       /* ptr -> bufentry tracking hashtable. */
static pthread_mutex_t *hbucket_locks; /* One lock per htable bucket. */
static pthread_mutex_t htable_lock = PTHREAD_MUTEX_INITIALIZER;
/* NOTE(review): slab_table_lock appears unused in this file -- confirm before removing. */
static pthread_mutex_t slab_table_lock = PTHREAD_MUTEX_INITIALIZER;
static int inited = 0, bypass = 0;
static uint64_t total_allocs, oversize_allocs, hash_collisions, hash_entries;
/*
 * One-time allocator setup: build the static slab table and the
 * pointer-tracking hashtable.  Not thread-safe; call before spawning
 * threads.  Setting the ALLOCATOR_BYPASS environment variable makes
 * every slab_* call fall through to plain malloc/free, as does a
 * failure to allocate the tracking structures.
 */
void
slab_init()
{
	int i;
	size_t slab_sz;

	/* Check bypass env variable. */
	if (getenv("ALLOCATOR_BYPASS") != NULL) {
		bypass = 1;
		return;
	}

	/* Initialize first NUM_POW2 power of 2 slots: 256 bytes .. 1M. */
	slab_sz = SLAB_START_SZ;
	for (i = 0; i < NUM_POW2; i++) {
		slabheads[i].avail = NULL;
		slabheads[i].next = NULL;
		slabheads[i].sz = slab_sz;
		slabheads[i].allocs = 0;
		slabheads[i].hits = 0;

		/* Speed up: Copy from already inited but not yet used lock object. */
		slabheads[i].slab_lock = htable_lock;
		slab_sz *= 2;
	}

	/*
	 * After the 13 doublings above slab_sz is 2M, so the linear slots
	 * cover 2M .. 129M in 1M increments.
	 */
	for (i = NUM_POW2; i < SLAB_POS_HASH; i++) {
		slabheads[i].avail = NULL;
		slabheads[i].next = NULL;
		slabheads[i].sz = slab_sz;
		slabheads[i].allocs = 0;
		slabheads[i].hits = 0;

		/* Speed up: Copy from already inited but not yet used lock object. */
		slabheads[i].slab_lock = htable_lock;
		slab_sz += ONEM;
	}

	/* Dynamic slab slots start out unused (sz == 0 marks them free). */
	for (i = SLAB_POS_HASH; i < NUM_SLABS; i++) {
		slabheads[i].avail = NULL;
		slabheads[i].next = NULL;
		slabheads[i].sz = 0;
		slabheads[i].allocs = 0;
		slabheads[i].hits = 0;
		/* Do not init locks here. They will be inited on demand. */
	}

	htable = (struct bufentry **)calloc(HTABLE_SZ, sizeof (struct bufentry *));
	hbucket_locks = (pthread_mutex_t *)malloc(HTABLE_SZ * sizeof (pthread_mutex_t));
	if (htable == NULL || hbucket_locks == NULL) {
		/*
		 * Cannot build the tracking table; degrade gracefully to
		 * plain malloc/free rather than crash later on NULL derefs.
		 */
		free(htable);
		free(hbucket_locks);
		htable = NULL;
		hbucket_locks = NULL;
		bypass = 1;
		return;
	}
	for (i = 0; i < HTABLE_SZ; i++)
		hbucket_locks[i] = htable_lock;

	total_allocs = 0;
	oversize_allocs = 0;
	hash_collisions = 0;
	hash_entries = 0;
	inited = 1;
}
/*
 * Tear down the allocator.  When quiet == 0 prints usage statistics and
 * a leak report.  Frees all cached buffers, all leaked buffers still in
 * the tracking hashtable, the hashtable itself and every dynamically
 * chained slabentry.  Safe to call more than once (inited is reset).
 *
 * Fixes vs. previous version: htable/hbucket_locks were only freed when
 * leaks existed; allocs counters were reset only for slabs with a
 * non-empty free-list (inflating the leak report); printf arguments now
 * match the "%llu" conversion via explicit casts (sz is size_t and the
 * counters are uint64_t).
 */
void
slab_cleanup(int quiet)
{
	int i;
	struct bufentry *buf, *buf1;
	uint64_t nonfreed_oversize;

	if (!inited) return;
	if (bypass) return;

	if (!quiet) {
		fprintf(stderr, "Slab Allocation Stats\n");
		fprintf(stderr, "==================================================================\n");
		fprintf(stderr, " Slab Size | Allocations | Hits |\n");
		fprintf(stderr, "==================================================================\n");
	}

	/* Release every buffer cached on a slab free-list and reset counters. */
	for (i = 0; i < NUM_SLABS; i++)
	{
		struct slabentry *slab;

		slab = &slabheads[i];
		while (slab) {
			if (slab->avail) {
				if (!quiet) {
					fprintf(stderr, "%21llu %21llu %21llu\n",
					    (unsigned long long)slab->sz,
					    (unsigned long long)slab->allocs,
					    (unsigned long long)slab->hits);
				}
				buf = slab->avail;
				slab->avail = NULL;
				do {
					buf1 = buf->next;
					free(buf->ptr);
					free(buf);
					buf = buf1;
				} while (buf);
			}
			/* Reset unconditionally so the leak report below counts only leaks. */
			slab->allocs = 0;
			slab = slab->next;
		}
	}

	if (!quiet) {
		fprintf(stderr, "==================================================================\n");
		fprintf(stderr, "Oversize Allocations : %llu\n", (unsigned long long)oversize_allocs);
		fprintf(stderr, "Total Requests : %llu\n", (unsigned long long)total_allocs);
		fprintf(stderr, "Hash collisions : %llu\n", (unsigned long long)hash_collisions);
		fprintf(stderr, "Leaked allocations : %llu\n", (unsigned long long)hash_entries);
	}

	if (hash_entries > 0) {
		/* Sweep the tracking table: anything still present was leaked. */
		nonfreed_oversize = 0;	/* NOTE(review): computed but never reported. */
		for (i = 0; i < HTABLE_SZ; i++) {
			buf = htable[i];
			while (buf) {
				if (buf->slab == NULL) {
					nonfreed_oversize++;
				} else {
					/* Count the leak against its owning slab. */
					buf->slab->allocs++;
				}
				buf1 = buf->next;
				free(buf->ptr);
				free(buf);
				buf = buf1;
			}
		}
		if (!quiet) {
			fprintf(stderr, "==================================================================\n");
			fprintf(stderr, " Slab Size | Allocations: leaked |\n");
			fprintf(stderr, "==================================================================\n");
			for (i = 0; i < NUM_SLABS; i++)
			{
				struct slabentry *slab;

				slab = &slabheads[i];
				do {
					if (slab->allocs > 0)
						fprintf(stderr, "%21llu %21llu\n",
						    (unsigned long long)slab->sz,
						    (unsigned long long)slab->allocs);
					slab = slab->next;
				} while (slab);
			}
		}
	}

	/*
	 * Free the tracking structures unconditionally; previously they
	 * leaked whenever there were no outstanding allocations.
	 */
	free(htable);
	free(hbucket_locks);
	htable = NULL;
	hbucket_locks = NULL;

	/* Free dynamically chained slab entries (the head entries are static). */
	for (i = 0; i < NUM_SLABS; i++)
	{
		struct slabentry *slab, *pslab;
		int j;

		slab = &slabheads[i];
		j = 0;
		do {
			pslab = slab;
			slab = slab->next;
			if (j > 0) free(pslab);
			j++;
		} while (slab);
		slabheads[i].next = NULL;
	}
	if (!quiet) fprintf(stderr, "\n\n");
	inited = 0;
}
/*
 * calloc() replacement: allocate items * size bytes from the slab
 * allocator and zero-fill.  Returns NULL on multiplication overflow or
 * allocation failure (previously a failed slab_alloc() led to
 * memset(NULL, ...), which is undefined behavior).  The 'p' argument
 * is unused; it exists for allocator-callback signature compatibility.
 */
void *
slab_calloc(void *p, size_t items, size_t size) {
	void *ptr;

	if (bypass) return (calloc(items, size));

	/* Guard items * size against overflow, like calloc() itself does. */
	if (size != 0 && items > ((size_t)-1) / size)
		return (NULL);
	ptr = slab_alloc(p, items * size);
	if (ptr == NULL)
		return (NULL);
	memset(ptr, 0, items * size);
	return (ptr);
}
/*
 * Map an allocation size to its power-of-2 slab slot index.  The size
 * is rounded up to the next power of two and its log2 is rebased so
 * that SLAB_START_SZ (2 ^ SLAB_START_POW2) lands in slot 0.
 */
static unsigned int
find_slot(unsigned int v)
{
	unsigned int pos, bit;

	/* Round up to nearest power of 2. */
	v = roundup_pow_two(v);

	/*
	 * Derive log2(v) with the mask table from Bit Twiddling Hacks
	 * (http://graphics.stanford.edu/~seander/bithacks.html): bit k of
	 * the result is set when v intersects mask bv[k].  This tells us
	 * which single bit of the rounded value is set.
	 */
	pos = 0;
	for (bit = 0; bit < 5; bit++) {
		if ((v & bv[bit]) != 0)
			pos |= 1U << bit;
	}

	/* Rebase to starting power of 2 slot number. */
	if (pos > SLAB_START_POW2)
		return (pos - SLAB_START_POW2);
	return (0);
}
/*
 * Look up a dynamic (application-registered) slab serving exactly the
 * given size.  Returns the matching slabentry, or NULL if no slab of
 * that size was registered via slab_cache_add().
 *
 * The return type is now struct slabentry * instead of void * so the
 * compiler can type-check the callers; call sites are unaffected.
 */
static struct slabentry *
try_dynamic_slab(size_t size)
{
	uint32_t sindx;
	struct slabentry *slab;

	/*
	 * Locate the hash slot for the size.  The mask keeps the index in
	 * 0..NUM_SLAB_HASH-1 even though 200 is not a power of two.
	 */
	sindx = hash6432shift((unsigned long)size) & (NUM_SLAB_HASH - 1);
	sindx += SLAB_POS_HASH;

	/* sz == 0 means this bucket was never claimed. */
	if (slabheads[sindx].sz == 0) return (NULL);

	/* Linear search in the chained buckets. */
	slab = &slabheads[sindx];
	while (slab && slab->sz != size) {
		slab = slab->next;
	}
	return (slab);
}
/*
 * Register a dynamic slab for the exact given size so subsequent
 * slab_alloc(size) calls are served from a dedicated free-list.
 * Returns 1 if a slab was added; 0 when bypassed, already present, or
 * allocation failed.
 *
 * NOTE(review): the try_dynamic_slab() check and the insertion below
 * are not covered by a single lock, so two threads registering the
 * same size concurrently could both insert it (check-then-act race).
 * Presumably this is only called single-threaded during setup --
 * confirm against callers.
 */
int
slab_cache_add(size_t size)
{
	uint32_t sindx;
	struct slabentry *slab;

	if (bypass) return (0);
	if (try_dynamic_slab(size)) return (0); /* Already added. */

	/* Locate the hash slot for the size. */
	sindx = hash6432shift((unsigned long)size) & (NUM_SLAB_HASH - 1);
	sindx += SLAB_POS_HASH;

	if (slabheads[sindx].sz == 0) {
		/* First use of this bucket: init its lock on demand, then claim it. */
		pthread_mutex_init(&(slabheads[sindx].slab_lock), NULL);
		pthread_mutex_lock(&(slabheads[sindx].slab_lock));
		slabheads[sindx].sz = size;
		pthread_mutex_unlock(&(slabheads[sindx].slab_lock));
	} else {
		/* Bucket occupied: chain a freshly allocated slabentry onto it. */
		slab = (struct slabentry *)malloc(sizeof (struct slabentry));
		if (!slab) return (0);

		slab->avail = NULL;
		slab->sz = size;
		slab->allocs = 0;
		slab->hits = 0;
		pthread_mutex_init(&(slab->slab_lock), NULL);

		pthread_mutex_lock(&(slabheads[sindx].slab_lock));
		slab->next = slabheads[sindx].next;
		slabheads[sindx].next = slab;
		pthread_mutex_unlock(&(slabheads[sindx].slab_lock));
	}
	return (1);
}
/*
 * malloc() replacement.  Serves the request from, in order: a dynamic
 * slab of the exact size, a power-of-2 slab (size <= 1M), or a linear
 * 1M-step slab; anything larger than the linear range gets a plain
 * tracked malloc.  Every returned pointer is recorded in the tracking
 * hashtable so slab_free() can route it back.  Returns NULL if the
 * underlying malloc fails (previously a failed malloc was dereferenced
 * or handed to the caller unchecked).  'p' is unused (callback
 * signature compatibility).
 */
void *
slab_alloc(void *p, size_t size)
{
	size_t div;
	struct slabentry *slab;

	if (bypass) return (malloc(size));

	ATOMIC_ADD(total_allocs, 1);

	/* First check if we can use a dynamic slab of this size. */
	slab = try_dynamic_slab(size);

	if (!slab) {
		if (size <= ONEM) {
			/* First NUM_POW2 (13) slots are power of 2 sizes upto 1M. */
			slab = &slabheads[find_slot(size)];
		} else {
			/* Next slots are in intervals of 1M. */
			div = size / ONEM;
			if (size % ONEM) div++;
			if (div < NUM_LINEAR) slab = &slabheads[div + NUM_POW2];
		}
	}

	if (!slab) {
		/* Oversized request: plain malloc, tracked with slab == NULL. */
		struct bufentry *buf = (struct bufentry *)malloc(sizeof (struct bufentry));
		uint32_t hindx;

		if (buf == NULL) return (NULL);
		buf->ptr = malloc(size);
		if (buf->ptr == NULL) {
			free(buf);
			return (NULL);
		}
		buf->slab = NULL;
		hindx = hash6432shift((unsigned long)(buf->ptr)) & (HTABLE_SZ - 1);

		pthread_mutex_lock(&hbucket_locks[hindx]);
		buf->next = htable[hindx];
		htable[hindx] = buf;
		pthread_mutex_unlock(&hbucket_locks[hindx]);
		ATOMIC_ADD(oversize_allocs, 1);
		ATOMIC_ADD(hash_entries, 1);
		return (buf->ptr);
	} else {
		struct bufentry *buf;
		uint32_t hindx;

		pthread_mutex_lock(&(slab->slab_lock));
		if (slab->avail == NULL) {
			/* Free-list empty: create a fresh buffer of the slab's size. */
			slab->allocs++;
			pthread_mutex_unlock(&(slab->slab_lock));
			buf = (struct bufentry *)malloc(sizeof (struct bufentry));
			if (buf == NULL) return (NULL);
			buf->ptr = malloc(slab->sz);
			if (buf->ptr == NULL) {
				free(buf);
				return (NULL);
			}
			buf->slab = slab;
		} else {
			/* Reuse a cached buffer from the slab's free-list. */
			buf = slab->avail;
			slab->avail = buf->next;
			slab->hits++;
			pthread_mutex_unlock(&(slab->slab_lock));
		}

		hindx = hash6432shift((unsigned long)(buf->ptr)) & (HTABLE_SZ - 1);
		pthread_mutex_lock(&hbucket_locks[hindx]);
		/* Collision check moved under the bucket lock to avoid a racy read. */
		if (htable[hindx]) ATOMIC_ADD(hash_collisions, 1);
		buf->next = htable[hindx];
		htable[hindx] = buf;
		pthread_mutex_unlock(&hbucket_locks[hindx]);
		ATOMIC_ADD(hash_entries, 1);
		return (buf->ptr);
	}
}
/*
 * free() replacement.  Looks the pointer up in the tracking hashtable:
 * slab-backed buffers are pushed back onto their slab's free-list for
 * reuse, while oversized (slab == NULL) buffers are released to the
 * system.  A pointer this allocator never handed out is a fatal error.
 */
void
slab_free(void *p, void *address)
{
	struct bufentry *buf, *pbuf;
	int found = 0;
	uint32_t hindx;

	if (!address) return;
	if (bypass) { free(address); return; }

	hindx = hash6432shift((uint64_t)(address)) & (HTABLE_SZ - 1);

	pthread_mutex_lock(&hbucket_locks[hindx]);
	buf = htable[hindx];
	pbuf = NULL;
	while (buf) {
		if (buf->ptr == address) {
			/* hash_entries is unsigned, so this catches only zero. */
			if (hash_entries <= 0) {
				fprintf(stderr, "Inconsistent allocation hash\n");
				abort();
			}
			/* Unlink from the bucket chain before releasing the lock. */
			if (pbuf)
				pbuf->next = buf->next;
			else
				htable[hindx] = buf->next;
			pthread_mutex_unlock(&hbucket_locks[hindx]);
			ATOMIC_SUB(hash_entries, 1);

			if (buf->slab == NULL) {
				/* Oversized buffer: return it to the system. */
				free(buf->ptr);
				free(buf);
			} else {
				/* Slab buffer: push onto the slab's free-list for reuse. */
				pthread_mutex_lock(&(buf->slab->slab_lock));
				buf->next = buf->slab->avail;
				buf->slab->avail = buf;
				pthread_mutex_unlock(&(buf->slab->slab_lock));
			}
			found = 1;
			break;
		}
		pbuf = buf;
		buf = buf->next;
	}

	if (!found) {
		pthread_mutex_unlock(&hbucket_locks[hindx]);
		/*
		 * Do not free() an untracked pointer -- it may not belong to
		 * this heap at all and freeing it could corrupt the heap and
		 * mask the diagnostic.  Flush BEFORE abort(): the previous
		 * fflush() placed after abort() was unreachable dead code.
		 */
		fprintf(stderr, "Freed buf(%p) not in slab allocations!\n", address);
		fflush(stderr);
		abort();
	}
}
#else
/*
 * DEBUG_NO_SLAB build: the slab layer compiles down to trivial
 * pass-throughs to the system allocator.
 */
void
slab_init() {}

void
slab_cleanup(int quiet) {}

void
*slab_alloc(void *p, size_t size)
{
	return (malloc(size));
}

void
*slab_calloc(void *p, size_t items, size_t size)
{
	return (calloc(items, size));
}

void
slab_free(void *p, void *address)
{
	free(address);
}

int
slab_cache_add(size_t size)
{
	/*
	 * Bug fix: this non-void function had no return statement, which
	 * is undefined behavior when callers use the result.  Return 0
	 * ("nothing added"), matching the slab build's bypass behavior.
	 */
	return (0);
}
#endif