Add a new allocator capability: slab caches with user-specified sizes.
Use size-specific slab caches in the main program and in Lzma to slightly reduce the memory footprint. Fix a missing hashtable counter update in the allocator. Clean up a repeated computation in Lzma with a macro.
commit 2eaf151ca0
parent 07dfed7769
7 changed files with 210 additions and 99 deletions
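The new slab_cache_add() entry point lets a caller pre-register an exact allocation size, so that later slab_alloc()/slab_free() calls of that size are served from a dedicated dynamic slab instead of being rounded up to the nearest built-in size class. A minimal usage sketch against the allocator.h declarations in this commit (the size, the loop, and the slab_init()/slab_cleanup() placement are illustrative assumptions; callers in main.c and lzma_init() likewise ignore slab_cache_add()'s return value):

#include <string.h>
#include "allocator.h"

int
main(void)
{
	size_t sz = 123456; /* Hypothetical odd size allocated repeatedly. */
	void *buf;
	int i;

	slab_init();
	slab_cache_add(sz); /* Register a dedicated cache for this exact size. */

	for (i = 0; i < 100; i++) {
		/* Context pointer is unused by these callers; NULL as in main.c. */
		buf = slab_alloc(NULL, sz);
		memset(buf, 0, sz);
		slab_free(NULL, buf); /* Returns the buffer to its slab's avail list. */
	}
	slab_cleanup(0); /* Print allocator statistics and free cached buffers. */
	return (0);
}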
Makefile | 8

--- a/Makefile
+++ b/Makefile
@@ -40,13 +40,19 @@ BAKFILES = *~ lzma/*~
 RM = rm -f
 CPPFLAGS = -I. -I./lzma -D_7ZIP_ST -DNODEFAULT_PROPS -DFILE_OFFSET_BITS=64 \
-	-D_REENTRANT -D__USE_SSE_INTRIN__ -DNDEBUG -D_LZMA_PROB32
+	-D_REENTRANT -D__USE_SSE_INTRIN__ -D_LZMA_PROB32
 VEC_FLAGS = -ftree-vectorize
 LOOP_OPTFLAGS = $(VEC_FLAGS) -floop-interchange -floop-block
 LDLIBS = -ldl -lbz2 $(ZLIB_DIR) -lz -lm
 
+ifdef DEBUG
+LINK = gcc -m64 -pthread -msse3
+COMPILE = gcc -m64 -g -msse3 -c
+else
 LINK = gcc -m64 -pthread -msse3
 COMPILE = gcc -m64 -O3 -msse3 -c
+CPPFLAGS += -DNDEBUG
+endif
 
 all: $(PROG)
 
 
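With this change an unoptimized build with debug symbols can be selected by defining DEBUG on the make command line, while the default build keeps -O3 and regains -DNDEBUG through the conditional. Assuming standard GNU make ifdef semantics (any non-empty value works):

	make DEBUG=1    # debug build: -g, assertions enabled
	make            # release build: -O3, -DNDEBUG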
allocator.c | 218

--- a/allocator.c
+++ b/allocator.c
@@ -51,16 +51,21 @@
 /*
  * Number of slabs:
  * 256 bytes to 1M in power of 2 steps: 13
- * 1M to 256M in linear steps of 1M: 256
+ * 1M to 128M in linear steps of 1M: 128
+ * 200 dynamic slab slots: 200
  *
  * By doing this we try to get reasonable memory usage while not
  * sacrificing performance.
  */
-#define NUM_SLABS 269
 #define NUM_POW2 13
-#define SLAB_START 256
+#define NUM_LINEAR 128
+#define NUM_SLAB_HASH 200 /* Dynamic slabs hashtable size. */
+#define NUM_SLABS (NUM_POW2 + NUM_LINEAR + NUM_SLAB_HASH)
+#define SLAB_POS_HASH (NUM_POW2 + NUM_LINEAR)
+#define SLAB_START_SZ 256 /* Starting slab size in Bytes. */
 #define SLAB_START_POW2 8 /* 2 ^ SLAB_START_POW2 = SLAB_START. */
-#define HTABLE_SZ 16384
+#define HTABLE_SZ 8192
 #define TWOM (2UL * 1024UL * 1024UL)
 #define ONEM (1UL * 1024UL * 1024UL)
 
@@ -72,22 +77,24 @@ static const unsigned int bv[] = {
 	0xFFFF0000
 };
 
-struct bufentry {
-	void *ptr;
-	int slab_index;
-	struct bufentry *next;
-};
 struct slabentry {
 	struct bufentry *avail;
-	struct bufentry *used;
+	struct slabentry *next;
 	size_t sz;
 	uint64_t allocs, hits;
 	pthread_mutex_t slab_lock;
 };
+struct bufentry {
+	void *ptr;
+	struct slabentry *slab;
+	struct bufentry *next;
+};
 
 static struct slabentry slabheads[NUM_SLABS];
 static struct bufentry **htable;
 static pthread_mutex_t *hbucket_locks;
 static pthread_mutex_t htable_lock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_mutex_t slab_table_lock = PTHREAD_MUTEX_INITIALIZER;
 static int inited = 0;
 
 static uint64_t total_allocs, oversize_allocs, hash_collisions, hash_entries;
 
@@ -117,10 +124,9 @@ slab_init()
 	int nprocs;
 
 	/* Initialize first NUM_POW2 power of 2 slots. */
-	slab_sz = SLAB_START;
+	slab_sz = SLAB_START_SZ;
 	for (i = 0; i < NUM_POW2; i++) {
 		slabheads[i].avail = NULL;
-		slabheads[i].used = NULL;
 		slabheads[i].sz = slab_sz;
 		slabheads[i].allocs = 0;
 		slabheads[i].hits = 0;
@@ -129,10 +135,10 @@ slab_init()
 		slab_sz *= 2;
 	}
 
-	/* At this point slab_sz is 2M. So linear slots start at 2M. */
-	for (i = NUM_POW2; i < NUM_SLABS; i++) {
+	/* At this point slab_sz is 1M. So linear slots start at 1M. */
+	for (i = NUM_POW2; i < SLAB_POS_HASH; i++) {
 		slabheads[i].avail = NULL;
-		slabheads[i].used = NULL;
+		slabheads[i].next = NULL;
 		slabheads[i].sz = slab_sz;
 		slabheads[i].allocs = 0;
 		slabheads[i].hits = 0;
@@ -141,6 +147,14 @@ slab_init()
 		slab_sz += ONEM;
 	}
 
+	for (i = SLAB_POS_HASH; i < NUM_SLABS; i++) {
+		slabheads[i].avail = NULL;
+		slabheads[i].next = NULL;
+		slabheads[i].sz = 0;
+		slabheads[i].allocs = 0;
+		slabheads[i].hits = 0;
+		/* Do not init locks here. They will be inited on demand. */
+	}
 	htable = (struct bufentry **)calloc(HTABLE_SZ, sizeof (struct bufentry *));
 	hbucket_locks = (pthread_mutex_t *)malloc(HTABLE_SZ * sizeof (pthread_mutex_t));
 
@@ -172,13 +186,17 @@ slab_cleanup(int quiet)
 
 	for (i=0; i<NUM_SLABS; i++)
 	{
-		if (slabheads[i].avail) {
+		struct slabentry *slab;
+
+		slab = &slabheads[i];
+		while (slab) {
+		if (slab->avail) {
 			if (!quiet) {
-				fprintf(stderr, "%21llu %21llu %21llu\n",slabheads[i].sz,
-				    slabheads[i].allocs, slabheads[i].hits);
+				fprintf(stderr, "%21llu %21llu %21llu\n",slab->sz,
+				    slab->allocs, slab->hits);
 			}
-			slabheads[i].allocs = 0;
-			buf = slabheads[i].avail;
+			slab->allocs = 0;
+			buf = slab->avail;
 			do {
 				buf1 = buf->next;
 				free(buf->ptr);
@@ -186,6 +204,8 @@ slab_cleanup(int quiet)
 				buf = buf1;
 			} while (buf);
 		}
+		slab = slab->next;
+		}
 	}
 
 	if (!quiet) {
@@ -202,10 +222,10 @@ slab_cleanup(int quiet)
 			buf = htable[i];
 
 			while (buf) {
-				if (buf->slab_index == -1) {
+				if (buf->slab == NULL) {
 					nonfreed_oversize++;
 				} else {
-					slabheads[buf->slab_index].allocs++;
+					buf->slab->allocs++;
 				}
 				buf1 = buf->next;
 				free(buf->ptr);
@@ -222,11 +242,32 @@ slab_cleanup(int quiet)
 			fprintf(stderr, "==================================================================\n");
 			for (i=0; i<NUM_SLABS; i++)
 			{
-				if (slabheads[i].allocs == 0) continue;
-				fprintf(stderr, "%21llu %21llu\n",slabheads[i].sz, slabheads[i].allocs);
+				struct slabentry *slab;
+
+				slab = &slabheads[i];
+				do {
+					if (slab->allocs > 0)
+						fprintf(stderr, "%21llu %21llu\n", \
+						    slab->sz, slab->allocs);
+					slab = slab->next;
+				} while (slab);
 			}
 		}
 	}
+	for (i=0; i<NUM_SLABS; i++)
+	{
+		struct slabentry *slab, *pslab;
+		int j;
+
+		slab = &slabheads[i];
+		j = 0;
+		do {
+			pslab = slab;
+			slab = slab->next;
+			if (j > 0) free(pslab);
+			j++;
+		} while (slab);
+	}
 	if (!quiet) fprintf(stderr, "\n\n");
 }
 
@@ -271,30 +312,91 @@ find_slot(unsigned int v)
 	return (r);
 }
 
+static void *
+try_dynamic_slab(size_t size)
+{
+	uint32_t sindx;
+	struct slabentry *slab;
+
+	/* Locate the hash slot for the size. */
+	sindx = hash6432shift((unsigned long)size) & (NUM_SLAB_HASH - 1);
+	sindx += SLAB_POS_HASH;
+	if (slabheads[sindx].sz == 0) return (NULL);
+
+	/* Linear search in the chained buckets. */
+	slab = &slabheads[sindx];
+	while (slab && slab->sz != size) {
+		slab = slab->next;
+	}
+
+	return (slab);
+}
+
+int
+slab_cache_add(size_t size)
+{
+	uint32_t sindx;
+	struct slabentry *slab;
+	if (try_dynamic_slab(size)) return (0); /* Already added. */
+
+	/* Locate the hash slot for the size. */
+	sindx = hash6432shift((unsigned long)size) & (NUM_SLAB_HASH - 1);
+	sindx += SLAB_POS_HASH;
+
+	if (slabheads[sindx].sz == 0) {
+		pthread_mutex_init(&(slabheads[sindx].slab_lock), NULL);
+		pthread_mutex_lock(&(slabheads[sindx].slab_lock));
+		slabheads[sindx].sz = size;
+		pthread_mutex_unlock(&(slabheads[sindx].slab_lock));
+	} else {
+		slab = (struct slabentry *)malloc(sizeof (struct slabentry));
+		if (!slab) return (0);
+		slab->avail = NULL;
+		slab->sz = size;
+		slab->allocs = 0;
+		slab->hits = 0;
+		pthread_mutex_init(&(slab->slab_lock), NULL);
+
+		pthread_mutex_lock(&(slabheads[sindx].slab_lock));
+		slab->next = slabheads[sindx].next;
+		slabheads[sindx].next = slab;
+		pthread_mutex_unlock(&(slabheads[sindx].slab_lock));
+	}
+	return (1);
+}
+
 void *
 slab_alloc(void *p, size_t size)
 {
-	size_t slab_sz = SLAB_START;
-	int i, found;
+	size_t slab_sz = SLAB_START_SZ;
+	int i;
 	size_t div;
+	void *ptr;
+	struct slabentry *slab;
 
 	ATOMIC_ADD(total_allocs, 1);
-	found = -1;
+	slab = NULL;
 
+	/* First check if we can use a dynamic slab of this size. */
+	slab = try_dynamic_slab(size);
+
+	if (!slab) {
 	if (size <= ONEM) {
 		/* First eleven slots are power of 2 sizes upto 1M. */
-		found = find_slot(size);
+		slab = &slabheads[find_slot(size)];
 	} else {
 		/* Next slots are in intervals of 1M. */
 		div = size / ONEM;
 		if (size % ONEM) div++;
-		if (div < NUM_SLABS) found = div + NUM_POW2;
+		if (div < NUM_LINEAR) slab = &slabheads[div + NUM_POW2];
 	}
-	if (found == -1) {
+	}
+
+	if (!slab) {
 		struct bufentry *buf = (struct bufentry *)malloc(sizeof (struct bufentry));
 		uint32_t hindx;
 
 		buf->ptr = malloc(size);
-		buf->slab_index = -1;
+		buf->slab = NULL;
 		hindx = hash6432shift((unsigned long)(buf->ptr)) & (HTABLE_SZ - 1);
 
 		pthread_mutex_lock(&hbucket_locks[hindx]);
@@ -302,40 +404,33 @@ slab_alloc(void *p, size_t size)
 		htable[hindx] = buf;
 		pthread_mutex_unlock(&hbucket_locks[hindx]);
 		ATOMIC_ADD(oversize_allocs, 1);
+		ATOMIC_ADD(hash_entries, 1);
 		return (buf->ptr);
 	} else {
 		struct bufentry *buf;
 		uint32_t hindx;
 
-		pthread_mutex_lock(&(slabheads[found].slab_lock));
-		if (slabheads[found].avail == NULL) {
-			slabheads[found].allocs++;
-			pthread_mutex_unlock(&(slabheads[found].slab_lock));
+		pthread_mutex_lock(&(slab->slab_lock));
+		if (slab->avail == NULL) {
+			slab->allocs++;
+			pthread_mutex_unlock(&(slab->slab_lock));
 			buf = (struct bufentry *)malloc(sizeof (struct bufentry));
-			buf->ptr = malloc(slabheads[found].sz);
-			buf->slab_index = found;
-			hindx = hash6432shift((unsigned long)(buf->ptr)) & (HTABLE_SZ - 1);
-
-			if (htable[hindx]) ATOMIC_ADD(hash_collisions, 1);
-			pthread_mutex_lock(&hbucket_locks[hindx]);
-			buf->next = htable[hindx];
-			htable[hindx] = buf;
-			pthread_mutex_unlock(&hbucket_locks[hindx]);
-			ATOMIC_ADD(hash_entries, 1);
+			buf->ptr = malloc(slab->sz);
+			buf->slab = slab;
 		} else {
-			buf = slabheads[found].avail;
-			slabheads[found].avail = buf->next;
-			slabheads[found].hits++;
-			pthread_mutex_unlock(&(slabheads[found].slab_lock));
-			hindx = hash6432shift((unsigned long)(buf->ptr)) & (HTABLE_SZ - 1);
+			buf = slab->avail;
+			slab->avail = buf->next;
+			slab->hits++;
+			pthread_mutex_unlock(&(slab->slab_lock));
+		}
+
+		hindx = hash6432shift((unsigned long)(buf->ptr)) & (HTABLE_SZ - 1);
 		if (htable[hindx]) ATOMIC_ADD(hash_collisions, 1);
 		pthread_mutex_lock(&hbucket_locks[hindx]);
 		buf->next = htable[hindx];
 		htable[hindx] = buf;
 		pthread_mutex_unlock(&hbucket_locks[hindx]);
 		ATOMIC_ADD(hash_entries, 1);
-		}
 		return (buf->ptr);
 	}
 }
@@ -355,7 +450,10 @@ slab_free(void *p, void *address)
 	pbuf = NULL;
 	while (buf) {
 		if (buf->ptr == address) {
-			if (buf->slab_index == -1) {
+			if (hash_entries <= 0) {
+				fprintf(stderr, "Inconsistent allocation hash\n");
+				abort();
+			}
 			if (pbuf)
 				pbuf->next = buf->next;
 			else
@@ -363,22 +461,16 @@ slab_free(void *p, void *address)
 			pthread_mutex_unlock(&hbucket_locks[hindx]);
 			ATOMIC_SUB(hash_entries, 1);
 
+			if (buf->slab == NULL) {
 				free(buf->ptr);
 				free(buf);
 				found = 1;
 				break;
 			} else {
-				if (pbuf)
-					pbuf->next = buf->next;
-				else
-					htable[hindx] = buf->next;
-				pthread_mutex_unlock(&hbucket_locks[hindx]);
-				ATOMIC_SUB(hash_entries, 1);
-
-				pthread_mutex_lock(&(slabheads[buf->slab_index].slab_lock));
-				buf->next = slabheads[buf->slab_index].avail;
-				slabheads[buf->slab_index].avail = buf;
-				pthread_mutex_unlock(&(slabheads[buf->slab_index].slab_lock));
+				pthread_mutex_lock(&(buf->slab->slab_lock));
+				buf->next = buf->slab->avail;
+				buf->slab->avail = buf;
+				pthread_mutex_unlock(&(buf->slab->slab_lock));
 				found = 1;
 				break;
 			}
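Both the dynamic-slab lookup and the buffer hashtable derive bucket indices from hash6432shift(), whose definition is outside this diff. A sketch under the assumption that it is Thomas Wang's 64-bit-to-32-bit mix, a common implementation carrying this name (if the project uses a different hash, only the distribution changes, not the masking logic):

#include <stdint.h>

/* Assumed implementation: Thomas Wang's hash6432shift. Callers mask the
 * result with (NUM_SLAB_HASH - 1) or (HTABLE_SZ - 1) to pick a bucket,
 * as seen in try_dynamic_slab() and slab_alloc() above. */
static uint32_t
hash6432shift(unsigned long key)
{
	key = (~key) + (key << 18); /* key = (key << 18) - key - 1 */
	key = key ^ (key >> 31);
	key = key * 21;             /* key = (key + (key << 2)) + (key << 4) */
	key = key ^ (key >> 11);
	key = key + (key << 6);
	key = key ^ (key >> 22);
	return ((uint32_t)key);
}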
--- a/allocator.h
+++ b/allocator.h
@@ -29,6 +29,7 @@ void slab_cleanup(int quiet);
 void *slab_alloc(void *p, size_t size);
 void *slab_calloc(void *p, size_t items, size_t size);
 void slab_free(void *p, void *address);
+int slab_cache_add(size_t size);
 
 #endif
 
--- a/lzma/LzmaEnc.c
+++ b/lzma/LzmaEnc.c
@@ -45,6 +45,14 @@ static int ttt = 0;
 #define kNumBitPriceShiftBits 4
 #define kBitPrice (1 << kNumBitPriceShiftBits)
 
+#ifdef _LZMA_PROB32
+#define CLzmaProb UInt32
+#else
+#define CLzmaProb UInt16
+#endif
+
+#define LITPROB_SZ(lclp) ((0x300 << lclp) * sizeof(CLzmaProb))
+
 #ifdef __USE_SSE_INTRIN__
 #define MOV_DBL_QUAD(mem, val) __asm (\
 "movntiq %[val], (%[ptr1]);"\
@@ -114,6 +122,8 @@ void LzmaEncProps_Init(CLzmaEncProps *p)
 void LzmaEncProps_Normalize(CLzmaEncProps *p)
 {
   int level = p->level;
+  unsigned lclp;
+
   if (!p->normalized) {
     if (level < 0) level = 5;
     p->level = level;
@@ -126,6 +136,8 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p)
     if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1);
     if (p->numHashBytes < 0) p->numHashBytes = 4;
     if (p->mc == 0) p->mc = (16 + (p->fb >> 1)) >> (p->btMode ? 0 : 1);
+    lclp = p->lc + p->lp;
+    p->litprob_sz = LITPROB_SZ(lclp);
     if (p->numThreads < 0)
       p->numThreads =
 #ifndef _7ZIP_ST
@@ -238,12 +250,6 @@ typedef struct
 
 #define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
 
-#ifdef _LZMA_PROB32
-#define CLzmaProb UInt32
-#else
-#define CLzmaProb UInt16
-#endif
-
 #define LZMA_PB_MAX 4
 #define LZMA_LC_MAX 8
 #define LZMA_LP_MAX 4
@@ -423,7 +429,7 @@ void LzmaEnc_SaveState(CLzmaEncHandle pp)
   memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders));
   memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder));
   memcpy(dest->reps, p->reps, sizeof(p->reps));
-  memcpy(dest->litProbs, p->litProbs, (0x300 << p->lclp) * sizeof(CLzmaProb));
+  memcpy(dest->litProbs, p->litProbs, LITPROB_SZ(p->lclp));
 }
 
 void LzmaEnc_RestoreState(CLzmaEncHandle pp)
@@ -449,7 +455,7 @@ void LzmaEnc_RestoreState(CLzmaEncHandle pp)
   memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders));
   memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder));
   memcpy(dest->reps, p->reps, sizeof(p->reps));
-  memcpy(dest->litProbs, p->litProbs, (0x300 << dest->lclp) * sizeof(CLzmaProb));
+  memcpy(dest->litProbs, p->litProbs, LITPROB_SZ(dest->lclp));
 }
 
 SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
@@ -2063,8 +2069,8 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig)
   if (p->litProbs == 0 || p->saveState.litProbs == 0 || p->lclp != lclp)
   {
     LzmaEnc_FreeLits(p, alloc);
-    p->litProbs = (CLzmaProb *)alloc->Alloc(alloc, (0x300 << lclp) * sizeof(CLzmaProb));
-    p->saveState.litProbs = (CLzmaProb *)alloc->Alloc(alloc, (0x300 << lclp) * sizeof(CLzmaProb));
+    p->litProbs = (CLzmaProb *)alloc->Alloc(alloc, LITPROB_SZ(lclp));
+    p->saveState.litProbs = (CLzmaProb *)alloc->Alloc(alloc, LITPROB_SZ(lclp));
     if (p->litProbs == 0 || p->saveState.litProbs == 0)
     {
       LzmaEnc_FreeLits(p, alloc);
--- a/lzma/LzmaEnc.h
+++ b/lzma/LzmaEnc.h
@@ -30,6 +30,7 @@ typedef struct _CLzmaEncProps
   unsigned writeEndMark;  /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */
   int numThreads; /* 1 or 2, default = 2 */
   int normalized;
+  size_t litprob_sz;
 } CLzmaEncProps;
 
 extern void LzmaEncProps_Init(CLzmaEncProps *p);
--- a/lzma_compress.c
+++ b/lzma_compress.c
@@ -77,6 +77,7 @@ lzma_init(void **data, int *level, ssize_t chunksize)
 		if (*level > 9) *level = 9;
 		p->level = *level;
 		LzmaEncProps_Normalize(p);
+		slab_cache_add(p->litprob_sz);
 	}
 	*data = p;
 	return (0);
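LITPROB_SZ() gives the byte size of the LZMA literal-probability table, which lzma_init() now pre-registers as a slab cache. A worked example of the value being cached, assuming the encoder defaults lc = 3, lp = 0 and the -D_LZMA_PROB32 build selected in the Makefile (so CLzmaProb is a 32-bit UInt32; the uint32_t stand-in below encodes that assumption):

#include <assert.h>
#include <stdint.h>

/* Mirrors LITPROB_SZ from LzmaEnc.c under _LZMA_PROB32. */
#define LITPROB_SZ(lclp) ((0x300 << (lclp)) * sizeof(uint32_t))

int
main(void)
{
	unsigned lclp = 3 + 0; /* Defaults: lc = 3, lp = 0. */

	/* 0x300 << 3 = 6144 probability entries, 4 bytes each. */
	assert(LITPROB_SZ(lclp) == 24576);
	return (0);
}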
main.c | 4

--- a/main.c
+++ b/main.c
@@ -659,6 +659,10 @@ start_compress(const char *filename, uint64_t chunksize, int level)
 	nprocs = nthreads;
 
 	fprintf(stderr, "Scaling to %d threads\n", nprocs);
+	slab_cache_add(chunksize);
+	slab_cache_add(compressed_chunksize + CHDR_SZ);
+	slab_cache_add(sizeof (struct cmp_data));
+
 	dary = (struct cmp_data **)slab_alloc(NULL, sizeof (struct cmp_data *) * nprocs);
 	cread_buf = (uchar_t *)slab_alloc(NULL, chunksize);
 	if (!cread_buf) {