/* Copyright 2006 David Crawshaw, released under the new BSD license. * Version 2, from http://www.zentus.com/c/hash.html */ /* Changed from just "hash" to "hive_hash" to reduce collisions when linked * in with erlang * * Dave Smith (dsmith@thehive.com) 12/08 */ #include #include #include #include #include "hive_hash.h" /* Table is sized by primes to minimise clustering. See: http://planetmath.org/encyclopedia/GoodHashTablePrimes.html */ static const unsigned int sizes[] = { 53, 97, 193, 389, 769, 1543, 3079, 6151, 12289, 24593, 49157, 98317, 196613, 393241, 786433, 1572869, 3145739, 6291469, 12582917, 25165843, 50331653, 100663319, 201326611, 402653189, 805306457, 1610612741 }; static const int sizes_count = sizeof(sizes) / sizeof(sizes[0]); static const float load_factor = 0.65; struct record { unsigned int hash; const char *key; void *value; }; struct hive_hash { struct record *records; unsigned int records_count; unsigned int size_index; }; static int hive_hash_grow(hive_hash *h) { int i; struct record *old_recs; unsigned int old_recs_length; old_recs_length = sizes[h->size_index]; old_recs = h->records; if (h->size_index == sizes_count - 1) return -1; if ((h->records = calloc(sizes[++h->size_index], sizeof(struct record))) == NULL) { h->records = old_recs; return -1; } h->records_count = 0; // rehash table for (i=0; i < old_recs_length; i++) if (old_recs[i].hash && old_recs[i].key) hive_hash_add(h, old_recs[i].key, old_recs[i].value); free(old_recs); return 0; } /* algorithm djb2 */ static unsigned int strhash(const char *str) { int c; int hash = 5381; while ((c = *str++)) hash = hash * 33 + c; return hash == 0 ? 1 : hash; } hive_hash * hive_hash_new(unsigned int capacity) { struct hive_hash *h; int i, sind = sizes_count; capacity /= load_factor; // JDM: This can leave sind uninitialized for (i=0; i < sizes_count; i++) if (sizes[i] > capacity) { sind = i; break; } assert(sizes_count != sind); if ((h = malloc(sizeof(struct hive_hash))) == NULL) return NULL; if ((h->records = calloc(sizes[sind], sizeof(struct record))) == NULL) { free(h); return NULL; } h->records_count = 0; h->size_index = sind; return h; } void hive_hash_destroy(hive_hash *h) { free(h->records); free(h); } int hive_hash_add(hive_hash *h, const char *key, void *value) { struct record *recs; int rc; unsigned int off, ind, size, code; if (key == NULL || *key == '\0') return -2; if (h->records_count > sizes[h->size_index] * load_factor) { rc = hive_hash_grow(h); if (rc) return rc; } code = strhash(key); recs = h->records; size = sizes[h->size_index]; ind = code % size; off = 0; while (recs[ind].key) ind = (code + (int)pow(++off,2)) % size; recs[ind].hash = code; recs[ind].key = key; recs[ind].value = value; h->records_count++; return 0; } void * hive_hash_get(hive_hash *h, const char *key) { struct record *recs; unsigned int off, ind, size; unsigned int code = strhash(key); recs = h->records; size = sizes[h->size_index]; ind = code % size; off = 0; // search on hash which remains even if a record has been removed, // so hash_remove() does not need to move any collision records while (recs[ind].hash) { if ((code == recs[ind].hash) && recs[ind].key && strcmp(key, recs[ind].key) == 0) return recs[ind].value; ind = (code + (int)pow(++off,2)) % size; } return NULL; } void * hive_hash_remove(hive_hash *h, const char *key) { unsigned int code = strhash(key); struct record *recs; void * value; unsigned int off, ind, size; recs = h->records; size = sizes[h->size_index]; ind = code % size; off = 0; while (recs[ind].hash) { if ((code == recs[ind].hash) && recs[ind].key && strcmp(key, recs[ind].key) == 0) { // do not erase hash, so probes for collisions succeed value = recs[ind].value; recs[ind].key = 0; recs[ind].value = 0; h->records_count--; return value; } ind = (code + (int)pow(++off, 2)) % size; } return NULL; } unsigned int hive_hash_size(hive_hash *h) { return h->records_count; }