pcompress/rabin/global/db.c

154 lines
4.1 KiB
C
Raw Normal View History

/*
* This file is a part of Pcompress, a chunked parallel multi-
* algorithm lossless compression and decompression program.
*
* Copyright (C) 2012 Moinak Ghosh. All rights reserved.
* Use is subject to license terms.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* moinakg@belenix.org, http://moinakg.wordpress.com/
*/
#include <sys/types.h>
#include <sys/param.h>
#include <fcntl.h>
#include <time.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <utils.h>
2013-02-14 17:40:53 +00:00
#include <allocator.h>
#include <pthread.h>
2013-02-16 18:03:06 +00:00
#include "db.h"
#include "config.h"
/*
 * Byte-count constants expressed as binary multiples.
 */
/* 2^50 bytes. */
#define ONE_PB (1125899906842624ULL)
/* 2^40 bytes. */
#define ONE_TB (1099511627776ULL)
/* 2^22 bytes. */
#define FOUR_MB (4194304ULL)
/* 2^23 bytes. */
#define EIGHT_MB (8388608ULL)
2013-02-14 17:40:53 +00:00
/*
* Hashtable structures for in-memory index.
*/
typedef struct _hash_entry {
2013-02-16 18:03:06 +00:00
segment_entry_t *seg;
2013-02-14 17:40:53 +00:00
struct _hash_entry *next;
struct _hash_entry *lru_prev;
struct _hash_entry *lru_next;
2013-02-16 18:03:06 +00:00
uchar_t cksum[1];
2013-02-14 17:40:53 +00:00
} hash_entry_t;
/*
 * A single hash table of the in-memory index.
 */
typedef struct {
/*
 * Array of hash_entry_t pointers. init_global_db_s() allocates it
 * zero-filled with calloc(), one element per hash slot.
 */
hash_entry_t **htab;
} htab_t;
typedef struct {
2013-02-16 18:03:06 +00:00
htab_t *list;
2013-02-14 17:40:53 +00:00
pthread_mutex_t *mlist;
hash_entry_t *lru_head;
hash_entry_t *lru_tail;
uint64_t memlimit;
uint64_t memused;
2013-02-16 18:03:06 +00:00
int hash_entry_size;
2013-02-14 17:40:53 +00:00
} htablst_t;
/*
 * Create an archive configuration from a configuration file.
 *
 * A zero-filled archive_config_t is allocated with calloc() and passed to
 * read_config() together with configfile.
 *
 * Returns the heap-allocated configuration on success. Returns NULL if the
 * allocation fails (a message is written to stderr) or if read_config()
 * returns non-zero; in the latter case the configuration is released
 * before returning so that it is not leaked.
 */
archive_config_t *
init_global_db(char *configfile)
{
	archive_config_t *cfg;
	int rv;

	cfg = calloc(1, sizeof (archive_config_t));
	if (!cfg) {
		fprintf(stderr, "Memory allocation failure\n");
		return (NULL);
	}

	rv = read_config(configfile, cfg);
	if (rv != 0) {
		/* No one else holds a reference to cfg; free it here. */
		free(cfg);
		return (NULL);
	}
	return (cfg);
}
archive_config_t *
2013-02-14 17:40:53 +00:00
init_global_db_s(char *path, uint32_t chunksize, int pct_interval,
2013-02-16 18:03:06 +00:00
compress_algo_t algo, cksum_t ck, cksum_t ck_sim, size_t file_sz,
size_t memlimit)
{
archive_config_t *cfg;
int rv;
2013-02-14 17:40:53 +00:00
float diff;
cfg = calloc(1, sizeof (archive_config_t));
2013-02-16 18:03:06 +00:00
rv = set_config_s(cfg, algo, ck, ck_sim, chunksize, file_sz, chunks_per_seg, pct_interval);
2013-02-14 17:40:53 +00:00
if (path != NULL) {
printf("Disk based index not yet implemented.\n");
free(cfg);
return (NULL);
} else {
uint32_t hash_slots, intervals, i;
uint64_t memreqd;
htablst_t *htablst;
2013-02-16 18:03:06 +00:00
int hash_entry_size;
2013-02-14 17:40:53 +00:00
// Compute total hashtable entries first
intervals = 100 / pct_interval - 1;
hash_slots = file_sz / cfg->segment_sz_bytes + 1;
hash_slots *= intervals;
2013-02-16 18:03:06 +00:00
hash_entry_size = sizeof (hash_entry_t) + cfg->similarity_cksum_sz - 1;
2013-02-14 17:40:53 +00:00
// Compute memory required to hold all hash entries assuming worst case 50%
// occupancy.
2013-02-16 18:03:06 +00:00
memreqd = hash_slots * (hash_entry_size + sizeof (hash_entry_t *) +
(sizeof (hash_entry_t *)) / 2);
2013-02-14 17:40:53 +00:00
memreqd += hash_slots * sizeof (hash_entry_t **);
diff = (float)pct_interval / 100.0;
// Reduce hash_slots to remain within memlimit
while (memreqd > memlimit) {
hash_slots -= (hash_slots * diff);
2013-02-16 18:03:06 +00:00
memreqd = hash_slots * (hash_entry_size + sizeof (hash_entry_t *) +
2013-02-14 17:40:53 +00:00
(sizeof (hash_entry_t *)) / 2);
memreqd += hash_slots * sizeof (hash_entry_t **);
}
// Now create as many hash tables as there are similarity match intervals
// each having hash_slots / intervals slots.
htablst = calloc(1, sizeof (htablst_t));
htablst->memlimit = memlimit;
2013-02-16 18:03:06 +00:00
htablst->list = (htab_t *)calloc(intervals, sizeof (htab_t));
2013-02-14 17:40:53 +00:00
htablst->mlist = (pthread_mutex_t *)malloc(intervals * sizeof (pthread_mutex_t));
2013-02-16 18:03:06 +00:00
htablst->hash_entry_size = hash_entry_size;
2013-02-14 17:40:53 +00:00
for (i = 0; i < intervals; i++) {
2013-02-16 18:03:06 +00:00
htablst->list[i].htab = (hash_entry_t **)calloc(hash_slots / intervals,
2013-02-14 17:40:53 +00:00
sizeof (hash_entry_t *));
htablst->memused += ((hash_slots / intervals) * (sizeof (hash_entry_t *)));
pthread_mutex_init(&(htablst->mlist[i]), NULL);
}
cfg->dbdata = htablst;
2013-02-16 18:03:06 +00:00
slab_cache_add(hash_entry_size);
2013-02-14 17:40:53 +00:00
slab_cache_add(cfg->chunk_cksum_sz);
}
return (cfg);
}
/*
 * Placeholder for inserting a checksum into the in-memory index (intended
 * purpose inferred from the name and parameters; no implementation exists
 * yet).
 *
 * The function is declared to return int, so it must not fall off the end
 * without a value: a caller reading the result would invoke undefined
 * behaviour. Until the insert logic is written it reports failure.
 *
 * Returns -1 always.
 */
int
db_insert_s(archive_config_t *cfg, uchar_t *cksum, int interval_num)
{
	/* Parameters are unused until the insert logic is implemented. */
	(void) cfg;
	(void) cksum;
	(void) interval_num;

	return (-1);
}