Work in progress: global dedupe config loader.

This commit is contained in:
Moinak Ghosh 2012-11-19 21:41:56 +05:30
parent 2909a3abff
commit 3b1d6b55fe
4 changed files with 276 additions and 0 deletions

142
rabin/global/config.c Normal file
View file

@ -0,0 +1,142 @@
/*
* This file is a part of Pcompress, a chunked parallel multi-
* algorithm lossless compression and decompression program.
*
* Copyright (C) 2012 Moinak Ghosh. All rights reserved.
* Use is subject to license terms.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* moinakg@belenix.org, http://moinakg.wordpress.com/
*/
#include <sys/types.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
#include <unistd.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <ctype.h>
#include <rabin_dedup.h>
#include "initdb.h"
/*
 * Byte-size constants used to pick segment size and directory layout.
 * Written as shifts so the power of two behind each value is visible.
 */
#define	ONE_PB		(1ULL << 50)	/* 1 PiB = 1125899906842624 bytes */
#define	ONE_TB		(1ULL << 40)	/* 1 TiB = 1099511627776 bytes */
#define	FOUR_MB		(4ULL << 20)	/* 4 MiB = 4194304 bytes */
#define	EIGHT_MB	(8ULL << 20)	/* 8 MiB = 8388608 bytes */
/*
 * Remove trailing whitespace, including the newline that fgets() keeps,
 * from a NUL-terminated string in place.
 */
static void
strip_trailing_space(char *str)
{
	size_t len = strlen(str);

	while (len > 0 && isspace((unsigned char)str[len - 1]))
		str[--len] = '\0';
}

/*
 * Fill in the fields of 'cfg' that are derived from chunk_sz and
 * archive_sz: chunk/segment sizes, directory fanout and depth, and the
 * number of containers per directory.
 *
 * Returns 0 on success, 1 if the chunk size resolves to zero bytes (which
 * would otherwise cause a division by zero).
 */
static int
compute_derived_params(archive_config_t *cfg)
{
	uint32_t segment_sz_bytes, total_dirs;
	uint64_t container_sz_bytes, dir_sz_bytes;
	int i;

	cfg->chunk_sz_bytes = RAB_BLK_AVG_SZ(cfg->chunk_sz);
	if (cfg->chunk_sz_bytes == 0) {
		fprintf(stderr, "Invalid Chunk Size: %u\n",
		    (unsigned int)cfg->chunk_sz);
		return (1);
	}

	/* Larger archives get bigger segments and a wider/deeper tree. */
	cfg->directory_levels = 2;
	if (cfg->archive_sz < ONE_TB) {
		segment_sz_bytes = FOUR_MB;
		cfg->directory_fanout = 128;
	} else if (cfg->archive_sz < ONE_PB) {
		segment_sz_bytes = EIGHT_MB;
		cfg->directory_fanout = 256;
	} else {
		segment_sz_bytes = EIGHT_MB;
		cfg->directory_fanout = 256;
		cfg->directory_levels = 3;
	}
	cfg->segment_sz = segment_sz_bytes / cfg->chunk_sz_bytes;

	/* Total leaf directories = fanout ^ levels (at most 256^3). */
	total_dirs = 1;
	for (i = 0; i < cfg->directory_levels; i++)
		total_dirs *= (uint32_t)cfg->directory_fanout;

	// Fixed number of segments in a container for now.
	cfg->container_sz = CONTAINER_ITEMS;

	/*
	 * CONTAINER_ITEMS * segment size is 8GB or 16GB, which does not fit
	 * in 32 bits; widen before multiplying so the product cannot wrap
	 * to zero.
	 */
	container_sz_bytes = (uint64_t)CONTAINER_ITEMS * segment_sz_bytes;
	dir_sz_bytes = cfg->archive_sz / total_dirs;
	if (dir_sz_bytes < container_sz_bytes)
		cfg->num_containers = 1;
	else
		cfg->num_containers = (int)(dir_sz_bytes / container_sz_bytes + 1);

	return (0);
}

/*
 * Load the global dedupe configuration from 'configfile' into 'cfg'.
 *
 * The file is read line by line (lines are limited by a 255 byte buffer).
 * Lines shorter than 9 characters, lines starting with '#', and lines
 * without an '=' are ignored. Recognized keys, matched as a prefix of the
 * line, are:
 *
 *   CHUNKSZ   - decimal integer in the range MIN_CK..MAX_CK.
 *   ROOTDIR   - path of the archive root; it must not exist yet.
 *   ARCHIVESZ - archive size, parsed by parse_numeric().
 *
 * Leading and trailing whitespace around each value is discarded. Keys
 * that are absent leave the corresponding 'cfg' field untouched. Once the
 * file has been parsed the derived fields of 'cfg' are computed.
 *
 * Returns 0 on success and 1 on any failure; a diagnostic is written to
 * stderr on failure.
 */
int
read_config(char *configfile, archive_config_t *cfg)
{
	FILE *fh;
	char line[255];
	int rv = 1;

	fh = fopen(configfile, "r");
	if (fh == NULL) {
		perror(" ");
		return (1);
	}

	while (fgets(line, sizeof (line), fh) != NULL) {
		char *pos;

		if (strlen(line) < 9 || line[0] == '#')
			continue;

		pos = strchr(line, '=');
		if (pos == NULL)
			continue;

		pos++; // Skip '=' char
		while (isspace((unsigned char)*pos))
			pos++;
		/* fgets() keeps the newline; it must not leak into the value. */
		strip_trailing_space(pos);

		if (strncmp(line, "CHUNKSZ", sizeof ("CHUNKSZ") - 1) == 0) {
			char *end;
			long ck = strtol(pos, &end, 10);

			/* Reject empty values and trailing garbage as well. */
			if (end == pos || *end != '\0' ||
			    ck < MIN_CK || ck > MAX_CK) {
				fprintf(stderr, "Invalid Chunk Size: %s\n", pos);
				goto out;
			}
			cfg->chunk_sz = (uint32_t)ck;

		} else if (strncmp(line, "ROOTDIR", sizeof ("ROOTDIR") - 1) == 0) {
			struct stat sb;
			size_t plen = strlen(pos);

			if (plen == 0 || plen >= sizeof (cfg->rootdir)) {
				fprintf(stderr, "Invalid ROOTDIR\n");
				goto out;
			}
			if (stat(pos, &sb) != -1) {
				fprintf(stderr, "Invalid ROOTDIR. It already exists.\n");
				goto out;
			}
			if (errno != ENOENT) {
				perror(" ");
				fprintf(stderr, "Invalid ROOTDIR\n");
				goto out;
			}
			/* Zero-fill so the stored path is always terminated. */
			memset(cfg->rootdir, 0, sizeof (cfg->rootdir));
			memcpy(cfg->rootdir, pos, plen);

		} else if (strncmp(line, "ARCHIVESZ", sizeof ("ARCHIVESZ") - 1) == 0) {
			int ovr;
			ssize_t arch_sz;

			ovr = parse_numeric(&arch_sz, pos);
			if (ovr == 1) {
				fprintf(stderr, "ARCHIVESZ value too large.\n");
				goto out;
			}
			/* A negative size would wrap when stored as uint64_t. */
			if (ovr == 2 || arch_sz < 0) {
				fprintf(stderr, "Invalid ARCHIVESZ value.\n");
				goto out;
			}
			cfg->archive_sz = (uint64_t)arch_sz;
		}
	}

	/*
	 * Now compute the remaining parameters.
	 */
	rv = compute_derived_params(cfg);
out:
	fclose(fh);
	return (rv);
}

62
rabin/global/config.h Normal file
View file

@ -0,0 +1,62 @@
/*
* This file is a part of Pcompress, a chunked parallel multi-
* algorithm lossless compression and decompression program.
*
* Copyright (C) 2012 Moinak Ghosh. All rights reserved.
* Use is subject to license terms.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* moinakg@belenix.org, http://moinakg.wordpress.com/
*/
#ifndef _C_ONFIG_H
#define _C_ONFIG_H
#include <limits.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Defaults for the similarity interval and the chunk digest name. */
#define	DEFAULT_SIMILARITY_INTERVAL	10
#define	DEFAULT_CKSUM			"SHA256"

/* Number of segments placed in one container. */
#define	CONTAINER_ITEMS			2048

/* Accepted range for the numeric CHUNKSZ identifier. */
#define	MIN_CK				1
#define	MAX_CK				5

/* 8GB, written as 8 * 2^30 bytes. */
#define	MIN_ARCHIVE_SZ			(8ULL * 1024 * 1024 * 1024)
/*
 * Configuration of a global dedupe archive. The first group of fields is
 * supplied by the configuration; the second group is computed from them.
 */
typedef struct {
char rootdir[PATH_MAX+1]; // Root directory path (NUL terminated).
uint32_t chunk_sz; // Numeric ID: 1 - 4k ... 5 - 64k
uint64_t archive_sz; // Total size of archive in bytes.
int chunk_cksum_type; // Which digest to use for hash based chunk lookup.
int similarity_interval; // Similarity based match intervals in %age.
// The items below are computed given the above
// components.
uint32_t chunk_sz_bytes; // Chunk size in bytes, derived from chunk_sz.
uint32_t segment_sz; // Number of chunks
uint32_t container_sz; // Number of segments
int directory_fanout; // Number of subdirectories in a directory
int directory_levels; // Levels of nested directories
int num_containers; // Number of containers in a directory
} archive_config_t;
/*
 * Read the archive configuration file 'configfile' and fill in 'cfg',
 * including the computed fields. The implementation returns 1 on every
 * error path (file cannot be opened, invalid CHUNKSZ, ROOTDIR or
 * ARCHIVESZ value).
 * NOTE(review): the success return value is presumably 0 - confirm
 * against the implementation in config.c.
 */
int read_config(char *configfile, archive_config_t *cfg);
#ifdef __cplusplus
}
#endif
#endif

38
rabin/global/initdb.c Normal file
View file

@ -0,0 +1,38 @@
/*
* This file is a part of Pcompress, a chunked parallel multi-
* algorithm lossless compression and decompression program.
*
* Copyright (C) 2012 Moinak Ghosh. All rights reserved.
* Use is subject to license terms.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* moinakg@belenix.org, http://moinakg.wordpress.com/
*/
#include <sys/types.h>
#include <sys/param.h>
#include <fcntl.h>
#include <time.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <utils.h>
#include <config.h>
#include "initdb.h"
/*
 * Initialize the global dedupe database described by 'configfile'.
 *
 * Not implemented yet. Until the real logic is added this reports failure
 * (1, the error value read_config() also uses) instead of falling off the
 * end of a non-void function, which would hand callers an undefined
 * return value.
 */
int
init_global_db(char *configfile)
{
	(void) configfile;	/* unused until implemented */
	return (1);
}

34
rabin/global/initdb.h Normal file
View file

@ -0,0 +1,34 @@
/*
* This file is a part of Pcompress, a chunked parallel multi-
* algorithm lossless compression and decompression program.
*
* Copyright (C) 2012 Moinak Ghosh. All rights reserved.
* Use is subject to license terms.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* moinakg@belenix.org, http://moinakg.wordpress.com/
*/
#ifndef _INITDB_H
#define _INITDB_H
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Initialize the global dedupe database using the configuration file
 * 'configfile'.
 * NOTE(review): the definition in initdb.c is currently an empty stub, so
 * the return value convention is not yet established - confirm once
 * implemented.
 */
int init_global_db(char *configfile);
#ifdef __cplusplus
}
#endif
#endif