Work in progress global dedupe config loader.
This commit is contained in:
parent
2909a3abff
commit
3b1d6b55fe
4 changed files with 276 additions and 0 deletions
142
rabin/global/config.c
Normal file
142
rabin/global/config.c
Normal file
|
@ -0,0 +1,142 @@
|
|||
/*
|
||||
* This file is a part of Pcompress, a chunked parallel multi-
|
||||
* algorithm lossless compression and decompression program.
|
||||
*
|
||||
* Copyright (C) 2012 Moinak Ghosh. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* moinakg@belenix.org, http://moinakg.wordpress.com/
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/param.h>
|
||||
#include <fcntl.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include <ctype.h>
|
||||
#include <sys/stat.h>
|
||||
#include <rabin_dedup.h>
|
||||
|
||||
#include "initdb.h"
|
||||
|
||||
/*
 * Byte-size constants, written as powers of two.
 * ONE_TB and ONE_PB are the archive-size thresholds that pick the directory
 * layout; FOUR_MB and EIGHT_MB are the segment sizes chosen from them.
 */
#define	ONE_PB		(1ULL << 50)
#define	ONE_TB		(1ULL << 40)
#define	FOUR_MB		(4ULL << 20)
#define	EIGHT_MB	(8ULL << 20)
|
||||
|
||||
int
|
||||
read_config(char *configfile, archive_config_t *cfg)
|
||||
{
|
||||
FILE *fh;
|
||||
char line[255];
|
||||
uint32_t container_sz_bytes, segment_sz_bytes, total_dirs, i;
|
||||
|
||||
fh = fopen(configfile, "r");
|
||||
if (fh == NULL) {
|
||||
perror(" ");
|
||||
return (1);
|
||||
}
|
||||
while (fgets(line, 255, fh) != NULL) {
|
||||
int pos;
|
||||
|
||||
if (strlen(line) < 9 || line[0] == '#') {
|
||||
continue;
|
||||
}
|
||||
pos = strchr(line, '=');
|
||||
if (pos == NULL) continue;
|
||||
|
||||
pos++; // Skip '=' char
|
||||
while (isspace(*pos)) pos++;
|
||||
|
||||
if (strncmp(line, "CHUNKSZ", 7) == 0) {
|
||||
int ck = atoi(pos);
|
||||
if (ck < MIN_CK || ck > MAX_CK) {
|
||||
fprintf(stderr, "Invalid Chunk Size: %d\n", ck);
|
||||
fclose(fh);
|
||||
return (1);
|
||||
}
|
||||
cfg->chunk_sz = ck;
|
||||
|
||||
} else if (strncmp(line, "ROOTDIR") == 0) {
|
||||
struct stat sb;
|
||||
if (stat(pos, &sb) == -1) {
|
||||
if (errno != ENOENT) {
|
||||
perror(" ");
|
||||
fprintf(stderr, "Invalid ROOTDIR\n");
|
||||
fclose(fh);
|
||||
return (1);
|
||||
} else {
|
||||
memset(cfg->rootdir, 0, PATH_MAX+1);
|
||||
strncpy(cfg->rootdir, pos, PATH_MAX);
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "Invalid ROOTDIR. It already exists.\n");
|
||||
fclose(fh);
|
||||
return (1);
|
||||
}
|
||||
} else if (strncmp(line, "ARCHIVESZ") == 0) {
|
||||
int ovr;
|
||||
ssize_t arch_sz;
|
||||
ovr = parse_numeric(&arch_sz, pos);
|
||||
if (ovr == 1) {
|
||||
fprintf(stderr, "ARCHIVESZ value too large.\n");
|
||||
fclose(fh);
|
||||
return (1);
|
||||
}
|
||||
if (ovr == 2) {
|
||||
fprintf(stderr, "Invalid ARCHIVESZ value.\n");
|
||||
fclose(fh);
|
||||
return (1);
|
||||
}
|
||||
cfg->archive_sz = arch_sz;
|
||||
}
|
||||
}
|
||||
fclose(fh);
|
||||
|
||||
/*
|
||||
* Now compute the remaining parameters.
|
||||
*/
|
||||
cfg->chunk_sz_bytes = RAB_BLK_AVG_SZ(cfg->chunk_sz);
|
||||
cfg->directory_levels = 2;
|
||||
if (cfg->archive_sz < ONE_TB) {
|
||||
segment_sz_bytes = FOUR_MB;
|
||||
cfg->directory_fanout = 128;
|
||||
|
||||
} else if (cfg->archive_sz < ONE_PB) {
|
||||
segment_sz_bytes = EIGHT_MB;
|
||||
cfg->directory_fanout = 256;
|
||||
} else {
|
||||
segment_sz_bytes = EIGHT_MB;
|
||||
cfg->directory_fanout = 256;
|
||||
cfg->directory_levels = 3;
|
||||
}
|
||||
|
||||
cfg->segment_sz = segment_sz_bytes / cfg->chunk_sz_bytes;
|
||||
|
||||
total_dirs = 1;
|
||||
for (i = 0; i < cfg->directory_levels; i++)
|
||||
total_dirs *= cfg->directory_fanout;
|
||||
|
||||
// Fixed number of segments in a container for now.
|
||||
cfg->container_sz = CONTAINER_ITEMS;
|
||||
container_sz_bytes = CONTAINER_ITEMS * segment_sz_bytes;
|
||||
|
||||
if (cfg->archive_sz / total_dirs < container_sz)
|
||||
cfg->num_containers = 1;
|
||||
else
|
||||
cfg->num_containers = (cfg->archive_sz / total_dirs) / container_sz + 1;
|
||||
}
|
62
rabin/global/config.h
Normal file
62
rabin/global/config.h
Normal file
|
@ -0,0 +1,62 @@
|
|||
/*
|
||||
* This file is a part of Pcompress, a chunked parallel multi-
|
||||
* algorithm lossless compression and decompression program.
|
||||
*
|
||||
* Copyright (C) 2012 Moinak Ghosh. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* moinakg@belenix.org, http://moinakg.wordpress.com/
|
||||
*/
|
||||
|
||||
#ifndef _C_ONFIG_H
|
||||
#define _C_ONFIG_H
|
||||
|
||||
#include <limits.h>
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Defaults; presumably applied when the config file omits them - TODO confirm. */
#define	DEFAULT_SIMILARITY_INTERVAL	10
#define	DEFAULT_CKSUM			"SHA256"

/* Number of segments stored in one container. */
#define	CONTAINER_ITEMS			2048

/* Inclusive bounds of the accepted chunk size ID. */
#define	MIN_CK				1
#define	MAX_CK				5

/* 8GB, expressed as 8 * 2^30 bytes. */
#define	MIN_ARCHIVE_SZ			(8ULL << 30)
|
||||
|
||||
/*
 * Archive configuration. The first group of fields holds the configured
 * settings; the second group is computed from them.
 */
typedef struct {
	/* Archive root directory. */
	char rootdir[PATH_MAX+1];
	/* Chunk size as a numeric ID: 1 - 4k ... 5 - 64k. */
	uint32_t chunk_sz;
	/* Total size of the archive in bytes. */
	uint64_t archive_sz;
	/* Which digest to use for hash based chunk lookup. */
	int chunk_cksum_type;
	/* Similarity based match intervals, as a percentage. */
	int similarity_interval;

	/*
	 * The items below are computed given the above components.
	 */

	/* Chunk size in bytes. */
	uint32_t chunk_sz_bytes;
	/* Segment size, counted in chunks. */
	uint32_t segment_sz;
	/* Container size, counted in segments. */
	uint32_t container_sz;
	/* Number of subdirectories in a directory. */
	int directory_fanout;
	/* Levels of nested directories. */
	int directory_levels;
	/* Number of containers in a directory. */
	int num_containers;
} archive_config_t;
|
||||
|
||||
/*
 * Read the archive settings from the file named by configfile into *cfg and
 * fill in the computed fields of *cfg.
 * Returns 1 when the file cannot be opened or holds an invalid value.
 */
int read_config(char *configfile, archive_config_t *cfg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
38
rabin/global/initdb.c
Normal file
38
rabin/global/initdb.c
Normal file
|
@ -0,0 +1,38 @@
|
|||
/*
|
||||
* This file is a part of Pcompress, a chunked parallel multi-
|
||||
* algorithm lossless compression and decompression program.
|
||||
*
|
||||
* Copyright (C) 2012 Moinak Ghosh. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* moinakg@belenix.org, http://moinakg.wordpress.com/
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/param.h>
|
||||
#include <fcntl.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include <utils.h>
|
||||
#include <config.h>
|
||||
|
||||
#include "initdb.h"
|
||||
|
||||
/*
 * Initialize the global dedupe database; configfile presumably names the
 * config file to load (TODO confirm once implemented).
 *
 * Work in progress: nothing is implemented yet. Until it is, the function
 * returns 1 (failure) explicitly, so callers never read an indeterminate
 * value from a non-void function that falls off its end.
 */
int
init_global_db(char *configfile)
{
	(void) configfile;	// Unused until the implementation lands.

	return (1);
}
|
34
rabin/global/initdb.h
Normal file
34
rabin/global/initdb.h
Normal file
|
@ -0,0 +1,34 @@
|
|||
/*
|
||||
* This file is a part of Pcompress, a chunked parallel multi-
|
||||
* algorithm lossless compression and decompression program.
|
||||
*
|
||||
* Copyright (C) 2012 Moinak Ghosh. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* moinakg@belenix.org, http://moinakg.wordpress.com/
|
||||
*/
|
||||
|
||||
#ifndef _INITDB_H
|
||||
#define _INITDB_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
 * Set up the global dedupe database; configfile is presumably the path of
 * the config file to load.
 * NOTE(review): the definition in initdb.c is still a work-in-progress
 * stub - confirm the return convention once it is implemented.
 */
int init_global_db(char *configfile);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
Loading…
Reference in a new issue