From 7f81869874b02454f09695530c129a5f0e30b9ae Mon Sep 17 00:00:00 2001 From: Moinak Ghosh Date: Sun, 20 Oct 2013 23:54:27 +0530 Subject: [PATCH] Archiving support using Libarchive: Work in progress changes. Change all perror() calls to use logger. Make the config script a little verbose. --- Makefile.in | 15 +-- README.md | 3 - archive/pc_archive.c | 177 ++++++++++++++++++++++++++++++ archive/pc_archive.h | 50 +++++++++ bsdiff/bsdiff.c | 4 +- config | 45 +++++++- pcompress.c | 201 ++++++++++++++--------------------- pcompress.h | 7 +- rabin/global/dedupe_config.c | 6 +- rabin/global/index.c | 6 +- rabin/rabin_dedup.c | 2 +- utils/utils.c | 95 ++++++++++++++++- utils/utils.h | 12 ++- 13 files changed, 477 insertions(+), 146 deletions(-) create mode 100644 archive/pc_archive.c create mode 100644 archive/pc_archive.h diff --git a/Makefile.in b/Makefile.in index 9a1b9c6..fbd2eda 100644 --- a/Makefile.in +++ b/Makefile.in @@ -28,9 +28,9 @@ LINKLIB=pcompress LIBVER=1 MAINSRCS = utils/utils.c allocator.c lzma_compress.c ppmd_compress.c \ adaptive_compress.c lzfx_compress.c lz4_compress.c none_compress.c \ - utils/xxhash_base.c utils/heap.c utils/cpuid.c pcompress.c + utils/xxhash_base.c utils/heap.c utils/cpuid.c archive/pc_archive.c pcompress.c MAINHDRS = allocator.h pcompress.h utils/utils.h utils/xxhash.h utils/heap.h \ - utils/cpuid.h utils/xxhash.h + utils/cpuid.h utils/xxhash.h archive/pc_archive.h MAINOBJS = $(MAINSRCS:.c=.o) PROGSRCS = main.c @@ -182,7 +182,8 @@ KECCAK_OBJS_ASM = $(KECCAK_SRCS_ASM:.s=.o) BAKFILES = *~ lzma/*~ lzfx/*~ lz4/*~ rabin/*~ bsdiff/*~ lzp/*~ utils/*~ crypto/sha2/*~ \ crypto/sha2/intel/*~ crypto/aes/*~ crypto/scrypt/*~ crypto/*~ rabin/global/*~ \ - delta2/*~ crypto/keccak/*~ transpose/*~ crypto/skein/*~ crypto/keccak/*.o + delta2/*~ crypto/keccak/*~ transpose/*~ crypto/skein/*~ crypto/keccak/*.o \ + archive/*~ RM = rm -f RM_RF = rm -rf @@ -191,14 +192,14 @@ COMMON_CPPFLAGS = -I. 
-I./lzma -I./lzfx -I./lz4 -I./rabin -I./bsdiff -DNODEFAULT -I./lzp @LIBBSCCPPFLAGS@ -I./crypto/skein -I./utils -I./crypto/sha2 \ -I./crypto/scrypt -I./crypto/aes -I./crypto @KEYLEN@ -I./rabin/global \ -I./crypto/keccak -I./transpose -I./crypto/blake2 $(EXTRA_CPPFLAGS) \ - -I./crypto/xsalsa20 -pedantic -Wall -std=gnu99 \ + -I./crypto/xsalsa20 -I./archive -pedantic -Wall -std=gnu99 \ -fno-strict-aliasing -Wno-unused-but-set-variable -Wno-enum-compare \ - @COMPAT_CPPFLAGS@ @XSALSA20_DEBUG@ + @COMPAT_CPPFLAGS@ @XSALSA20_DEBUG@ -I@LIBARCHIVE_INC@ COMMON_VEC_FLAGS = -ftree-vectorize COMMON_LOOP_OPTFLAGS = $(VEC_FLAGS) -floop-interchange -floop-block LDLIBS = -ldl -L./buildtmp -Wl,-R@LIBBZ2_DIR@ -lbz2 -L./buildtmp -Wl,-R@LIBZ_DIR@ -lz -lm @LIBBSCLFLAGS@ \ - -L./buildtmp -Wl,-R@OPENSSL_LIBDIR@ -lcrypto -lrt $(EXTRA_LDFLAGS) -Wl,-R/usr/lib,--enable-new-dtags \ - -Wl,-R/usr/lib64,--enable-new-dtags + -L./buildtmp -Wl,-R@OPENSSL_LIBDIR@ -lcrypto -lrt -Wl,-R@LIBARCHIVE_DIR@ -larchive $(EXTRA_LDFLAGS) \ + -Wl,-R/usr/lib,--enable-new-dtags -Wl,-R/usr/lib64,--enable-new-dtags OBJS = $(MAINOBJS) $(LZMAOBJS) $(PPMDOBJS) $(LZFXOBJS) $(LZ4OBJS) $(CRCOBJS) \ $(RABINOBJS) $(BSDIFFOBJS) $(LZPOBJS) $(DELTA2OBJS) @LIBBSCWRAPOBJ@ $(SKEINOBJS) \ $(SKEIN_BLOCK_OBJ) @SHA2ASM_OBJS@ @SHA2_OBJS@ $(KECCAK_OBJS) $(KECCAK_OBJS_ASM) \ diff --git a/README.md b/README.md index e10bd77..3c2713b 100644 --- a/README.md +++ b/README.md @@ -32,9 +32,6 @@ Pcompress also supports encryption via AES and uses Scrypt from Tarsnap for Password Based Key generation. A unique key is generated per session even if the same password is used and HMAC is used to do authentication. -NOTE: This utility is Not an archiver. It compresses only single files or - datastreams. To archive use something else like tar, cpio or pax. 
- Links of Interest ================= 
diff --git a/archive/pc_archive.c b/archive/pc_archive.c
new file mode 100644
index 0000000..e158775
--- /dev/null
+++ b/archive/pc_archive.c
@@ -0,0 +1,204 @@
+/*
+ * This file is a part of Pcompress, a chunked parallel multi-
+ * algorithm lossless compression and decompression program.
+ *
+ * Copyright (C) 2012-2013 Moinak Ghosh. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program.
+ * If not, see .
+ *
+ * moinakg@belenix.org, http://moinakg.wordpress.com/
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "pc_archive.h"
+
+#undef _FEATURES_H
+#define _XOPEN_SOURCE 700
+#include
+#include
+
+#define ARC_ENTRY_OVRHEAD 500
+static struct arc_list_state {
+	uchar_t *pbuf;
+	uint64_t bufsiz, bufpos, arc_size;
+	int fd;
+} a_state;
+pthread_mutex_t nftw_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/*
+ * Build list of pathnames in a temp file.
+ *
+ * nftw() callback: appends one record for fpath to the buffer in a_state,
+ * first flushing the buffer to a_state.fd when the record would not fit.
+ * Record layout (native byte order): 2-byte path length, the path bytes
+ * without terminator, 4-byte nftw type flag, 8-byte file size.
+ * Returns 0 to continue the tree walk, or -1 to stop it on a write error.
+ */
+static int
+add_pathname(const char *fpath, const struct stat *sb,
+		int tflag, struct FTW *ftwbuf)
+{
+	short len;
+	uint64_t fsize;
+	uchar_t *buf;
+
+	if (tflag == FTW_DP) return (0);
+	if (tflag == FTW_DNR || tflag == FTW_NS) {
+		log_msg(LOG_WARN, 0, "Cannot access %s\n", fpath);
+		return (0);
+	}
+	a_state.arc_size += (sb->st_size + ARC_ENTRY_OVRHEAD);
+	len = strlen(fpath);
+	fsize = sb->st_size;
+	if (a_state.bufpos + len + 14 > a_state.bufsiz) {
+		ssize_t wrtn = Write(a_state.fd, a_state.pbuf, a_state.bufpos);
+		/*
+		 * Test the sign before the size: wrtn is converted to unsigned
+		 * when compared with the uint64_t bufpos, so -1 never looks short.
+		 */
+		if (wrtn < 0 || (uint64_t)wrtn < a_state.bufpos) {
+			log_msg(LOG_ERR, 1, "Write: ");
+			return (-1);
+		}
+		a_state.bufpos = 0;
+	}
+	buf = a_state.pbuf + a_state.bufpos;
+	/* Fields sit at arbitrary offsets; memcpy avoids unaligned stores. */
+	memcpy(buf, &len, 2);
+	memcpy(buf + 2, fpath, len);
+	memcpy(buf + 2 + len, &tflag, 4);
+	memcpy(buf + 6 + len, &fsize, 8);
+	a_state.bufpos += (len + 14);
+	return (0);
+}
+
+/*
+ * Archiving related functions.
+ * This one creates a list of files to be included into the archive and
+ * sets up the libarchive context.
+ *
+ * On success returns 0 with the rewound list file descriptor in
+ * pctx->archive_members_fd, the pipe read and write ends in pctx->uncompfd
+ * and pctx->archive_data_fd, the libarchive handle in pctx->archive_ctx and
+ * the accumulated size in pctx->archive_size and sbuf->st_size.
+ * On failure returns -1 after closing and unlinking the list file.
+ */
+int
+setup_archive(pc_ctx_t *pctx, struct stat *sbuf)
+{
+	char *tmpfile = pctx->archive_members_file, *tmp;
+	int err, fd, pipefd[2];
+	uchar_t *pbuf;
+	struct archive *arc;
+
+	/* The directory can come from the environment, so bound the copy. */
+	tmp = get_temp_dir();
+	if (tmp == NULL ||
+	    strlen(tmp) + sizeof ("/.pcompXXXXXX") > sizeof (pctx->archive_members_file)) {
+		log_msg(LOG_ERR, 0, "Cannot build temporary file name.\n");
+		free(tmp);
+		return (-1);
+	}
+	strcpy(tmpfile, tmp);
+	strcat(tmpfile, "/.pcompXXXXXX");
+	free(tmp);
+
+	if ((fd = mkstemp(tmpfile)) == -1) {
+		log_msg(LOG_ERR, 1, "mkstemp errored.");
+		return (-1);
+	}
+
+	add_fname(tmpfile);
+	pbuf = malloc(pctx->chunksize);
+	if (pbuf == NULL) {
+		log_msg(LOG_ERR, 0, "Out of memory.");
+		goto fail;
+	}
+
+	/*
+	 * nftw requires using global state variable. So we lock to be mt-safe.
+	 * This means only one directory tree scan can happen at a time.
+	 */
+	pthread_mutex_lock(&nftw_mutex);
+	a_state.pbuf = pbuf;
+	a_state.bufsiz = pctx->chunksize;
+	a_state.bufpos = 0;
+	a_state.arc_size = 0;
+	a_state.fd = fd;
+	err = nftw(pctx->filename, add_pathname, 1024, FTW_PHYS); // 'pctx->filename' has dir name here
+	if (err != 0) {
+		log_msg(LOG_ERR, 1, "Directory scan failed for %s\n", pctx->filename);
+	} else if (a_state.bufpos > 0) {
+		ssize_t wrtn = Write(a_state.fd, a_state.pbuf, a_state.bufpos);
+		if (wrtn < 0 || (uint64_t)wrtn < a_state.bufpos) {
+			log_msg(LOG_ERR, 1, "Write failed.");
+			err = -1;
+		}
+	}
+	pctx->archive_size = a_state.arc_size;
+	sbuf->st_size = a_state.arc_size;
+	/* Drop the lock and the scratch buffer on the error path as well. */
+	pthread_mutex_unlock(&nftw_mutex);
+	free(pbuf);
+	if (err != 0) goto fail;
+	lseek(fd, 0, SEEK_SET);
+	pctx->archive_members_fd = fd;
+
+	if (pipe(pipefd) == -1) {
+		log_msg(LOG_ERR, 1, "Unable to create archiver pipe.\n");
+		goto fail;
+	}
+
+	pctx->uncompfd = pipefd[0]; // Read side
+	pctx->archive_data_fd = pipefd[1]; // Write side
+	arc = archive_write_new();
+	if (!arc) {
+		log_msg(LOG_ERR, 1, "Unable to create libarchive context.\n");
+		close(pipefd[0]); close(pipefd[1]);
+		goto fail;
+	}
+	archive_write_set_format_pax_restricted(arc);
+	archive_write_open_fd(arc, pctx->archive_data_fd);
+	pctx->archive_ctx = arc;
+
+	return (0);
+
+fail:
+	close(fd); unlink(tmpfile);
+	return (-1);
+}
+
+/*
+ * Thread function. Archive members and write to pipe. The dispatcher thread
+ * reads from the other end and compresses.
+ * Not implemented yet: the stub returns NULL immediately.
+ */
+void *
+run_archiver(void *dat) {
+	return (NULL);
+}
+
diff --git a/archive/pc_archive.h b/archive/pc_archive.h new file mode 100644 index 0000000..a83f12f --- /dev/null +++ b/archive/pc_archive.h @@ -0,0 +1,50 @@ +/* + * This file is a part of Pcompress, a chunked parallel multi- + * algorithm lossless compression and decompression program. + * + * Copyright (C) 2012-2013 Moinak Ghosh. All rights reserved. + * Use is subject to license terms. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program. + * If not, see . + * + * moinakg@belenix.org, http://moinakg.wordpress.com/ + * + */ + +#ifndef _ARCHIVE_H +#define _ARCHIVE_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + char *fpath; + int typeflag; + size_t size; +} archive_list_entry_t; + +/* + * Archiving related functions. + */ +int setup_archive(pc_ctx_t *pctx, struct stat *sbuf); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/bsdiff/bsdiff.c b/bsdiff/bsdiff.c index b35ec30..8d81183 100644 --- a/bsdiff/bsdiff.c +++ b/bsdiff/bsdiff.c @@ -76,7 +76,7 @@ __FBSDID("$FreeBSD: src/usr.bin/bsdiff/bsdiff/bsdiff.c,v 1.1 2005/08/06 01:59:05 #define __IN_BSDIFF__ #include "bscommon.h" -#define MIN(x,y) (((x)<(y)) ? (x) : (y)) +#define BDIFF_MIN(x,y) (((x)<(y)) ? 
(x) : (y)) static void split(bsize_t *I,bsize_t *V,bsize_t start,bsize_t len,bsize_t h) { @@ -237,7 +237,7 @@ static bsize_t search(bsize_t *I,u_char *oldbuf,bsize_t oldsize, }; x=st+(en-st)/2; - if(memcmp(oldbuf+I[x],newbuf,MIN(oldsize-I[x],newsize))<0) { + if(memcmp(oldbuf+I[x],newbuf,BDIFF_MIN(oldsize-I[x],newsize))<0) { return search(I,oldbuf,oldsize,newbuf,newsize,x,en,pos); } else { return search(I,oldbuf,oldsize,newbuf,newsize,st,x,pos); diff --git a/config b/config index e12b2f6..5b9beb1 100755 --- a/config +++ b/config @@ -41,6 +41,8 @@ ${prog} [] Enable building against an alternate Zlib installation. --with-bzlib= (Default: System) Enable building against an alternate Bzip2 and library installation. +--with-libarchive= (Default: System) + Enable building against an alternate libarchive installation. --no-sse-detect Do NOT attempt to probe the system's SSE/AVX capability for build flags. --no-1.3-archive-compat Disable compatibility with compressed archives created with Pcompress version 1.3 (default: retain compatibility). Hash formats changed from @@ -69,6 +71,7 @@ openssl_libdir= openssl_incdir= libbz2_libdir= libz_libdir= +libarchive_libdir= sha256asmobjs= sha256objs= keylen= @@ -79,6 +82,7 @@ keccak_srcs_asm= extra_opt_flags= zlib_prefix= bzlib_prefix= +libarchive_prefix= sse_detect=1 sse_opt_flags="-msse2" crypto_compat_objs='\$\(CRYPTO_COMPAT_OBJS\)' @@ -90,6 +94,7 @@ salsa20_debug= rm -rf ./buildtmp mkdir ./buildtmp +echo "Checking for GCC ..." # Try a simple compilation cat << _EOF > tst.c #include @@ -115,6 +120,7 @@ then fi # Check bitness of system/toolchain +echo "Checking for 32-bit/64-bit platform ..." bitness=`./tst` if [ $bitness -lt 8 ] then @@ -168,6 +174,9 @@ do --with-bzlib=*) bzlib_prefix=`echo ${arg1} | cut -f2 -d"="` ;; + --with-libarchive=*) + libarchive_prefix=`echo ${arg1} | cut -f2 -d"="` + ;; --use-key256) keylen='-DDEFAULT_KEYLEN=16' ;; @@ -198,6 +207,7 @@ else typ="RELEASE" fi +echo "Checking OS ..." 
OS=$(uname) skeinblock='\$\(SKEIN_BLOCK_C\)' if [ "$OS" = "Linux" ] @@ -212,6 +222,7 @@ else fi # Check GCC version +echo "Checking GCC version ..." vers=`gcc -dumpversion` OIFS="$IFS" IFS=. @@ -227,6 +238,7 @@ then fi # SSE Detection +echo -n "Checking for CPU SSE version ... " if [ $sse_detect -eq 1 ] then gcc -o sse_level ./utils/sse_level.c ./utils/cpuid.c -I./utils @@ -246,6 +258,7 @@ then echo "" exit 1 fi + echo $sse_ver rm -f sse_level sse_opt_flags="-m${sse_ver}" fi @@ -260,6 +273,7 @@ then # # Detect Yasm # + echo "Checking for Yasm ..." for bindir in /bin /usr/bin /usr/local/bin do if [ -x ${bindir}/yasm ] @@ -297,6 +311,7 @@ else fi # Detect OpenSSL library +echo "Checking for OpenSSL ..." for lib in "${openssl_prefix}/lib64" "${openssl_prefix}/usr/lib64" \ "${openssl_prefix}/lib" "${openssl_prefix}/usr/lib" \ "${openssl_prefix}/ssl/lib64" "${openssl_prefix}/ssl/lib" \ @@ -354,6 +369,7 @@ fi # Check for OpenSSL version +echo "Checking OpenSSL version ..." cat << __EOF > tst.c #include #include @@ -381,6 +397,7 @@ then fi # Check for HMAC_CTX_copy function +echo -n "Checking if the OpenSSL library provides HMAC_CTX_copy function ... " cat << __EOF > tst.c #include #include @@ -407,13 +424,16 @@ gcc ${extra_opt_flags} -I${openssl_incdir} -L${openssl_libdir} -O0 -g tst.c -o t if [ $? -ne 0 ] then openssl_incdir="${openssl_incdir} -D__OSSL_OLD__" + echo "No. Using internal variant." +else + echo "Yes." fi rm -f tst* openssl_libdir="${openssl_libdir},--enable-new-dtags" # Detect other library packages -for libspec in "libbz2:${bzlib_prefix}" "libz:${zlib_prefix}" +for libspec in "libbz2:${bzlib_prefix}" "libz:${zlib_prefix}" "libarchive:${libarchive_prefix}" do _OIFS="$IFS" IFS=":" @@ -422,6 +442,7 @@ do pref=$2 IFS="$_OIFS" + echo "Checking for $libname ..." 
use_prefix="${pref}" if [ "x${pref}" = "x" ] then @@ -477,10 +498,24 @@ then exit 1 fi +if [ "x${libarchive_libdir}" = "x" ] +then + if [ "x$libarchive_prefix" = "x" ] + then + echo "ERROR: Libarchive not detected." + echo " You may have to install libarchive-devel or libarchive-dev" + else + echo "ERROR: Libarchive not detected in given prefix." + fi + exit 1 +fi + libbz2_inc= libz_inc= +libarchive_inc= # Detect other library headers -for hdr in "libbz2_inc:bzlib.h:${bzlib_prefix}" "libz_inc:zlib.h:${zlib_prefix}" +for hdr in "libbz2_inc:bzlib.h:${bzlib_prefix}" "libz_inc:zlib.h:${zlib_prefix}" \ + "libarchive_inc:archive.h:${libarchive_prefix}" do _OIFS="$IFS" IFS=":" @@ -490,6 +525,7 @@ do pref=$3 IFS="$_OIFS" + echo "Checking for $hdrf ..." use_prefix="${pref}" if [ "x${pref}" = "x" ] then @@ -510,6 +546,7 @@ do done done +echo "Generating Makefile ..." linkvar="LINK" compilevar="COMPILE" compilecppvar="COMPILE_cpp" @@ -542,6 +579,8 @@ libbz2libdirvar="LIBBZ2_DIR" libzlibdirvar="LIBZ_DIR" libbz2incvar="LIBBZ2_INC" libzincvar="LIBZ_INC" +libarchivedirvar="LIBARCHIVE_DIR" +libarchiveincvar="LIBARCHIVE_INC" keccak_srcs_var="KECCAK_SRCS" keccak_hdrs_var="KECCAK_HDRS" @@ -599,5 +638,7 @@ s#@${crypto_compat_flags_var}@#${crypto_compat_flags}#g s#@${salsa20_stream_c_var}@#${salsa20_stream_c}#g s#@${salsa20_stream_asm_var}@#${salsa20_stream_asm}#g s#@${salsa20_debug_var}@#${salsa20_debug}#g +s#@${libarchivedirvar}@#${libarchive_libdir}#g +s#@${libarchiveincvar}@#${libarchive_inc}#g " > Makefile diff --git a/pcompress.c b/pcompress.c index e974fbb..5962faa 100644 --- a/pcompress.c +++ b/pcompress.c @@ -37,7 +37,6 @@ #include #include #include -#include #if defined(sun) || defined(__sun) #include #else @@ -54,6 +53,7 @@ #include #include #include +#include /* * We use 8MB chunks by default. 
@@ -70,9 +70,6 @@ struct wdata { }; pthread_mutex_t opt_parse = PTHREAD_MUTEX_INITIALIZER; -pthread_mutex_t f_mutex = PTHREAD_MUTEX_INITIALIZER; -static char *f_name_list[512]; -static int f_count = 512, f_inited = 0; static void * writer_thread(void *dat); static int init_algo(pc_ctx_t *pctx, const char *algo, int bail); @@ -190,52 +187,6 @@ show_compression_stats(pc_ctx_t *pctx) } } -/* - * Temporary file cleanup routines for SIGINT. Maintain a list of - * filenames to be removed in the signal handler. - */ -void -Int_Handler(int signo) -{ - int i; - - for (i = 0; i < f_count; i++) { - if (f_name_list[i] != NULL) { - unlink(f_name_list[i]); - f_name_list[i] = NULL; - } - } - exit(1); -} - -static void -add_fname(char *fn) { - int i; - - pthread_mutex_lock(&f_mutex); - for (i = 0; i < f_count; i++) { - if (f_name_list[i] == NULL) { - f_name_list[i] = fn; - break; - } - } - pthread_mutex_unlock(&f_mutex); -} - -static void -rm_fname(char *fn) { - int i; - - pthread_mutex_lock(&f_mutex); - for (i = 0; i < f_count; i++) { - if (f_name_list[i] != NULL) { - f_name_list[i] = fn; - break; - } - } - pthread_mutex_unlock(&f_mutex); -} - /* * Wrapper functions to pre-process the buffer and then call the main compression routine. * At present only LZP pre-compression is used below. 
Some extra metadata is added: @@ -721,7 +672,7 @@ start_decompress(pc_ctx_t *pctx, const char *filename, const char *to_filename) if (filename == NULL) { compfd = fileno(stdin); if (compfd == -1) { - perror("fileno "); + log_msg(LOG_ERR, 1, "fileno "); UNCOMP_BAIL; } sbuf.st_size = 0; @@ -747,12 +698,12 @@ start_decompress(pc_ctx_t *pctx, const char *filename, const char *to_filename) } else { compfd = fileno(stdin); if (compfd == -1) { - perror("fileno "); + log_msg(LOG_ERR, 1, "fileno "); UNCOMP_BAIL; } uncompfd = fileno(stdout); if (uncompfd == -1) { - perror("fileno "); + log_msg(LOG_ERR, 1, "fileno "); UNCOMP_BAIL; } } @@ -761,7 +712,7 @@ start_decompress(pc_ctx_t *pctx, const char *filename, const char *to_filename) * Read file header pieces and verify. */ if (Read(compfd, algorithm, ALGO_SZ) < ALGO_SZ) { - perror("Read: "); + log_msg(LOG_ERR, 1, "Read: "); UNCOMP_BAIL; } if (init_algo(pctx, algorithm, 0) != 0) { @@ -777,7 +728,7 @@ start_decompress(pc_ctx_t *pctx, const char *filename, const char *to_filename) Read(compfd, &flags, sizeof (flags)) < sizeof (flags) || Read(compfd, &chunksize, sizeof (chunksize)) < sizeof (chunksize) || Read(compfd, &level, sizeof (level)) < sizeof (level)) { - perror("Read: "); + log_msg(LOG_ERR, 1, "Read: "); UNCOMP_BAIL; } @@ -908,7 +859,7 @@ start_decompress(pc_ctx_t *pctx, const char *filename, const char *to_filename) UNCOMP_BAIL; } if (Read(compfd, &saltlen, sizeof (saltlen)) < sizeof (saltlen)) { - perror("Read: "); + log_msg(LOG_ERR, 1, "Read: "); UNCOMP_BAIL; } saltlen = ntohl(saltlen); @@ -916,7 +867,7 @@ start_decompress(pc_ctx_t *pctx, const char *filename, const char *to_filename) salt2 = (uchar_t *)malloc(saltlen); if (Read(compfd, salt1, saltlen) < saltlen) { free(salt1); free(salt2); - perror("Read: "); + log_msg(LOG_ERR, 1, "Read: "); UNCOMP_BAIL; } deserialize_checksum(salt2, salt1, saltlen); @@ -926,7 +877,7 @@ start_decompress(pc_ctx_t *pctx, const char *filename, const char *to_filename) free(salt2); 
memset(salt1, 0, saltlen); free(salt1); - perror("Read: "); + log_msg(LOG_ERR, 1, "Read: "); UNCOMP_BAIL; } @@ -943,7 +894,7 @@ start_decompress(pc_ctx_t *pctx, const char *filename, const char *to_filename) free(salt2); memset(salt1, 0, saltlen); free(salt1); - perror("Read: "); + log_msg(LOG_ERR, 1, "Read: "); UNCOMP_BAIL; } pctx->keylen = ntohl(pctx->keylen); @@ -954,7 +905,7 @@ start_decompress(pc_ctx_t *pctx, const char *filename, const char *to_filename) free(salt2); memset(salt1, 0, saltlen); free(salt1); - perror("Read: "); + log_msg(LOG_ERR, 1, "Read: "); UNCOMP_BAIL; } deserialize_checksum(hdr_hash2, hdr_hash1, pctx->mac_bytes); @@ -999,7 +950,7 @@ start_decompress(pc_ctx_t *pctx, const char *filename, const char *to_filename) } } if (pw_len == -1) { - perror(" "); + log_msg(LOG_ERR, 1, " "); memset(salt2, 0, saltlen); free(salt2); memset(salt1, 0, saltlen); @@ -1089,7 +1040,7 @@ start_decompress(pc_ctx_t *pctx, const char *filename, const char *to_filename) * Verify file header CRC32 in non-crypto mode. 
*/ if (Read(compfd, &crc1, sizeof (crc1)) < sizeof (crc1)) { - perror("Read: "); + log_msg(LOG_ERR, 1, "Read: "); UNCOMP_BAIL; } crc1 = htonl(crc1); @@ -1175,7 +1126,7 @@ start_decompress(pc_ctx_t *pctx, const char *filename, const char *to_filename) } if (pctx->enable_rabin_global) { if ((tdat->rctx->out_fd = open(to_filename, O_RDONLY, 0)) == -1) { - perror("Unable to get new read handle to output file"); + log_msg(LOG_ERR, 1, "Unable to get new read handle to output file"); UNCOMP_BAIL; } } @@ -1192,7 +1143,7 @@ start_decompress(pc_ctx_t *pctx, const char *filename, const char *to_filename) } if (pthread_create(&(tdat->thr), NULL, perform_decompress, (void *)tdat) != 0) { - perror("Error in thread creation: "); + log_msg(LOG_ERR, 1, "Error in thread creation: "); UNCOMP_BAIL; } } @@ -1218,7 +1169,7 @@ start_decompress(pc_ctx_t *pctx, const char *filename, const char *to_filename) w.chunksize = chunksize; w.pctx = pctx; if (pthread_create(&writer_thr, NULL, writer_thread, (void *)(&w)) != 0) { - perror("Error in thread creation: "); + log_msg(LOG_ERR, 1, "Error in thread creation: "); UNCOMP_BAIL; } @@ -1248,7 +1199,7 @@ start_decompress(pc_ctx_t *pctx, const char *filename, const char *to_filename) */ rb = Read(compfd, &tdat->len_cmp, sizeof (tdat->len_cmp)); if (rb != sizeof (tdat->len_cmp)) { - if (rb < 0) perror("Read: "); + if (rb < 0) log_msg(LOG_ERR, 1, "Read: "); else log_msg(LOG_ERR, 0, "Incomplete chunk %d header," "file corrupt\n", pctx->chunk_num); @@ -1311,7 +1262,7 @@ start_decompress(pc_ctx_t *pctx, const char *filename, const char *to_filename) if (pctx->main_cancel) break; if (tdat->rbytes < tdat->len_cmp + pctx->cksum_bytes + pctx->mac_bytes + CHUNK_FLAG_SZ) { if (tdat->rbytes < 0) { - perror("Read: "); + log_msg(LOG_ERR, 1, "Read: "); UNCOMP_BAIL; } else { log_msg(LOG_ERR, 0, "Incomplete chunk %d, file corrupt.\n", @@ -1352,7 +1303,7 @@ uncomp_done: if (filename != NULL) { fchmod(uncompfd, sbuf.st_mode); if (fchown(uncompfd, sbuf.st_uid, 
sbuf.st_gid) == -1) - perror("Chown "); + log_msg(LOG_ERR, 1, "Chown "); } if (dary != NULL) { for (i = 0; i < nprocs; i++) { @@ -1679,7 +1630,7 @@ repeat: wbytes = Write(w->wfd, tdat->cmp_seg, tdat->len_cmp); if (unlikely(wbytes != tdat->len_cmp)) { - perror("Chunk Write: "); + log_msg(LOG_ERR, 1, "Chunk Write: "); do_cancel: pctx->main_cancel = 1; tdat->cancel = 1; @@ -1811,26 +1762,40 @@ start_compress(pc_ctx_t *pctx, const char *filename, uint64_t chunksize, int lev /* A host of sanity checks. */ if (!pctx->pipe_mode) { char *tmp; - if ((uncompfd = open(filename, O_RDONLY, 0)) == -1) { - log_msg(LOG_ERR, 1, "Cannot open: %s", filename); - return (1); - } + if (!(pctx->archive_mode)) { + if ((uncompfd = open(filename, O_RDONLY, 0)) == -1) { + log_msg(LOG_ERR, 1, "Cannot open: %s", filename); + return (1); + } - if (fstat(uncompfd, &sbuf) == -1) { - close(uncompfd); - log_msg(LOG_ERR, 1, "Cannot stat: %s", filename); - return (1); - } + if (fstat(uncompfd, &sbuf) == -1) { + close(uncompfd); + log_msg(LOG_ERR, 1, "Cannot stat: %s", filename); + return (1); + } - if (!S_ISREG(sbuf.st_mode)) { - close(uncompfd); - log_msg(LOG_ERR, 0, "File %s is not a regular file.\n", filename); - return (1); - } + if (!S_ISREG(sbuf.st_mode)) { + close(uncompfd); + log_msg(LOG_ERR, 0, "File %s is not a regular file.\n", filename); + return (1); + } - if (sbuf.st_size == 0) { - close(uncompfd); - return (1); + if (sbuf.st_size == 0) { + close(uncompfd); + return (1); + } + } else { + if (setup_archive(pctx, &sbuf) == -1) { + log_msg(LOG_ERR, 0, "Setup archive failed for %s\n", pctx->filename); + return (1); + } + + /* + * This is a pipe between the libarchive based archiving process and + * the rest of the compression stuff. 
+ */ + uncompfd = pctx->uncompfd; + exit(0); } /* @@ -1882,7 +1847,7 @@ start_compress(pc_ctx_t *pctx, const char *filename, uint64_t chunksize, int lev if (pctx->pipe_out) { compfd = fileno(stdout); if (compfd == -1) { - perror("fileno "); + log_msg(LOG_ERR, 1, "fileno "); COMP_BAIL; } } else { @@ -1890,21 +1855,19 @@ start_compress(pc_ctx_t *pctx, const char *filename, uint64_t chunksize, int lev strcat(tmpfile1, "/.pcompXXXXXX"); snprintf(to_filename, sizeof (to_filename), "%s" COMP_EXTN, filename); if ((compfd = mkstemp(tmpfile1)) == -1) { - perror("mkstemp "); + log_msg(LOG_ERR, 1, "mkstemp "); COMP_BAIL; } add_fname(tmpfile1); } else { snprintf(to_filename, sizeof (to_filename), "%s" COMP_EXTN, pctx->to_filename); if ((compfd = open(to_filename, O_CREAT|O_RDWR, S_IRUSR|S_IWUSR)) == -1) { - perror("open "); + log_msg(LOG_ERR, 1, "open "); COMP_BAIL; } add_fname(to_filename); } } - signal(SIGINT, Int_Handler); - signal(SIGTERM, Int_Handler); } else { char *tmp; @@ -1913,33 +1876,21 @@ start_compress(pc_ctx_t *pctx, const char *filename, uint64_t chunksize, int lev */ compfd = fileno(stdout); if (compfd == -1) { - perror("fileno "); + log_msg(LOG_ERR, 1, "fileno "); COMP_BAIL; } uncompfd = fileno(stdin); if (uncompfd == -1) { - perror("fileno "); + log_msg(LOG_ERR, 1, "fileno "); COMP_BAIL; } /* * Get a workable temporary dir. Required if global dedupe is enabled. 
*/ - tmp = getenv("PCOMPRESS_CACHE_DIR"); - if (tmp == NULL || !chk_dir(tmp)) { - tmp = getenv("TMPDIR"); - if (tmp == NULL || !chk_dir(tmp)) { - tmp = getenv("HOME"); - if (tmp == NULL || !chk_dir(tmp)) { - if (getcwd(tmpdir, MAXPATHLEN) == NULL) { - tmp = "/tmp"; - } else { - tmp = tmpdir; - } - } - } - } + tmp = get_temp_dir(); strcpy(tmpdir, tmp); + free(tmp); } if (pctx->enable_rabin_global) { @@ -2061,7 +2012,7 @@ start_compress(pc_ctx_t *pctx, const char *filename, uint64_t chunksize, int lev } if (pthread_create(&(tdat->thr), NULL, perform_compress, (void *)tdat) != 0) { - perror("Error in thread creation: "); + log_msg(LOG_ERR, 1, "Error in thread creation: "); COMP_BAIL; } } @@ -2099,7 +2050,7 @@ start_compress(pc_ctx_t *pctx, const char *filename, uint64_t chunksize, int lev w.nprocs = nprocs; w.pctx = pctx; if (pthread_create(&writer_thr, NULL, writer_thread, (void *)(&w)) != 0) { - perror("Error in thread creation: "); + log_msg(LOG_ERR, 1, "Error in thread creation: "); COMP_BAIL; } wthread = 1; @@ -2146,7 +2097,7 @@ start_compress(pc_ctx_t *pctx, const char *filename, uint64_t chunksize, int lev pos += sizeof (int); } if (Write(compfd, cread_buf, pos - cread_buf) != pos - cread_buf) { - perror("Write "); + log_msg(LOG_ERR, 1, "Write "); COMP_BAIL; } @@ -2173,7 +2124,7 @@ start_compress(pc_ctx_t *pctx, const char *filename, uint64_t chunksize, int lev serialize_checksum(hdr_hash, pos, hlen); pos += hlen; if (Write(compfd, cread_buf, pos - cread_buf) != pos - cread_buf) { - perror("Write "); + log_msg(LOG_ERR, 1, "Write "); COMP_BAIL; } } else { @@ -2183,7 +2134,7 @@ start_compress(pc_ctx_t *pctx, const char *filename, uint64_t chunksize, int lev uint32_t crc = lzma_crc32(cread_buf, pos - cread_buf, 0); U32_P(cread_buf) = htonl(crc); if (Write(compfd, cread_buf, sizeof (uint32_t)) != sizeof (uint32_t)) { - perror("Write "); + log_msg(LOG_ERR, 1, "Write "); COMP_BAIL; } } @@ -2298,7 +2249,7 @@ start_compress(pc_ctx_t *pctx, const char *filename, 
uint64_t chunksize, int lev if (rbytes < chunksize) { if (rbytes < 0) { bail = 1; - perror("Read: "); + log_msg(LOG_ERR, 1, "Read: "); COMP_BAIL; } } @@ -2367,7 +2318,7 @@ comp_done: compressed_chunksize = 0; if (Write(compfd, &compressed_chunksize, sizeof (compressed_chunksize)) < 0) { - perror("Write "); + log_msg(LOG_ERR, 1, "Write "); err = 1; } @@ -2381,12 +2332,12 @@ comp_done: */ fchmod(compfd, sbuf.st_mode); if (fchown(compfd, sbuf.st_uid, sbuf.st_gid) == -1) - perror("chown "); + log_msg(LOG_ERR, 1, "chown "); close(compfd); if (pctx->to_filename == NULL) { if (rename(tmpfile1, to_filename) == -1) { - perror("Cannot rename temporary file "); + log_msg(LOG_ERR, 1, "Cannot rename temporary file "); unlink(tmpfile1); } rm_fname(tmpfile1); @@ -2553,11 +2504,6 @@ create_pc_context(void) { pc_ctx_t *ctx = (pc_ctx_t *)malloc(sizeof (pc_ctx_t)); - pthread_mutex_lock(&f_mutex); - if (!f_inited) { - memset(f_name_list, 0, sizeof (f_name_list)); - } - pthread_mutex_unlock(&f_mutex); slab_init(); init_pcompress(); @@ -2779,6 +2725,7 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[]) else pctx->rab_blk_size = RAB_BLK_DEFAULT; } + /* * Remaining mandatory arguments are the filenames. 
*/ @@ -2915,6 +2862,16 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[]) */ pctx->cksum_bytes = 0; } + + if (pctx->do_compress) { + struct stat sbuf; + + if (stat(pctx->filename, &sbuf) == -1) { + log_msg(LOG_ERR, 1, "Cannot stat: %s", pctx->filename); + return (1); + } + if (S_ISDIR(sbuf.st_mode)) pctx->archive_mode = 1; + } pctx->inited = 1; return (0); @@ -2927,6 +2884,8 @@ start_pcompress(pc_ctx_t *pctx) if (!pctx->inited) return (1); + + handle_signals(); err = 0; if (pctx->do_compress) err = start_compress(pctx, pctx->filename, pctx->chunksize, pctx->level); diff --git a/pcompress.h b/pcompress.h index 2dd046e..1d6235f 100644 --- a/pcompress.h +++ b/pcompress.h @@ -197,9 +197,14 @@ typedef struct pc_ctx { int enable_fixed_scan; int lzp_preprocess; int encrypt_type; + int archive_mode; + char archive_members_file[MAXPATHLEN]; + int archive_members_fd, archive_data_fd; + void *archive_ctx; + int uncompfd, compfd; unsigned int chunk_num; uint64_t largest_chunk, smallest_chunk, avg_chunk; - uint64_t chunksize; + uint64_t chunksize, archive_size; const char *algo, *filename, *to_filename; char *exec_name; int do_compress, level; diff --git a/rabin/global/dedupe_config.c b/rabin/global/dedupe_config.c index fb9deee..1d93e6a 100644 --- a/rabin/global/dedupe_config.c +++ b/rabin/global/dedupe_config.c @@ -204,7 +204,7 @@ read_config(char *configfile, archive_config_t *cfg) fh = fopen(configfile, "r"); if (fh == NULL) { - perror(" "); + log_msg(LOG_ERR, 1, " "); return (1); } while (fgets(line, 255, fh) != NULL) { @@ -232,7 +232,7 @@ read_config(char *configfile, archive_config_t *cfg) struct stat sb; if (stat(pos, &sb) == -1) { if (errno != ENOENT) { - perror(" "); + log_msg(LOG_ERR, 1, " "); log_msg(LOG_ERR, 0, "Invalid ROOTDIR.\n"); fclose(fh); return (1); @@ -336,7 +336,7 @@ write_config(char *configfile, archive_config_t *cfg) fh = fopen(configfile, "w"); if (fh == NULL) { - perror(" "); + log_msg(LOG_ERR, 1, " "); return (1); } diff --git 
a/rabin/global/index.c b/rabin/global/index.c index fc238d3..ed65813 100644 --- a/rabin/global/index.c +++ b/rabin/global/index.c @@ -285,7 +285,7 @@ init_global_db_s(char *path, char *tmppath, uint32_t chunksize, uint64_t user_ch for (i = 0; i < nthreads; i++) { cfg->seg_fd_r[i].fd = open(cfg->rootdir, O_RDONLY); if (cfg->seg_fd_r[i].fd == -1) { - perror(" "); + log_msg(LOG_ERR, 1, " "); errored = 1; break; } @@ -394,7 +394,7 @@ db_segcache_map(archive_config_t *cfg, int tid, uint32_t *blknum, uint64_t *offs db_segcache_unmap(cfg, tid); fd = cfg->seg_fd_r[tid].fd; if (lseek(fd, *offset, SEEK_SET) != *offset) { - perror(" "); + log_msg(LOG_ERR, 1, " "); return (-1); } @@ -410,7 +410,7 @@ db_segcache_map(archive_config_t *cfg, int tid, uint32_t *blknum, uint64_t *offs mapbuf = mmap(NULL, len + adj, PROT_READ, MAP_SHARED, fd, *offset - adj); if (mapbuf == MAP_FAILED) { - perror(" "); + log_msg(LOG_ERR, 1, " "); return (-1); } diff --git a/rabin/rabin_dedup.c b/rabin/rabin_dedup.c index ea4b6a5..bba0899 100755 --- a/rabin/rabin_dedup.c +++ b/rabin/rabin_dedup.c @@ -1609,7 +1609,7 @@ dedupe_decompress(dedupe_context_t *ctx, uchar_t *buf, uint64_t *size) adj = pos1 % ctx->pagesize; src2 = mmap(NULL, len + adj, PROT_READ, MAP_SHARED, ctx->out_fd, pos1 - adj); if (src2 == NULL) { - perror("MMAP failed "); + log_msg(LOG_ERR, 1, "MMAP failed "); ctx->valid = 0; break; } diff --git a/utils/utils.c b/utils/utils.c index 0a028b5..c6be5d4 100644 --- a/utils/utils.c +++ b/utils/utils.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -48,8 +49,11 @@ #include "utils.h" processor_info_t proc_info; +pthread_mutex_t f_mutex = PTHREAD_MUTEX_INITIALIZER; static int cur_log_level = 1; static log_dest_t ldest = {LOG_OUTPUT, LOG_INFO, NULL}; +static char *f_name_list[512]; +static int f_count = 512, f_inited = 0; void init_pcompress() { @@ -213,13 +217,13 @@ Read_Adjusted(int fd, uchar_t *buf, uint64_t count, int64_t *rabin_count, void * int64_t 
rcount; dedupe_context_t *rctx = (dedupe_context_t *)ctx; - if (!ctx) return (Read(fd, buf, count)); + if (!ctx) return (Read(fd, buf, count)); buf2 = buf; if (*rabin_count) { buf2 = buf + *rabin_count; count -= *rabin_count; } - rcount = Read(fd, buf2, count); + rcount = Read(fd, buf2, count); if (rcount > 0) { rcount += *rabin_count; if (rcount == count) { @@ -422,6 +426,10 @@ chk_dir(char *dir) return (1); } +/* + * Simple logging functions. Used for all error and info messages. + * Default log destination is STDOUT. + */ void DLL_EXPORT set_log_dest(log_dest_t *dest) { 
@@ -464,3 +472,113 @@ log_msg(log_level_t log_level, int show_errno, const char *format, ...)
 		ldest.cb(msg);
 	}
 }
+
+/*
+ * Pick a directory for temporary files. The first usable candidate wins,
+ * tried in this order: $PCOMPRESS_CACHE_DIR, $TMPDIR, $HOME (each must pass
+ * chk_dir()), then the current working directory, then "/tmp" if getcwd()
+ * fails. Returns a strdup()ed copy that the caller must free().
+ */
+char *
+get_temp_dir()
+{
+	char *tmp;
+	char tmpdir[MAXPATHLEN];
+
+	tmp = getenv("PCOMPRESS_CACHE_DIR");
+	if (tmp == NULL || !chk_dir(tmp)) {
+		tmp = getenv("TMPDIR");
+		if (tmp == NULL || !chk_dir(tmp)) {
+			tmp = getenv("HOME");
+			if (tmp == NULL || !chk_dir(tmp)) {
+				if (getcwd(tmpdir, MAXPATHLEN) == NULL) {
+					tmp = "/tmp";
+				} else {
+					tmp = tmpdir;
+				}
+			}
+		}
+	}
+	return (strdup(tmp));
+}
+
+/*
+ * Temporary file cleanup routines for SIGINT. Maintain a list of
+ * filenames to be removed in the signal handler.
+ *
+ * Int_Handler unlinks every registered name and ends the process with
+ * status 1. It calls _exit() rather than exit() because exit() is not
+ * async-signal-safe.
+ */
+void
+Int_Handler(int signo)
+{
+	int i;
+
+	for (i = 0; i < f_count; i++) {
+		if (f_name_list[i] != NULL) {
+			unlink(f_name_list[i]);
+			f_name_list[i] = NULL;
+		}
+	}
+	_exit(1);
+}
+
+/*
+ * Install Int_Handler for SIGINT and SIGTERM. The name list is cleared on
+ * the first call only, so a later call cannot drop names that are already
+ * registered.
+ */
+void
+handle_signals()
+{
+	pthread_mutex_lock(&f_mutex);
+	if (!f_inited) {
+		memset(f_name_list, 0, sizeof (f_name_list));
+		f_inited = 1;
+	}
+	pthread_mutex_unlock(&f_mutex);
+	signal(SIGINT, Int_Handler);
+	signal(SIGTERM, Int_Handler);
+}
+
+/*
+ * Register fn for removal by Int_Handler. The pointer itself is stored in
+ * the first free slot, not a copy of the string, so the buffer must stay
+ * valid until rm_fname(fn) is called. If no slot is free the name is
+ * silently not recorded.
+ */
+void
+add_fname(char *fn)
+{
+	int i;
+
+	pthread_mutex_lock(&f_mutex);
+	for (i = 0; i < f_count; i++) {
+		if (f_name_list[i] == NULL) {
+			f_name_list[i] = fn;
+			break;
+		}
+	}
+	pthread_mutex_unlock(&f_mutex);
+}
+
+/*
+ * Unregister a name added with add_fname(). Entries are matched by pointer
+ * value, which is how add_fname() stores them; the matching slot is freed.
+ * Does nothing if fn is not registered.
+ */
+void
+rm_fname(char *fn)
+{
+	int i;
+
+	pthread_mutex_lock(&f_mutex);
+	for (i = 0; i < f_count; i++) {
+		if (f_name_list[i] == fn) {
+			f_name_list[i] = NULL;
+			break;
+		}
+	}
+	pthread_mutex_unlock(&f_mutex);
+}
diff --git a/utils/utils.h b/utils/utils.h index eabb871..d769e34 100644 --- a/utils/utils.h +++ b/utils/utils.h @@ -35,6 +35,7 @@ #endif #include +#include #include #include #include @@ -57,7 +58,7 @@ extern "C" { #define SIXTEEN_GB (EIGHT_GB * 2) #if !defined(sun) && !defined(__sun) -#define uchar_t u_char +typedef unsigned char uchar_t ; #endif #if ULONG_MAX == 4294967295UL @@ -243,6 +244,7 @@ extern void get_sys_limits(my_sysinfo *msys_info); extern int chk_dir(char *dir); extern void init_algo_props(algo_props_t *props); extern void init_pcompress(); +extern char *get_temp_dir(); /* Pointer type for compress and decompress functions. */ typedef int (*compress_func_ptr)(void *src, uint64_t srclen, void *dst, @@ -287,6 +289,14 @@ void set_log_dest(log_dest_t *dest); void set_log_level(int level); void log_msg(log_level_t log_level, int show_errno, const char *format, ...); +/* + * Tempfile cleanup handlers and tempfile registration routines. + */ +void Int_Handler(int signo); +void handle_signals(); +void add_fname(char *fn); +void rm_fname(char *fn); + /* * Roundup v to the nearest power of 2. 
From Bit Twiddling Hacks: * http://graphics.stanford.edu/~seander/bithacks.html