From e09d8a485c3eef36e8c8774d574b0265c0f2b6cb Mon Sep 17 00:00:00 2001 From: Moinak Ghosh Date: Thu, 31 Oct 2013 00:15:17 +0530 Subject: [PATCH] Archiving support using Libarchive: Working archive extraction. --- archive/pc_archive.c | 117 ++++++++++++++++++++++++++++- archive/pc_archive.h | 4 +- pcompress.c | 172 +++++++++++++++++++++++++++++++++++-------- pcompress.h | 8 +- 4 files changed, 264 insertions(+), 37 deletions(-) diff --git a/archive/pc_archive.c b/archive/pc_archive.c index e97cf78..6e2a977 100644 --- a/archive/pc_archive.c +++ b/archive/pc_archive.c @@ -104,7 +104,7 @@ add_pathname(const char *fpath, const struct stat *sb, * sets up the libarchive context. */ int -setup_archive(pc_ctx_t *pctx, struct stat *sbuf) +setup_archiver(pc_ctx_t *pctx, struct stat *sbuf) { char *tmpfile, *tmp; int err, fd, pipefd[2]; @@ -176,14 +176,15 @@ setup_archive(pc_ctx_t *pctx, struct stat *sbuf) a_state.bufpos = 0; } pctx->archive_size += a_state.arc_size; - sbuf->st_size += a_state.arc_size; fn = fn->next; } pthread_mutex_unlock(&nftw_mutex); + sbuf->st_size = pctx->archive_size; lseek(fd, 0, SEEK_SET); free(pbuf); sbuf->st_uid = geteuid(); sbuf->st_gid = getegid(); + sbuf->st_mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH; if (pipe(pipefd) == -1) { log_msg(LOG_ERR, 1, "Unable to create archiver pipe.\n"); @@ -208,6 +209,32 @@ setup_archive(pc_ctx_t *pctx, struct stat *sbuf) return (0); } +int +setup_extractor(pc_ctx_t *pctx) +{ + int pipefd[2]; + struct archive *arc; + + if (pipe(pipefd) == -1) { + log_msg(LOG_ERR, 1, "Unable to create extractor pipe.\n"); + return (-1); + } + + pctx->uncompfd = pipefd[1]; // Write side + pctx->archive_data_fd = pipefd[0]; // Read side + + arc = archive_read_new(); + if (!arc) { + log_msg(LOG_ERR, 1, "Unable to create libarchive context.\n"); + close(pipefd[0]); close(pipefd[1]); + return (-1); + } + archive_read_support_format_all(arc); + pctx->archive_ctx = arc; + + return (0); +} + /* * Routines to archive members and write the archive to pipe. Most of the following * code is adapted from some of the Libarchive bsdtar code. @@ -296,7 +323,7 @@ write_entry(pc_ctx_t *pctx, struct archive *arc, struct archive *in_arc, * reads from the other end and compresses. */ static void * -archive_thread_func(void *dat) { +archiver_thread_func(void *dat) { pc_ctx_t *pctx = (pc_ctx_t *)dat; char fpath[PATH_MAX], *name; ssize_t rbytes; @@ -390,5 +417,87 @@ done: int start_archiver(pc_ctx_t *pctx) { - return (pthread_create(&(pctx->archive_thread), NULL, archive_thread_func, (void *)pctx)); + return (pthread_create(&(pctx->archive_thread), NULL, archiver_thread_func, (void *)pctx)); +} + +/* + * Extract Thread function. Read an uncompressed archive from the pipe and extract + * members to disk. The decompressor writes to the other end of the pipe. + */ +static void * +extractor_thread_func(void *dat) { + pc_ctx_t *pctx = (pc_ctx_t *)dat; + char cwd[PATH_MAX], got_cwd; + int flags, rv; + struct archive_entry *entry; + struct archive *awd, *arc; + + flags = ARCHIVE_EXTRACT_TIME; + flags |= ARCHIVE_EXTRACT_PERM; + flags |= ARCHIVE_EXTRACT_ACL; + flags |= ARCHIVE_EXTRACT_FFLAGS; + + got_cwd = 1; + if (getcwd(cwd, PATH_MAX) == NULL) { + log_msg(LOG_WARN, 1, "Cannot get current directory."); + got_cwd = 0; + } + + if (chdir(pctx->to_filename) == -1) { + log_msg(LOG_ERR, 1, "Cannot change to dir: %s", pctx->to_filename); + goto done; + } + + awd = archive_write_disk_new(); + archive_write_disk_set_options(awd, flags); + archive_write_disk_set_standard_lookup(awd); + arc = (struct archive *)(pctx->archive_ctx); + archive_read_open_fd(arc, pctx->archive_data_fd, MMAP_SIZE); + + /* + * Read archive entries and extract to disk. + */ + while ((rv = archive_read_next_header(arc, &entry)) != ARCHIVE_EOF) { + if (rv != ARCHIVE_OK) + log_msg(LOG_WARN, 0, "%s", archive_error_string(arc)); + + if (rv == ARCHIVE_FATAL) { + log_msg(LOG_ERR, 0, "Fatal error aborting extraction."); + break; + } + + if (rv == ARCHIVE_RETRY) { + log_msg(LOG_INFO, 0, "Retrying extractor read ..."); + continue; + } + + rv = archive_read_extract2(arc, entry, awd); + if (rv != ARCHIVE_OK) { + log_msg(LOG_WARN, 0, "%s: %s", archive_entry_pathname(entry), + archive_error_string(arc)); + + } else if (pctx->verbose) { + log_msg(LOG_INFO, 0, "%10d %s", archive_entry_size(entry), + archive_entry_pathname(entry)); + } + + if (rv == ARCHIVE_FATAL) { + log_msg(LOG_ERR, 0, "Fatal error aborting extraction."); + break; + } + } + + if (got_cwd) { + rv = chdir(cwd); + } + archive_read_free(arc); + archive_write_free(awd); +done: + close(pctx->archive_data_fd); + return (NULL); +} + +int +start_extractor(pc_ctx_t *pctx) { + return (pthread_create(&(pctx->archive_thread), NULL, extractor_thread_func, (void *)pctx)); } diff --git a/archive/pc_archive.h b/archive/pc_archive.h index 53e9c7d..c0c83a2 100644 --- a/archive/pc_archive.h +++ b/archive/pc_archive.h @@ -41,8 +41,10 @@ typedef struct { /* * Archiving related functions. */ -int setup_archive(pc_ctx_t *pctx, struct stat *sbuf); +int setup_archiver(pc_ctx_t *pctx, struct stat *sbuf); int start_archiver(pc_ctx_t *pctx); +int setup_extractor(pc_ctx_t *pctx); +int start_extractor(pc_ctx_t *pctx); #ifdef __cplusplus } diff --git a/pcompress.c b/pcompress.c index c63bf94..45769d7 100644 --- a/pcompress.c +++ b/pcompress.c @@ -53,6 +53,7 @@ #include #include #include +#include #include /* @@ -644,7 +645,7 @@ cont: #define UNCOMP_BAIL err = 1; goto uncomp_done int DLL_EXPORT -start_decompress(pc_ctx_t *pctx, const char *filename, const char *to_filename) +start_decompress(pc_ctx_t *pctx, const char *filename, char *to_filename) { char algorithm[ALGO_SZ]; struct stat sbuf; @@ -689,23 +690,12 @@ start_decompress(pc_ctx_t *pctx, const char *filename, const char *to_filename) if (sbuf.st_size == 0) return (1); } - - if ((uncompfd = open(to_filename, O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR)) == -1) { - close(compfd); - log_msg(LOG_ERR, 1, "Cannot open: %s", to_filename); - return (1); - } } else { compfd = fileno(stdin); if (compfd == -1) { log_msg(LOG_ERR, 1, "fileno "); UNCOMP_BAIL; } - uncompfd = fileno(stdout); - if (uncompfd == -1) { - log_msg(LOG_ERR, 1, "fileno "); - UNCOMP_BAIL; - } } /* @@ -762,6 +752,76 @@ start_decompress(pc_ctx_t *pctx, const char *filename, const char *to_filename) goto uncomp_done; } + /* + * First check for archive mode. In that case the to_filename must be a directory. + */ + if (flags & FLAG_ARCHIVE) { + /* + * If to_filename is not set, we just use the current directory. + */ + if (to_filename == NULL) { + to_filename = "."; + pctx->to_filename = "."; + } + pctx->archive_mode = 1; + if (stat(to_filename, &sbuf) == -1) { + if (errno != ENOENT) { + log_msg(LOG_ERR, 1, "Target path is not a directory."); + err = 1; + goto uncomp_done; + } + if (mkdir(to_filename, S_IRUSR|S_IWUSR) == -1) { + log_msg(LOG_ERR, 1, "Unable to create target directory %s.", to_filename); + err = 1; + goto uncomp_done; + } + } + if (!S_ISDIR(sbuf.st_mode)) { + log_msg(LOG_ERR, 0, "Target path is not a directory.", to_filename); + err = 1; + goto uncomp_done; + } + } else { + const char *origf; + + if (to_filename == NULL) { + char *pos; + + /* + * Use unused space in archive_members_file buffer to hold generated + * filename so that it need not be explicitly freed at the end. + */ + to_filename = pctx->archive_members_file; + pctx->to_filename = pctx->archive_members_file; + pos = strrchr(filename, '.'); + if (pos != NULL) { + if ((pos[0] == 'p' || pos[0] == 'P') && (pos[1] == 'z' || pos[1] == 'Z')) { + memcpy(to_filename, filename, pos - filename); + } else { + pos = NULL; + } + } + + /* + * If no .pz extension is found then use .out as the + * decompressed file name. + */ + if (pos == NULL) { + strcpy(to_filename, filename); + strcat(to_filename, ".out"); + log_msg(LOG_WARN, 0, "Using %s for output file name.", to_filename); + } + } + origf = to_filename; + if ((to_filename = realpath(origf, NULL)) != NULL) { + free((void *)(to_filename)); + log_msg(LOG_ERR, 0, "File %s exists", origf); + err = 1; + goto uncomp_done; + } + } + + compressed_chunksize = chunksize + CHUNK_HDR_SZ + zlib_buf_extra(chunksize); if (pctx->_props_func) { @@ -955,7 +1015,6 @@ start_decompress(pc_ctx_t *pctx, const char *filename, const char *to_filename) free(salt2); memset(salt1, 0, saltlen); free(salt1); - close(uncompfd); unlink(to_filename); log_msg(LOG_ERR, 0, "Failed to get password."); UNCOMP_BAIL; } @@ -970,7 +1029,6 @@ start_decompress(pc_ctx_t *pctx, const char *filename, const char *to_filename) memset(salt1, 0, saltlen); free(salt1); memset(pctx->user_pw, 0, pctx->user_pw_len); - close(uncompfd); unlink(to_filename); log_msg(LOG_ERR, 0, "Failed to initialize crypto"); UNCOMP_BAIL; } @@ -985,7 +1043,6 @@ start_decompress(pc_ctx_t *pctx, const char *filename, const char *to_filename) memset(salt1, 0, saltlen); free(salt1); memset(pw, 0, MAX_PW_LEN); - close(uncompfd); unlink(to_filename); log_msg(LOG_ERR, 0, "Failed to initialize crypto"); UNCOMP_BAIL; } @@ -999,7 +1056,6 @@ start_decompress(pc_ctx_t *pctx, const char *filename, const char *to_filename) * Verify file header HMAC. */ if (hmac_init(&hdr_mac, pctx->cksum, &(pctx->crypto_ctx)) == -1) { - close(uncompfd); unlink(to_filename); log_msg(LOG_ERR, 0, "Cannot initialize header hmac."); UNCOMP_BAIL; } @@ -1026,7 +1082,6 @@ start_decompress(pc_ctx_t *pctx, const char *filename, const char *to_filename) free(salt1); memset(n1, 0, noncelen); if (memcmp(hdr_hash2, hdr_hash1, pctx->mac_bytes) != 0) { - close(uncompfd); unlink(to_filename); log_msg(LOG_ERR, 0, "Header verification failed! File tampered or wrong password."); UNCOMP_BAIL; } @@ -1056,12 +1111,48 @@ start_decompress(pc_ctx_t *pctx, const char *filename, const char *to_filename) d2 = htonl(level); crc2 = lzma_crc32((uchar_t *)&d2, sizeof (level), crc2); if (crc1 != crc2) { - close(uncompfd); unlink(to_filename); log_msg(LOG_ERR, 0, "Header verification failed! File tampered or wrong password."); UNCOMP_BAIL; } } + if (flags & FLAG_ARCHIVE) { + if (pctx->enable_rabin_global) { + strcpy(pctx->archive_temp_file, to_filename); + strcat(pctx->archive_temp_file, "/.data"); + if ((pctx->archive_temp_fd = open(pctx->archive_temp_file, + O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR)) == -1) { + log_msg(LOG_ERR, 1, "Cannot open temporary data file in target directory."); + UNCOMP_BAIL; + } + add_fname(pctx->archive_temp_file); + } + if (setup_extractor(pctx) == -1) { + log_msg(LOG_ERR, 0, "Setup of extraction context failed."); + UNCOMP_BAIL; + } + uncompfd = pctx->uncompfd; + + if (start_extractor(pctx) == -1) { + log_msg(LOG_ERR, 0, "Unable to start extraction thread."); + UNCOMP_BAIL; + } + } else { + if (!pctx->pipe_mode) { + if ((uncompfd = open(to_filename, O_WRONLY|O_CREAT|O_TRUNC, + S_IRUSR|S_IWUSR)) == -1) { + log_msg(LOG_ERR, 1, "Cannot open: %s", to_filename); + UNCOMP_BAIL; + } + } else { + uncompfd = fileno(stdout); + if (uncompfd == -1) { + log_msg(LOG_ERR, 1, "fileno "); + UNCOMP_BAIL; + } + } + } + nprocs = sysconf(_SC_NPROCESSORS_ONLN); if (pctx->nthreads > 0 && pctx->nthreads < nprocs) nprocs = pctx->nthreads; @@ -1126,9 +1217,19 @@ start_decompress(pc_ctx_t *pctx, const char *filename, const char *to_filename) UNCOMP_BAIL; } if (pctx->enable_rabin_global) { - if ((tdat->rctx->out_fd = open(to_filename, O_RDONLY, 0)) == -1) { - log_msg(LOG_ERR, 1, "Unable to get new read handle to output file"); - UNCOMP_BAIL; + if (pctx->archive_mode) { + if ((tdat->rctx->out_fd = open(pctx->archive_temp_file, + O_RDONLY, 0)) == -1) { + log_msg(LOG_ERR, 1, "Unable to get new read handle" + " to output file"); + UNCOMP_BAIL; + } + } else { + if ((tdat->rctx->out_fd = open(to_filename, O_RDONLY, 0)) == -1) { + log_msg(LOG_ERR, 1, "Unable to get new read handle" + " to output file"); + UNCOMP_BAIL; + } } } tdat->rctx->index_sem = &(tdat->index_sem); @@ -1326,6 +1427,13 @@ uncomp_done: if (filename && compfd != -1) close(compfd); if (uncompfd != -1) close(uncompfd); } + if (pctx->archive_mode) { + pthread_join(pctx->archive_thread, NULL); + if (pctx->enable_rabin_global) { + close(pctx->archive_temp_fd); + unlink(pctx->archive_temp_file); + } + } if (!pctx->hide_cmp_stats) show_compression_stats(pctx); @@ -1630,6 +1738,9 @@ repeat: } wbytes = Write(w->wfd, tdat->cmp_seg, tdat->len_cmp); + if (pctx->archive_temp_fd != -1 && wbytes == tdat->len_cmp) { + wbytes = Write(pctx->archive_temp_fd, tdat->cmp_seg, tdat->len_cmp); + } if (unlikely(wbytes != tdat->len_cmp)) { log_msg(LOG_ERR, 1, "Chunk Write: "); do_cancel: @@ -1786,8 +1897,8 @@ start_compress(pc_ctx_t *pctx, const char *filename, uint64_t chunksize, int lev return (1); } } else { - if (setup_archive(pctx, &sbuf) == -1) { - log_msg(LOG_ERR, 0, "Setup archive failed for %s", pctx->filename); + if (setup_archiver(pctx, &sbuf) == -1) { + log_msg(LOG_ERR, 0, "Setup archiver failed."); return (1); } @@ -2074,6 +2185,7 @@ start_compress(pc_ctx_t *pctx, const char *filename, uint64_t chunksize, int lev if (start_archiver(pctx) != 0) { COMP_BAIL; } + flags |= FLAG_ARCHIVE; } /* @@ -2552,6 +2664,7 @@ create_pc_context(void) ctx->hide_cmp_stats = 1; ctx->enable_rabin_split = 1; ctx->rab_blk_size = -1; + ctx->archive_temp_fd = -1; return (ctx); } @@ -2906,7 +3019,7 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[]) return (1); } } - } else if (pctx->do_uncompress && num_rem == 2) { + } else if (pctx->do_uncompress) { /* * While decompressing, input can be stdin and output a physical file. */ @@ -2918,13 +3031,12 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[]) return (1); } } - my_optind++; - if ((pctx->to_filename = realpath(argv[my_optind], NULL)) != NULL) { - free((void *)(pctx->to_filename)); - log_msg(LOG_ERR, 0, "File %s exists", argv[my_optind]); - return (1); + if (num_rem == 2) { + my_optind++; + pctx->to_filename = argv[my_optind]; + } else { + pctx->to_filename = NULL; } - pctx->to_filename = argv[my_optind]; } else { return (1); } diff --git a/pcompress.h b/pcompress.h index aacdda6..29b46e6 100644 --- a/pcompress.h +++ b/pcompress.h @@ -44,6 +44,7 @@ extern "C" { #define FLAG_DEDUP 1 #define FLAG_DEDUP_FIXED 2 #define FLAG_SINGLE_CHUNK 4 +#define FLAG_ARCHIVE 2048 #define UTILITY_VERSION "2.4" #define MASK_CRYPTO_ALG 0x30 #define MAX_LEVEL 14 @@ -204,10 +205,13 @@ typedef struct pc_ctx { void *archive_ctx; pthread_t archive_thread; int uncompfd, compfd; + char archive_temp_file[MAXPATHLEN]; + int archive_temp_fd; unsigned int chunk_num; uint64_t largest_chunk, smallest_chunk, avg_chunk; uint64_t chunksize, archive_size; - const char *algo, *filename, *to_filename; + const char *algo, *filename; + char *to_filename; struct fn_list *fn; char *exec_name; int do_compress, level; @@ -259,7 +263,7 @@ void pc_set_userpw(pc_ctx_t *pctx, unsigned char *pwdata, int pwlen); int start_pcompress(pc_ctx_t *pctx); int start_compress(pc_ctx_t *pctx, const char *filename, uint64_t chunksize, int level); -int start_decompress(pc_ctx_t *pctx, const char *filename, const char *to_filename); +int start_decompress(pc_ctx_t *pctx, const char *filename, char *to_filename); #ifdef __cplusplus }