From 448890a014ca20fc2a8f913f1ae0e87a0434f000 Mon Sep 17 00:00:00 2001 From: Moinak Ghosh Date: Sun, 3 Nov 2013 23:15:55 +0530 Subject: [PATCH] Replace slow pipe with direct memory copy for archive extraction. Miscellaneous corrections and tweaks. --- archive/pc_archive.c | 169 +++++++++++++++++++++++++++++++++++++++---- archive/pc_archive.h | 1 + pcompress.c | 26 +++++-- pcompress.h | 5 +- 4 files changed, 177 insertions(+), 24 deletions(-) diff --git a/archive/pc_archive.c b/archive/pc_archive.c index c6be1f7..bbdb4f9 100644 --- a/archive/pc_archive.c +++ b/archive/pc_archive.c @@ -95,7 +95,7 @@ pthread_mutex_t nftw_mutex = PTHREAD_MUTEX_INITIALIZER; * Archive writer callback routines for archive creation operation. */ static int -creat_open_callback(struct archive *arc, void *ctx) +arc_open_callback(struct archive *arc, void *ctx) { pc_ctx_t *pctx = (pc_ctx_t *)ctx; @@ -201,13 +201,79 @@ archiver_close(void *ctx) pc_ctx_t *pctx = (pc_ctx_t *)ctx; pctx->arc_closed = 1; + pctx->arc_buf = NULL; + pctx->arc_buf_size = 0; sem_post(&(pctx->write_sem)); sem_post(&(pctx->read_sem)); return (0); } +static int +extract_close_callback(struct archive *arc, void *ctx) +{ + pc_ctx_t *pctx = (pc_ctx_t *)ctx; + + pctx->arc_closed = 1; + if (pctx->arc_buf) { + sem_post(&(pctx->write_sem)); + } else { + pctx->arc_buf_size = 0; + } + return (ARCHIVE_OK); +} + +static ssize_t +extract_read_callback(struct archive *arc, void *ctx, const void **buf) +{ + pc_ctx_t *pctx = (pc_ctx_t *)ctx; + + if (pctx->arc_closed) { + pctx->arc_buf_size = 0; + archive_set_error(arc, ARCHIVE_EOF, "End of file."); + return (0); + } + + if (!pctx->arc_writing) { + sem_wait(&(pctx->read_sem)); + } else { + sem_post(&(pctx->write_sem)); + sem_wait(&(pctx->read_sem)); + } + + if (pctx->arc_buf == NULL || pctx->arc_buf_size == 0) { + pctx->arc_buf_size = 0; + archive_set_error(arc, ARCHIVE_EOF, "End of file when extracting archive."); + return (0); + } + pctx->arc_writing = 1; + *buf = pctx->arc_buf; + + return (pctx->arc_buf_size); +} + +int64_t +archiver_write(void *ctx, void *buf, uint64_t count) +{ + pc_ctx_t *pctx = (pc_ctx_t *)ctx; + + if (pctx->arc_closed) + return (0); + + if (pctx->arc_buf != NULL) { + log_msg(LOG_ERR, 0, "Incorrect sequencing of archiver_read() call."); + return (-1); + } + + pctx->arc_buf = buf; + pctx->arc_buf_size = count; + sem_post(&(pctx->read_sem)); + sem_wait(&(pctx->write_sem)); + pctx->arc_buf = NULL; + return (pctx->arc_buf_size); +} + /* - * Comparison function for sorting pathname mambers. Sort by name/extension and then + * Comparison function for sorting pathname members. Sort by name/extension and then * by size. */ static int @@ -255,7 +321,7 @@ compare_members_lt(member_entry_t *mem1, member_entry_t *mem2) { * fetches the next entry in ascending order of the predetermined sort keys. */ static int -read_next_path(pc_ctx_t *pctx, char *fpath) +read_next_path(pc_ctx_t *pctx, char *fpath, char **namechars) { short namelen; ssize_t rbytes; @@ -300,6 +366,10 @@ read_next_path(pc_ctx_t *pctx, char *fpath) pctx->temp_file_pos = mem1->file_pos; } + /* + * Increment popped position of the current buffer and check if it is empty. + * The empty buffer is freed and is taken out of the linked list of buffers. + */ srt1->pos++; if (srt1->pos > srt1->max) { if (srt1 == pctx->archive_sort_buf) { @@ -317,7 +387,7 @@ read_next_path(pc_ctx_t *pctx, char *fpath) * new mmap. */ if (pctx->temp_mmap_len > 0) { - int retried; + int retried, n; if (pctx->temp_file_pos < pctx->temp_mmap_pos || pctx->temp_file_pos - pctx->temp_mmap_pos > pctx->temp_mmap_len || @@ -364,6 +434,12 @@ do_mmap: buf = pctx->temp_mmap_buf + (pctx->temp_file_pos - pctx->temp_mmap_pos); memcpy(fpath, buf, namelen); fpath[namelen] = '\0'; + + n = namelen-1; + while (fpath[n] == '/' && n > 0) n--; + while (fpath[n] != '/' && fpath[n] != '\\' && n > 0) n--; + *namechars = &fpath[n+1]; + pctx->temp_file_pos += namelen; return (namelen); } @@ -646,7 +722,7 @@ setup_archiver(pc_ctx_t *pctx, struct stat *sbuf) return (-1); } archive_write_set_format_pax_restricted(arc); - archive_write_open(arc, pctx, creat_open_callback, + archive_write_open(arc, pctx, arc_open_callback, creat_write_callback, creat_close_callback); pctx->archive_ctx = arc; pctx->archive_members_fd = fd; @@ -677,9 +753,6 @@ setup_extractor(pc_ctx_t *pctx) return (-1); } - pctx->uncompfd = pipefd[1]; // Write side - pctx->archive_data_fd = pipefd[0]; // Read side - arc = archive_read_new(); if (!arc) { log_msg(LOG_ERR, 1, "Unable to create libarchive context.\n"); @@ -782,12 +855,13 @@ write_entry(pc_ctx_t *pctx, struct archive *arc, struct archive *in_arc, static void * archiver_thread_func(void *dat) { pc_ctx_t *pctx = (pc_ctx_t *)dat; - char fpath[PATH_MAX], *name; + char fpath[PATH_MAX], *name, *bnchars = NULL; // Silence compiler int warn, rbytes; uint32_t ctr; struct archive_entry *entry, *spare_entry, *ent; struct archive *arc, *ard; struct archive_entry_linkresolver *resolver; + int readdisk_flags; warn = 1; entry = archive_entry_new(); @@ -800,14 +874,18 @@ archiver_thread_func(void *dat) { } ctr = 1; + readdisk_flags = ARCHIVE_READDISK_NO_TRAVERSE_MOUNTS; + readdisk_flags |= ARCHIVE_READDISK_HONOR_NODUMP; + ard = archive_read_disk_new(); + archive_read_disk_set_behavior(ard, readdisk_flags); archive_read_disk_set_standard_lookup(ard); archive_read_disk_set_symlink_physical(ard); /* * Read next path entry from list file. read_next_path() also handles sorted reading. */ - while ((rbytes = read_next_path(pctx, fpath)) != 0) { + while ((rbytes = read_next_path(pctx, fpath, &bnchars)) != 0) { if (rbytes == -1) break; archive_entry_copy_sourcepath(entry, fpath); if (archive_read_disk_entry_from_file(ard, entry, -1, NULL) != ARCHIVE_OK) { @@ -831,6 +909,29 @@ archiver_thread_func(void *dat) { name += 1; } } + +#ifndef __APPLE__ + /* + * Workaround for libarchive weirdness on Non MAC OS X platforms. The files + * with names matching pattern: ._* are MAC OS X resource forks which contain + * extended attributes, ACLs etc. They should be handled accordingly on MAC + * platforms and treated as normal files on others. For some reason beyond me + * libarchive refuses to extract these files on Linux, no matter what I try. + * Bug? + * + * In this case the file basename is changed and a custom extended attribute + * is set to indicate extraction to change it back. + */ + if (bnchars[0] == '.' && bnchars[1] == '_') { + char *pos = strstr(name, "._"); + char name[] = "@.", value[] = "m"; + if (pos) { + *pos = '|'; + archive_entry_xattr_add_entry(entry, name, value, strlen(value)); + } + } +#endif + if (name != archive_entry_pathname(entry)) archive_entry_copy_pathname(entry, name); @@ -862,7 +963,6 @@ done: archive_read_free(ard); archive_write_free(arc); close(pctx->archive_members_fd); - close(pctx->archive_data_fd); unlink(pctx->archive_members_file); return (NULL); } @@ -886,9 +986,21 @@ extractor_thread_func(void *dat) { struct archive *awd, *arc; flags = ARCHIVE_EXTRACT_TIME; - flags |= ARCHIVE_EXTRACT_PERM; - flags |= ARCHIVE_EXTRACT_ACL; - flags |= ARCHIVE_EXTRACT_FFLAGS; + flags |= ARCHIVE_EXTRACT_SECURE_SYMLINKS; + flags |= ARCHIVE_EXTRACT_SECURE_NODOTDOT; + flags |= ARCHIVE_EXTRACT_SPARSE; + + if (pctx->force_archive_perms || geteuid() == 0) { + flags |= ARCHIVE_EXTRACT_OWNER; + flags |= ARCHIVE_EXTRACT_PERM; + flags |= ARCHIVE_EXTRACT_ACL; + flags |= ARCHIVE_EXTRACT_XATTR; + flags |= ARCHIVE_EXTRACT_FFLAGS; + flags |= ARCHIVE_EXTRACT_MAC_METADATA; + } + + if (pctx->no_overwrite_newer) + flags |= ARCHIVE_EXTRACT_NO_OVERWRITE_NEWER; got_cwd = 1; if (getcwd(cwd, PATH_MAX) == NULL) { @@ -906,12 +1018,15 @@ extractor_thread_func(void *dat) { archive_write_disk_set_options(awd, flags); archive_write_disk_set_standard_lookup(awd); arc = (struct archive *)(pctx->archive_ctx); - archive_read_open_fd(arc, pctx->archive_data_fd, MMAP_SIZE); + archive_read_open(arc, pctx, arc_open_callback, extract_read_callback, extract_close_callback); /* * Read archive entries and extract to disk. */ while ((rv = archive_read_next_header(arc, &entry)) != ARCHIVE_EOF) { + const char *xt_name, *xt_value; + size_t xt_size; + if (rv != ARCHIVE_OK) log_msg(LOG_WARN, 0, "%s", archive_error_string(arc)); @@ -925,6 +1040,29 @@ extractor_thread_func(void *dat) { continue; } + /* + * Workaround for libarchive weirdness on Non MAC OS X platforms for filenames + * starting with '._'. See above ... + */ +#ifndef __APPLE__ + if (archive_entry_xattr_reset(entry) > 0) { + while (archive_entry_xattr_next(entry, &xt_name, (const void **)&xt_value, + &xt_size) == ARCHIVE_OK) { + if (xt_name[0] == '@' && xt_name[1] == '.' && xt_value[0] == 'm') { + const char *name; + char *pos; + name = archive_entry_pathname(entry); + pos = strstr(name, "|_"); + if (pos) { + *pos = '.'; + archive_entry_set_pathname(entry, name); + } + break; + } + } + } +#endif + rv = archive_read_extract2(arc, entry, awd); if (rv != ARCHIVE_OK) { log_msg(LOG_WARN, 0, "%s: %s", archive_entry_pathname(entry), @@ -948,7 +1086,6 @@ extractor_thread_func(void *dat) { archive_read_free(arc); archive_write_free(awd); done: - close(pctx->archive_data_fd); return (NULL); } diff --git a/archive/pc_archive.h b/archive/pc_archive.h index 5fe657e..6e8eb7c 100644 --- a/archive/pc_archive.h +++ b/archive/pc_archive.h @@ -46,6 +46,7 @@ int start_archiver(pc_ctx_t *pctx); int setup_extractor(pc_ctx_t *pctx); int start_extractor(pc_ctx_t *pctx); int64_t archiver_read(void *ctx, void *buf, uint64_t count); +int64_t archiver_write(void *ctx, void *buf, uint64_t count); int archiver_close(void *ctx); #ifdef __cplusplus diff --git a/pcompress.c b/pcompress.c index c2fb648..bd28afb 100644 --- a/pcompress.c +++ b/pcompress.c @@ -1127,11 +1127,11 @@ start_decompress(pc_ctx_t *pctx, const char *filename, char *to_filename) } add_fname(pctx->archive_temp_file); } + uncompfd = -1; if (setup_extractor(pctx) == -1) { log_msg(LOG_ERR, 0, "Setup of extraction context failed."); UNCOMP_BAIL; } - uncompfd = pctx->uncompfd; if (start_extractor(pctx) == -1) { log_msg(LOG_ERR, 0, "Unable to start extraction thread."); @@ -1402,7 +1402,7 @@ uncomp_done: /* * Ownership and mode of target should be same as original. */ - if (filename != NULL) { + if (filename != NULL && uncompfd != -1) { fchmod(uncompfd, sbuf.st_mode); if (fchown(uncompfd, sbuf.st_uid, sbuf.st_gid) == -1) log_msg(LOG_ERR, 1, "Chown "); @@ -1737,12 +1737,17 @@ repeat: pctx->avg_chunk += tdat->len_cmp; } - wbytes = Write(w->wfd, tdat->cmp_seg, tdat->len_cmp); + if (pctx->archive_mode && tdat->decompressing) { + wbytes = archiver_write(pctx, tdat->cmp_seg, tdat->len_cmp); + } else { + wbytes = Write(w->wfd, tdat->cmp_seg, tdat->len_cmp); + } if (pctx->archive_temp_fd != -1 && wbytes == tdat->len_cmp) { wbytes = Write(pctx->archive_temp_fd, tdat->cmp_seg, tdat->len_cmp); } if (unlikely(wbytes != tdat->len_cmp)) { - log_msg(LOG_ERR, 1, "Chunk Write: "); + log_msg(LOG_ERR, 1, "Chunk Write (expected: %" PRIu64 ", written: %" PRIu64 ") : ", + tdat->len_cmp, wbytes); do_cancel: pctx->main_cancel = 1; tdat->cancel = 1; @@ -2720,7 +2725,7 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[]) strcpy(pctx->exec_name, pos); pthread_mutex_lock(&opt_parse); - while ((opt = getopt(argc, argv, "dc:s:l:pt:MCDGEe:w:rLPS:B:Fk:avn")) != -1) { + while ((opt = getopt(argc, argv, "dc:s:l:pt:MCDGEe:w:LPS:B:Fk:avnmK")) != -1) { int ovr; int64_t chunksize; @@ -2865,6 +2870,14 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[]) pctx->enable_archive_sort = -1; break; + case 'm': + pctx->force_archive_perms = 1; + break; + + case 'K': + pctx->no_overwrite_newer = 1; + break; + case '?': default: return (2); @@ -2972,8 +2985,9 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[]) char *filename; if ((filename = realpath(argv[my_optind], NULL)) != NULL) { + free(filename); *fn = slab_alloc(NULL, sizeof (struct fn_list)); - (*fn)->filename = filename; + (*fn)->filename = strdup(argv[my_optind]); (*fn)->next = NULL; fn = &((*fn)->next); valid_paths++; diff --git a/pcompress.h b/pcompress.h index dcd965a..52968fb 100644 --- a/pcompress.h +++ b/pcompress.h @@ -202,13 +202,14 @@ typedef struct pc_ctx { int verbose; int enable_archive_sort; int pagesize; - int uncompfd, compfd; + int force_archive_perms; + int no_overwrite_newer; /* * Archiving related context data. */ char archive_members_file[MAXPATHLEN]; - int archive_members_fd, archive_data_fd; + int archive_members_fd; uint32_t archive_members_count; void *archive_ctx, *archive_sort_buf; pthread_t archive_thread;