Use libbsc instead of PPMD for DNA sequence data: it is faster and gives better compression.

Fix pz extension handling for real.
This commit is contained in:
Moinak Ghosh 2013-11-30 09:58:21 +05:30
parent dfeea8c19b
commit c4c4b47138
4 changed files with 12 additions and 8 deletions

View file

@ -288,7 +288,7 @@ adapt_compress(void *src, uint64_t srclen, void *dst,
} else {
#ifdef ENABLE_PC_LIBBSC
if (adat->bsc_data && (PC_SUBTYPE(btype) == TYPE_MARKUP ||
PC_SUBTYPE(btype) == TYPE_BMP)) {
PC_SUBTYPE(btype) == TYPE_BMP || PC_SUBTYPE(btype) == TYPE_DNA_SEQ)) {
rv = libbsc_compress(src, srclen, dst, dstlen, level, chdr, btype, adat->bsc_data);
if (rv < 0)
return (rv);

View file

@ -2050,7 +2050,12 @@ start_compress(pc_ctx_t *pctx, const char *filename, uint64_t chunksize, int lev
}
add_fname(tmpfile1);
} else {
snprintf(to_filename, sizeof (to_filename), "%s" COMP_EXTN, pctx->to_filename);
if (!endswith(pctx->to_filename, COMP_EXTN))
snprintf(to_filename, sizeof (to_filename),
"%s" COMP_EXTN, pctx->to_filename);
else
snprintf(to_filename, sizeof (to_filename),
"%s", pctx->to_filename);
if ((compfd = open(to_filename, O_CREAT|O_RDWR, S_IRUSR|S_IWUSR)) == -1) {
log_msg(LOG_ERR, 1, "open ");
COMP_BAIL;
@ -3131,8 +3136,7 @@ init_pc_context(pc_ctx_t *pctx, int argc, char *argv[])
}
} else {
strcpy(apath, pctx->filename);
if (!endswith(apath, COMP_EXTN))
strcat(apath, COMP_EXTN);
strcat(apath, COMP_EXTN);
pctx->to_filename = realpath(apath, NULL);
/* Check if compressed file exists */

View file

@ -69,11 +69,11 @@ struct ext_entry {
{"lua" , TYPE_TEXT, 3},
{"qml" , TYPE_TEXT|TYPE_MARKUP, 3},
{"fa" , TYPE_TEXT|TYPE_DNA_SEQ, 2},
{"faa" , TYPE_TEXT, 3},
{"faa" , TYPE_TEXT|TYPE_DNA_SEQ, 3},
{"asn" , TYPE_TEXT|TYPE_MARKUP, 3},
{"ffn" , TYPE_TEXT|TYPE_DNA_SEQ, 3},
{"fna" , TYPE_TEXT|TYPE_DNA_SEQ, 3},
{"frn" , TYPE_TEXT, 3},
{"frn" , TYPE_TEXT|TYPE_DNA_SEQ, 3},
{"gbk" , TYPE_TEXT, 3},
{"gff" , TYPE_TEXT, 3},
{"ptt" , TYPE_TEXT, 3},

View file

@ -58,11 +58,11 @@ qml,TYPE_TEXT|TYPE_MARKUP
# These are all genomic data file extensions
fa,TYPE_TEXT|TYPE_DNA_SEQ
faa,TYPE_TEXT
faa,TYPE_TEXT|TYPE_DNA_SEQ
asn,TYPE_TEXT|TYPE_MARKUP
ffn,TYPE_TEXT|TYPE_DNA_SEQ
fna,TYPE_TEXT|TYPE_DNA_SEQ
frn,TYPE_TEXT
frn,TYPE_TEXT|TYPE_DNA_SEQ
gbk,TYPE_TEXT
gff,TYPE_TEXT
ptt,TYPE_TEXT