Add ASM version of Skein for x64 platforms with auto-detection
Error checking for checksum flag when decompressing. Update comments and READMEs.
This commit is contained in:
parent
eda312ce1e
commit
4ba840b255
7 changed files with 1387 additions and 5 deletions
6
INSTALL
6
INSTALL
|
@ -8,6 +8,12 @@ The simplest process to build and install this utility is:
|
||||||
make
|
make
|
||||||
make install
|
make install
|
||||||
|
|
||||||
|
In order to remove all binaries:
|
||||||
|
make clean
|
||||||
|
|
||||||
|
Remove all binaries and the generated Makefile:
|
||||||
|
make distclean
|
||||||
|
|
||||||
Strictly speaking the 'make install' step is not required to
|
Strictly speaking the 'make install' step is not required to
|
||||||
run the utility as it is a single stand alone program for now.
|
run the utility as it is a single stand alone program for now.
|
||||||
|
|
||||||
|
|
12
Makefile.in
12
Makefile.in
|
@ -62,8 +62,11 @@ LZPSRCS = lzp/lzp.c
|
||||||
LZPHDRS = lzp/lzp.h
|
LZPHDRS = lzp/lzp.h
|
||||||
LZPOBJS = $(LZPSRCS:.c=.o)
|
LZPOBJS = $(LZPSRCS:.c=.o)
|
||||||
|
|
||||||
SKEINSRCS = crypto/skein/SHA3api_ref.c crypto/skein/skein_block.c \
|
SKEIN_BLOCK_C = crypto/skein/skein_block.c
|
||||||
crypto/skein/skein.c crypto/skein/skein_debug.c
|
SKEIN_BLOCK_ASM = crypto/skein/skein_block_x64.s
|
||||||
|
SKEIN_BLOCK_SRC = @SKEIN_BLOCK@
|
||||||
|
SKEIN_BLOCK_OBJ = crypto/skein/skein_block.o
|
||||||
|
SKEINSRCS = crypto/skein/SHA3api_ref.c crypto/skein/skein.c crypto/skein/skein_debug.c
|
||||||
SKEINHDRS = crypto/skein/brg_endian.h crypto/skein/SHA3api_ref.h \
|
SKEINHDRS = crypto/skein/brg_endian.h crypto/skein/SHA3api_ref.h \
|
||||||
crypto/skein/skein.h crypto/skein/skein_port.h crypto/skein/brg_types.h \
|
crypto/skein/skein.h crypto/skein/skein_port.h crypto/skein/brg_types.h \
|
||||||
crypto/skein/skein_debug.h crypto/skein/skein_iv.h
|
crypto/skein/skein_debug.h crypto/skein/skein_iv.h
|
||||||
|
@ -87,7 +90,7 @@ COMMON_VEC_FLAGS = -ftree-vectorize
|
||||||
COMMON_LOOP_OPTFLAGS = $(VEC_FLAGS) -floop-interchange -floop-block
|
COMMON_LOOP_OPTFLAGS = $(VEC_FLAGS) -floop-interchange -floop-block
|
||||||
LDLIBS = -ldl -lbz2 $(ZLIB_DIR) -lz -lm @LIBBSCLFLAGS@
|
LDLIBS = -ldl -lbz2 $(ZLIB_DIR) -lz -lm @LIBBSCLFLAGS@
|
||||||
OBJS = $(MAINOBJS) $(LZMAOBJS) $(PPMDOBJS) $(LZFXOBJS) $(LZ4OBJS) $(CRCOBJS) \
|
OBJS = $(MAINOBJS) $(LZMAOBJS) $(PPMDOBJS) $(LZFXOBJS) $(LZ4OBJS) $(CRCOBJS) \
|
||||||
$(RABINOBJS) $(BSDIFFOBJS) $(LZPOBJS) @LIBBSCWRAPOBJ@ $(SKEINOBJS)
|
$(RABINOBJS) $(BSDIFFOBJS) $(LZPOBJS) @LIBBSCWRAPOBJ@ $(SKEINOBJS) $(SKEIN_BLOCK_OBJ)
|
||||||
|
|
||||||
DEBUG_LINK = g++ -m64 -pthread -msse3 @LIBBSCGEN_OPT@
|
DEBUG_LINK = g++ -m64 -pthread -msse3 @LIBBSCGEN_OPT@
|
||||||
DEBUG_COMPILE = gcc -m64 -g -msse3 -c
|
DEBUG_COMPILE = gcc -m64 -g -msse3 -c
|
||||||
|
@ -146,6 +149,9 @@ $(LZ4OBJS): $(LZ4SRCS) $(LZ4HDRS)
|
||||||
$(LZPOBJS): $(LZPSRCS) $(LZPHDRS)
|
$(LZPOBJS): $(LZPSRCS) $(LZPHDRS)
|
||||||
$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
||||||
|
|
||||||
|
$(SKEIN_BLOCK_OBJ): $(SKEIN_BLOCK_SRC)
|
||||||
|
$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(SKEIN_BLOCK_SRC) -o $@
|
||||||
|
|
||||||
$(SKEINOBJS): $(SKEINSRCS) $(SKEINHDRS)
|
$(SKEINOBJS): $(SKEINSRCS) $(SKEINHDRS)
|
||||||
$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
$(COMPILE) $(GEN_OPT) $(VEC_FLAGS) $(CPPFLAGS) $(@:.o=.c) -o $@
|
||||||
|
|
||||||
|
|
|
@ -86,6 +86,15 @@ NOTE: The option "libbsc" uses Ilya Grebnov's block sorting compression library
|
||||||
'-L' - Enable LZP pre-compression. This improves compression ratio of all
|
'-L' - Enable LZP pre-compression. This improves compression ratio of all
|
||||||
algorithms with some extra CPU and very low RAM overhead. Using
|
algorithms with some extra CPU and very low RAM overhead. Using
|
||||||
delta encoding in conjunction with this may not always be beneficial.
|
delta encoding in conjunction with this may not always be beneficial.
|
||||||
|
'-S' <cksum>
|
||||||
|
- Specify chunk checksum to use: CRC64, SKEIN256, SKEIN512
|
||||||
|
Default one is SKEIN256. The implementation actually uses SKEIN
|
||||||
|
512-256. This is 25% slower than simple CRC64 but is many times more
|
||||||
|
robust than CRC64 in detecting data integrity errors. SKEIN is a
|
||||||
|
finalist in the NIST SHA-3 standard selection process and is one of
|
||||||
|
the fastest in the group, especially on x86 platforms. BLAKE is faster
|
||||||
|
than SKEIN on a few platforms.
|
||||||
|
SKEIN 512-256 is about 60% faster than SHA 512-256 on x64 platforms.
|
||||||
'-M' - Display memory allocator statistics
|
'-M' - Display memory allocator statistics
|
||||||
'-C' - Display compression statistics
|
'-C' - Display compression statistics
|
||||||
|
|
||||||
|
|
21
config
21
config
|
@ -72,6 +72,25 @@ else
|
||||||
typ="RELEASE"
|
typ="RELEASE"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
OS=$(uname)
|
||||||
|
skeinblock='\$\(SKEIN_BLOCK_C\)'
|
||||||
|
if [ "$OS" = "Linux" ]
|
||||||
|
then
|
||||||
|
plat=$(uname -r)
|
||||||
|
elif [ "$OS" = "SunOS" ]
|
||||||
|
then
|
||||||
|
plat=$(isainfo -v)
|
||||||
|
else
|
||||||
|
echo "Unsupported OS: $OS"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo $plat | egrep 'x86_64|amd64' > /dev/null
|
||||||
|
if [ $? -eq 0 ]
|
||||||
|
then
|
||||||
|
skeinblock='\$\(SKEIN_BLOCK_ASM\)'
|
||||||
|
fi
|
||||||
|
|
||||||
linkvar="LINK"
|
linkvar="LINK"
|
||||||
compilevar="COMPILE"
|
compilevar="COMPILE"
|
||||||
compilecppvar="COMPILE_cpp"
|
compilecppvar="COMPILE_cpp"
|
||||||
|
@ -83,6 +102,7 @@ rabinoptvar="RABIN_OPT"
|
||||||
noslabcppflagsvar="NO_SLAB_CPPFLAGS"
|
noslabcppflagsvar="NO_SLAB_CPPFLAGS"
|
||||||
debugstatscppflagsvar="DEBUG_STATS_CPPFLAGS"
|
debugstatscppflagsvar="DEBUG_STATS_CPPFLAGS"
|
||||||
prefixvar="PREFIX"
|
prefixvar="PREFIX"
|
||||||
|
skeinblockvar="SKEIN_BLOCK"
|
||||||
|
|
||||||
libbscdirvar="LIBBSCDIR"
|
libbscdirvar="LIBBSCDIR"
|
||||||
libbsclibvar="LIBBSCLIB"
|
libbsclibvar="LIBBSCLIB"
|
||||||
|
@ -115,5 +135,6 @@ s#@${libbsclflagsvar}@#${libbsclflags}#g
|
||||||
s#@${libbscwrapobjvar}@#${libbscwrapobj}#g
|
s#@${libbscwrapobjvar}@#${libbscwrapobj}#g
|
||||||
s#@${libbscgenoptvar}@#${libbscgenopt}#g
|
s#@${libbscgenoptvar}@#${libbscgenopt}#g
|
||||||
s#@${libbsccppflagsvar}@#${libbsccppflags}#g
|
s#@${libbsccppflagsvar}@#${libbsccppflags}#g
|
||||||
|
s#@${skeinblockvar}@#${skeinblock}#g
|
||||||
" > Makefile
|
" > Makefile
|
||||||
|
|
||||||
|
|
1328
crypto/skein/skein_block_x64.s
Normal file
1328
crypto/skein/skein_block_x64.s
Normal file
File diff suppressed because it is too large
Load diff
5
main.c
5
main.c
|
@ -535,7 +535,10 @@ start_decompress(const char *filename, const char *to_filename)
|
||||||
}
|
}
|
||||||
|
|
||||||
cksum = flags & CKSUM_MASK;
|
cksum = flags & CKSUM_MASK;
|
||||||
get_checksum_props(NULL, &cksum, &cksum_bytes);
|
if (get_checksum_props(NULL, &cksum, &cksum_bytes) == -1) {
|
||||||
|
fprintf(stderr, "Invalid checksum algorithm code: %d. File corrupt ?\n", cksum);
|
||||||
|
UNCOMP_BAIL;
|
||||||
|
}
|
||||||
|
|
||||||
nprocs = sysconf(_SC_NPROCESSORS_ONLN);
|
nprocs = sysconf(_SC_NPROCESSORS_ONLN);
|
||||||
if (nthreads > 0 && nthreads < nprocs)
|
if (nthreads > 0 && nthreads < nprocs)
|
||||||
|
|
11
utils.c
11
utils.c
|
@ -335,12 +335,15 @@ compute_checksum(uchar_t *cksum_buf, int cksum, uchar_t *buf, ssize_t bytes)
|
||||||
Skein_512_Update(&ctx, buf, bytes);
|
Skein_512_Update(&ctx, buf, bytes);
|
||||||
Skein_512_Final(&ctx, cksum_buf);
|
Skein_512_Final(&ctx, cksum_buf);
|
||||||
} else {
|
} else {
|
||||||
fprintf(stderr, "Invalid checksum algorithm code: %d\n", cksum);
|
|
||||||
return (-1);
|
return (-1);
|
||||||
}
|
}
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check if either the given checksum name or id is valid and
|
||||||
|
* return its properties.
|
||||||
|
*/
|
||||||
int
|
int
|
||||||
get_checksum_props(char *name, int *cksum, int *cksum_bytes)
|
get_checksum_props(char *name, int *cksum, int *cksum_bytes)
|
||||||
{
|
{
|
||||||
|
@ -357,6 +360,12 @@ get_checksum_props(char *name, int *cksum, int *cksum_bytes)
|
||||||
return (-1);
|
return (-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Endian independent way of storing the checksum bytes. This is actually
|
||||||
|
* storing in little endian format and a copy can be avoided in x86 land.
|
||||||
|
* However unsightly ifdefs are avoided here since this is not so performance
|
||||||
|
* critical.
|
||||||
|
*/
|
||||||
void
|
void
|
||||||
serialize_checksum(uchar_t *checksum, uchar_t *buf, int cksum_bytes)
|
serialize_checksum(uchar_t *checksum, uchar_t *buf, int cksum_bytes)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in a new issue