From 9eac774eb16bd15b57a7ae6adef3f50c4578ace3 Mon Sep 17 00:00:00 2001 From: Moinak Ghosh Date: Sat, 18 Aug 2012 10:20:52 +0530 Subject: [PATCH] Add multithreaded LZMA port from p7zip Compute balanced thread count between chunk threads and algo threads Generic way to handle querying algorithm parameters Clean up unnecessary includes --- Makefile | 7 +- adaptive_compress.c | 14 +- bzip2_compress.c | 2 +- lz4_compress.c | 9 +- lzfx_compress.c | 2 +- lzma/Common/MyGuidDef.h | 54 +++ lzma/Common/MyWindows.h | 218 +++++++++++ lzma/LzFindMt.c | 793 ++++++++++++++++++++++++++++++++++++++++ lzma/LzFindMt.h | 105 ++++++ lzma/Threads.c | 582 +++++++++++++++++++++++++++++ lzma/Threads.h | 123 +++++++ lzma/basetyps.h | 19 + lzma/windows.h | 194 ++++++++++ lzma_compress.c | 11 +- main.c | 60 +-- none_compress.c | 2 +- pcompress.h | 21 +- ppmd_compress.c | 2 +- utils.c | 42 +++ utils.h | 30 +- zlib_compress.c | 2 +- 21 files changed, 2237 insertions(+), 55 deletions(-) create mode 100644 lzma/Common/MyGuidDef.h create mode 100644 lzma/Common/MyWindows.h create mode 100755 lzma/LzFindMt.c create mode 100755 lzma/LzFindMt.h create mode 100755 lzma/Threads.c create mode 100755 lzma/Threads.h create mode 100644 lzma/basetyps.h create mode 100644 lzma/windows.h diff --git a/Makefile b/Makefile index 1530c81..b421cb0 100644 --- a/Makefile +++ b/Makefile @@ -36,9 +36,10 @@ BSDIFFSRCS = bsdiff/bsdiff.c bsdiff/bspatch.c bsdiff/rle_encoder.c BSDIFFHDRS = bsdiff/bscommon.h utils.h allocator.h BSDIFFOBJS = $(BSDIFFSRCS:.c=.o) -LZMASRCS = lzma/LzmaEnc.c lzma/LzFind.c lzma/LzmaDec.c +LZMASRCS = lzma/LzmaEnc.c lzma/LzFind.c lzma/LzmaDec.c lzma/Threads.c lzma/LzFindMt.c LZMAHDRS = lzma/CpuArch.h lzma/LzFind.h lzma/LzmaEnc.h lzma/Types.h \ - lzma/LzHash.h lzma/LzmaDec.h utils.h + lzma/LzHash.h lzma/LzmaDec.h utils.h lzma/LzFindMt.h lzma/Threads.h lzma/windows.h \ + lzma/Common/MyWindows.h lzma/Common/MyGuidDef.h lzma/basetyps.h LZMAOBJS = $(LZMASRCS:.c=.o) LZFXSRCS = lzfx/lzfx.c @@ -60,7 +61,7 @@ CRCOBJS = $(CRCSRCS:.c=.o) BAKFILES = *~ lzma/*~ lzfx/*~ lz4/*~ rabin/*~ bsdiff/*~ RM = rm -f -CPPFLAGS = -I. -I./lzma -I./lzfx -I./lz4 -I./rabin -I./bsdiff -D_7ZIP_ST -DNODEFAULT_PROPS \ +CPPFLAGS = -I. -I./lzma -I./lzfx -I./lz4 -I./rabin -I./bsdiff -DNODEFAULT_PROPS \ -DFILE_OFFSET_BITS=64 -D_REENTRANT -D__USE_SSE_INTRIN__ -D_LZMA_PROB32 VEC_FLAGS = -ftree-vectorize LOOP_OPTFLAGS = $(VEC_FLAGS) -floop-interchange -floop-block diff --git a/adaptive_compress.c b/adaptive_compress.c index 20c6672..00d8b96 100644 --- a/adaptive_compress.c +++ b/adaptive_compress.c @@ -53,9 +53,9 @@ extern int bzip2_decompress(void *src, size_t srclen, void *dst, extern int ppmd_decompress(void *src, size_t srclen, void *dst, size_t *dstlen, int level, uchar_t chdr, void *data); -extern int lzma_init(void **data, int *level, ssize_t chunksize); +extern int lzma_init(void **data, int *level, int nthreads, ssize_t chunksize); extern int lzma_deinit(void **data); -extern int ppmd_init(void **data, int *level, ssize_t chunksize); +extern int ppmd_init(void **data, int *level, int nthreads, ssize_t chunksize); extern int ppmd_deinit(void **data); struct adapt_data { @@ -79,7 +79,7 @@ adapt_stats(int show) } int -adapt_init(void **data, int *level, ssize_t chunksize) +adapt_init(void **data, int *level, int nthreads, ssize_t chunksize) { struct adapt_data *adat = (struct adapt_data *)(*data); int rv; @@ -87,7 +87,7 @@ adapt_init(void **data, int *level, ssize_t chunksize) if (!adat) { adat = (struct adapt_data *)slab_alloc(NULL, sizeof (struct adapt_data)); adat->adapt_mode = 1; - rv = ppmd_init(&(adat->ppmd_data), level, chunksize); + rv = ppmd_init(&(adat->ppmd_data), level, nthreads, chunksize); adat->lzma_data = NULL; *data = adat; if (*level > 9) *level = 9; @@ -99,7 +99,7 @@ adapt_init(void **data, int *level, ssize_t chunksize) } int -adapt2_init(void **data, int *level, ssize_t chunksize) +adapt2_init(void **data, int *level, int nthreads, ssize_t chunksize) { struct adapt_data *adat = (struct adapt_data *)(*data); int rv, lv; @@ -109,10 +109,10 @@ adapt2_init(void **data, int *level, ssize_t chunksize) adat->adapt_mode = 2; adat->ppmd_data = NULL; lv = *level; - rv = ppmd_init(&(adat->ppmd_data), &lv, chunksize); + rv = ppmd_init(&(adat->ppmd_data), &lv, nthreads, chunksize); lv = *level; if (rv == 0) - rv = lzma_init(&(adat->lzma_data), &lv, chunksize); + rv = lzma_init(&(adat->lzma_data), &lv, nthreads, chunksize); *data = adat; if (*level > 9) *level = 9; } diff --git a/bzip2_compress.c b/bzip2_compress.c index fbb8f54..718e17b 100644 --- a/bzip2_compress.c +++ b/bzip2_compress.c @@ -49,7 +49,7 @@ bzip2_stats(int show) } int -bzip2_init(void **data, int *level, ssize_t chunksize) +bzip2_init(void **data, int *level, int nthreads, ssize_t chunksize) { if (*level > 9) *level = 9; return (0); diff --git a/lz4_compress.c b/lz4_compress.c index c219e43..c127dba 100644 --- a/lz4_compress.c +++ b/lz4_compress.c @@ -51,8 +51,15 @@ lz4_buf_extra(ssize_t buflen) return (LZ4_compressBound(buflen) - buflen + sizeof(int)); } +void +lz4_props(algo_props_t *data, int level, ssize_t chunksize) { + data->compress_mt_capable = 0; + data->decompress_mt_capable = 0; + data->buf_extra = lz4_buf_extra(chunksize); +} + int -lz4_init(void **data, int *level, ssize_t chunksize) +lz4_init(void **data, int *level, int nthreads, ssize_t chunksize) { struct lz4_params *lzdat; int lev; diff --git a/lzfx_compress.c b/lzfx_compress.c index 60e8bf5..01723d8 100644 --- a/lzfx_compress.c +++ b/lzfx_compress.c @@ -40,7 +40,7 @@ lz_fx_stats(int show) } int -lz_fx_init(void **data, int *level, ssize_t chunksize) +lz_fx_init(void **data, int *level, int nthreads, ssize_t chunksize) { struct lzfx_params *lzdat; int lev; diff --git a/lzma/Common/MyGuidDef.h b/lzma/Common/MyGuidDef.h new file mode 100644 index 0000000..3c52cc0 --- /dev/null +++ b/lzma/Common/MyGuidDef.h @@ -0,0 +1,54 @@ +// Common/MyGuidDef.h + +#ifndef GUID_DEFINED +#define GUID_DEFINED + +#include "Types.h" + +typedef struct { + UInt32 Data1; + UInt16 Data2; + UInt16 Data3; + unsigned char Data4[8]; +} GUID; + +#ifdef __cplusplus +#define REFGUID const GUID & +#else +#define REFGUID const GUID * +#endif + +#define REFCLSID REFGUID +#define REFIID REFGUID + +#ifdef __cplusplus +inline int operator==(REFGUID g1, REFGUID g2) +{ + for (int i = 0; i < (int)sizeof(g1); i++) + if (((unsigned char *)&g1)[i] != ((unsigned char *)&g2)[i]) + return 0; + return 1; +} +inline int operator!=(REFGUID g1, REFGUID g2) { return !(g1 == g2); } +#endif + +#ifdef __cplusplus + #define MY_EXTERN_C extern "C" +#else + #define MY_EXTERN_C extern +#endif + +#endif + + +#ifdef DEFINE_GUID +#undef DEFINE_GUID +#endif + +#ifdef INITGUID + #define DEFINE_GUID(name, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \ + MY_EXTERN_C const GUID name = { l, w1, w2, { b1, b2, b3, b4, b5, b6, b7, b8 } } +#else + #define DEFINE_GUID(name, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \ + MY_EXTERN_C const GUID name +#endif diff --git a/lzma/Common/MyWindows.h b/lzma/Common/MyWindows.h new file mode 100644 index 0000000..d91f10f --- /dev/null +++ b/lzma/Common/MyWindows.h @@ -0,0 +1,218 @@ +// MyWindows.h + +#ifndef __MYWINDOWS_H +#define __MYWINDOWS_H + +#ifdef _WIN32 + +#include + +#else + +#include // for wchar_t +#include + +#include "MyGuidDef.h" + +typedef char CHAR; +typedef unsigned char UCHAR; + +#undef BYTE +typedef unsigned char BYTE; + +typedef short SHORT; +typedef unsigned short USHORT; + +#undef WORD +typedef unsigned short WORD; +typedef short VARIANT_BOOL; + +typedef int INT; +typedef Int32 INT32; +typedef unsigned int UINT; +typedef UInt32 UINT32; +typedef INT32 LONG; // LONG, ULONG and DWORD must be 32-bit +typedef UINT32 ULONG; + +#undef DWORD +typedef UINT32 DWORD; + +typedef Int64 LONGLONG; +typedef UInt64 ULONGLONG; + +typedef struct LARGE_INTEGER { LONGLONG QuadPart; }LARGE_INTEGER; +typedef struct _ULARGE_INTEGER { ULONGLONG QuadPart;} ULARGE_INTEGER; + +typedef const CHAR *LPCSTR; + +typedef wchar_t WCHAR; + +#ifdef _UNICODE +typedef WCHAR TCHAR; +#define lstrcpy wcscpy +#define lstrcat wcscat +#define lstrlen wcslen +#else +typedef CHAR TCHAR; +#define lstrcpy strcpy +#define lstrcat strcat +#define lstrlen strlen +#endif +#define _wcsicmp(str1,str2) MyStringCompareNoCase(str1,str2) + +typedef const TCHAR *LPCTSTR; +typedef WCHAR OLECHAR; +typedef const WCHAR *LPCWSTR; +typedef OLECHAR *BSTR; +typedef const OLECHAR *LPCOLESTR; +typedef OLECHAR *LPOLESTR; + +typedef struct _FILETIME +{ + DWORD dwLowDateTime; + DWORD dwHighDateTime; +}FILETIME; + +#define HRESULT LONG +#define FAILED(Status) ((HRESULT)(Status)<0) +typedef ULONG PROPID; +typedef LONG SCODE; + +#define S_OK ((HRESULT)0x00000000L) +#define S_FALSE ((HRESULT)0x00000001L) +#define E_NOTIMPL ((HRESULT)0x80004001L) +#define E_NOINTERFACE ((HRESULT)0x80004002L) +#define E_ABORT ((HRESULT)0x80004004L) +#define E_FAIL ((HRESULT)0x80004005L) +#define STG_E_INVALIDFUNCTION ((HRESULT)0x80030001L) +#define E_OUTOFMEMORY ((HRESULT)0x8007000EL) +#define E_INVALIDARG ((HRESULT)0x80070057L) + +#ifdef _MSC_VER +#define STDMETHODCALLTYPE __stdcall +#else +#define STDMETHODCALLTYPE +#endif + +#define STDMETHOD_(t, f) virtual t STDMETHODCALLTYPE f +#define STDMETHOD(f) STDMETHOD_(HRESULT, f) +#define STDMETHODIMP_(type) type STDMETHODCALLTYPE +#define STDMETHODIMP STDMETHODIMP_(HRESULT) + +#define PURE = 0 + +#define MIDL_INTERFACE(x) struct + +#ifdef __cplusplus + +DEFINE_GUID(IID_IUnknown, +0x00000000, 0x0000, 0x0000, 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46); +struct IUnknown +{ + STDMETHOD(QueryInterface) (REFIID iid, void **outObject) PURE; + STDMETHOD_(ULONG, AddRef)() PURE; + STDMETHOD_(ULONG, Release)() PURE; + #ifndef _WIN32 + virtual ~IUnknown() {} + #endif +}; + +typedef IUnknown *LPUNKNOWN; + +#endif + +#define VARIANT_TRUE ((VARIANT_BOOL)-1) +#define VARIANT_FALSE ((VARIANT_BOOL)0) + +enum VARENUM +{ + VT_EMPTY = 0, + VT_NULL = 1, + VT_I2 = 2, + VT_I4 = 3, + VT_R4 = 4, + VT_R8 = 5, + VT_CY = 6, + VT_DATE = 7, + VT_BSTR = 8, + VT_DISPATCH = 9, + VT_ERROR = 10, + VT_BOOL = 11, + VT_VARIANT = 12, + VT_UNKNOWN = 13, + VT_DECIMAL = 14, + VT_I1 = 16, + VT_UI1 = 17, + VT_UI2 = 18, + VT_UI4 = 19, + VT_I8 = 20, + VT_UI8 = 21, + VT_INT = 22, + VT_UINT = 23, + VT_VOID = 24, + VT_HRESULT = 25, + VT_FILETIME = 64 +}; + +typedef unsigned short VARTYPE; +typedef WORD PROPVAR_PAD1; +typedef WORD PROPVAR_PAD2; +typedef WORD PROPVAR_PAD3; + +#ifdef __cplusplus + +typedef struct tagPROPVARIANT +{ + VARTYPE vt; + PROPVAR_PAD1 wReserved1; + PROPVAR_PAD2 wReserved2; + PROPVAR_PAD3 wReserved3; + union + { + CHAR cVal; + UCHAR bVal; + SHORT iVal; + USHORT uiVal; + LONG lVal; + ULONG ulVal; + INT intVal; + UINT uintVal; + LARGE_INTEGER hVal; + ULARGE_INTEGER uhVal; + VARIANT_BOOL boolVal; + SCODE scode; + FILETIME filetime; + BSTR bstrVal; + }; +} PROPVARIANT; + +typedef PROPVARIANT tagVARIANT; +typedef tagVARIANT VARIANT; +typedef VARIANT VARIANTARG; + +MY_EXTERN_C HRESULT VariantClear(VARIANTARG *prop); +MY_EXTERN_C HRESULT VariantCopy(VARIANTARG *dest, VARIANTARG *src); + +#endif + +MY_EXTERN_C BSTR SysAllocStringByteLen(LPCSTR psz, UINT len); +MY_EXTERN_C BSTR SysAllocString(const OLECHAR *sz); +MY_EXTERN_C void SysFreeString(BSTR bstr); +MY_EXTERN_C UINT SysStringByteLen(BSTR bstr); +MY_EXTERN_C UINT SysStringLen(BSTR bstr); + +/* MY_EXTERN_C DWORD GetLastError(); */ +MY_EXTERN_C LONG CompareFileTime(const FILETIME* ft1, const FILETIME* ft2); + +#define CP_ACP 0 +#define CP_OEMCP 1 + +typedef enum tagSTREAM_SEEK +{ + STREAM_SEEK_SET = 0, + STREAM_SEEK_CUR = 1, + STREAM_SEEK_END = 2 +} STREAM_SEEK; + +#endif +#endif diff --git a/lzma/LzFindMt.c b/lzma/LzFindMt.c new file mode 100755 index 0000000..db95590 --- /dev/null +++ b/lzma/LzFindMt.c @@ -0,0 +1,793 @@ +/* LzFindMt.c -- multithreaded Match finder for LZ algorithms +2009-09-20 : Igor Pavlov : Public domain */ + +#include "LzHash.h" + +#include "LzFindMt.h" + +void MtSync_Construct(CMtSync *p) +{ + p->wasCreated = False; + p->csWasInitialized = False; + p->csWasEntered = False; + Thread_Construct(&p->thread); + Event_Construct(&p->canStart); + Event_Construct(&p->wasStarted); + Event_Construct(&p->wasStopped); + Semaphore_Construct(&p->freeSemaphore); + Semaphore_Construct(&p->filledSemaphore); +} + +void MtSync_GetNextBlock(CMtSync *p) +{ + if (p->needStart) + { + p->numProcessedBlocks = 1; + p->needStart = False; + p->stopWriting = False; + p->exit = False; + Event_Reset(&p->wasStarted); + Event_Reset(&p->wasStopped); + + Event_Set(&p->canStart); + Event_Wait(&p->wasStarted); + } + else + { + CriticalSection_Leave(&p->cs); + p->csWasEntered = False; + p->numProcessedBlocks++; + Semaphore_Release1(&p->freeSemaphore); + } + Semaphore_Wait(&p->filledSemaphore); + CriticalSection_Enter(&p->cs); + p->csWasEntered = True; +} + +/* MtSync_StopWriting must be called if Writing was started */ + +void MtSync_StopWriting(CMtSync *p) +{ + UInt32 myNumBlocks = p->numProcessedBlocks; + if (!Thread_WasCreated(&p->thread) || p->needStart) + return; + p->stopWriting = True; + if (p->csWasEntered) + { + CriticalSection_Leave(&p->cs); + p->csWasEntered = False; + } + Semaphore_Release1(&p->freeSemaphore); + + Event_Wait(&p->wasStopped); + + while (myNumBlocks++ != p->numProcessedBlocks) + { + Semaphore_Wait(&p->filledSemaphore); + Semaphore_Release1(&p->freeSemaphore); + } + p->needStart = True; +} + +void MtSync_Destruct(CMtSync *p) +{ + if (Thread_WasCreated(&p->thread)) + { + MtSync_StopWriting(p); + p->exit = True; + if (p->needStart) + Event_Set(&p->canStart); + Thread_Wait(&p->thread); + Thread_Close(&p->thread); + } + if (p->csWasInitialized) + { + CriticalSection_Delete(&p->cs); + p->csWasInitialized = False; + } + + Event_Close(&p->canStart); + Event_Close(&p->wasStarted); + Event_Close(&p->wasStopped); + Semaphore_Close(&p->freeSemaphore); + Semaphore_Close(&p->filledSemaphore); + + p->wasCreated = False; +} + +#define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; } + +static SRes MtSync_Create2(CMtSync *p, unsigned (MY_STD_CALL *startAddress)(void *), void *obj, UInt32 numBlocks) +{ + if (p->wasCreated) + return SZ_OK; + + RINOK_THREAD(CriticalSection_Init(&p->cs)); + p->csWasInitialized = True; + + RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart)); + RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStarted)); + RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped)); + + RINOK_THREAD(Semaphore_Create(&p->freeSemaphore, numBlocks, numBlocks)); + RINOK_THREAD(Semaphore_Create(&p->filledSemaphore, 0, numBlocks)); + + p->needStart = True; + + RINOK_THREAD(Thread_Create(&p->thread, startAddress, obj)); + p->wasCreated = True; + return SZ_OK; +} + +static SRes MtSync_Create(CMtSync *p, unsigned (MY_STD_CALL *startAddress)(void *), void *obj, UInt32 numBlocks) +{ + SRes res = MtSync_Create2(p, startAddress, obj, numBlocks); + if (res != SZ_OK) + MtSync_Destruct(p); + return res; +} + +void MtSync_Init(CMtSync *p) { p->needStart = True; } + +#define kMtMaxValForNormalize 0xFFFFFFFF + +#define DEF_GetHeads2(name, v, action) \ +static void GetHeads ## name(const Byte *p, UInt32 pos, \ +UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc) \ +{ action; for (; numHeads != 0; numHeads--) { \ +const UInt32 value = (v); p++; *heads++ = pos - hash[value]; hash[value] = pos++; } } + +#define DEF_GetHeads(name, v) DEF_GetHeads2(name, v, ;) + +DEF_GetHeads2(2, (p[0] | ((UInt32)p[1] << 8)), hashMask = hashMask; crc = crc; ) +DEF_GetHeads(3, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8)) & hashMask) +DEF_GetHeads(4, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5)) & hashMask) +DEF_GetHeads(4b, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ ((UInt32)p[3] << 16)) & hashMask) +/* DEF_GetHeads(5, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5) ^ (crc[p[4]] << 3)) & hashMask) */ + +void HashThreadFunc(CMatchFinderMt *mt) +{ + CMtSync *p = &mt->hashSync; + for (;;) + { + UInt32 numProcessedBlocks = 0; + Event_Wait(&p->canStart); + Event_Set(&p->wasStarted); + for (;;) + { + if (p->exit) + return; + if (p->stopWriting) + { + p->numProcessedBlocks = numProcessedBlocks; + Event_Set(&p->wasStopped); + break; + } + + { + CMatchFinder *mf = mt->MatchFinder; + if (MatchFinder_NeedMove(mf)) + { + CriticalSection_Enter(&mt->btSync.cs); + CriticalSection_Enter(&mt->hashSync.cs); + { + const Byte *beforePtr = MatchFinder_GetPointerToCurrentPos(mf); + const Byte *afterPtr; + MatchFinder_MoveBlock(mf); + afterPtr = MatchFinder_GetPointerToCurrentPos(mf); + mt->pointerToCurPos -= beforePtr - afterPtr; + mt->buffer -= beforePtr - afterPtr; + } + CriticalSection_Leave(&mt->btSync.cs); + CriticalSection_Leave(&mt->hashSync.cs); + continue; + } + + Semaphore_Wait(&p->freeSemaphore); + + MatchFinder_ReadIfRequired(mf); + if (mf->pos > (kMtMaxValForNormalize - kMtHashBlockSize)) + { + UInt32 subValue = (mf->pos - mf->historySize - 1); + MatchFinder_ReduceOffsets(mf, subValue); + MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, mf->hashMask + 1); + } + { + UInt32 *heads = mt->hashBuf + ((numProcessedBlocks++) & kMtHashNumBlocksMask) * kMtHashBlockSize; + UInt32 num = mf->streamPos - mf->pos; + heads[0] = 2; + heads[1] = num; + if (num >= mf->numHashBytes) + { + num = num - mf->numHashBytes + 1; + if (num > kMtHashBlockSize - 2) + num = kMtHashBlockSize - 2; + mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc); + heads[0] += num; + } + mf->pos += num; + mf->buffer += num; + } + } + + Semaphore_Release1(&p->filledSemaphore); + } + } +} + +void MatchFinderMt_GetNextBlock_Hash(CMatchFinderMt *p) +{ + MtSync_GetNextBlock(&p->hashSync); + p->hashBufPosLimit = p->hashBufPos = ((p->hashSync.numProcessedBlocks - 1) & kMtHashNumBlocksMask) * kMtHashBlockSize; + p->hashBufPosLimit += p->hashBuf[p->hashBufPos++]; + p->hashNumAvail = p->hashBuf[p->hashBufPos++]; +} + +#define kEmptyHashValue 0 + +/* #define MFMT_GM_INLINE */ + +#ifdef MFMT_GM_INLINE + +#define NO_INLINE MY_FAST_CALL + +Int32 NO_INLINE GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLzRef *son, + UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue, + UInt32 *_distances, UInt32 _maxLen, const UInt32 *hash, Int32 limit, UInt32 size, UInt32 *posRes) +{ + do + { + UInt32 *distances = _distances + 1; + UInt32 curMatch = pos - *hash++; + + CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1; + CLzRef *ptr1 = son + (_cyclicBufferPos << 1); + UInt32 len0 = 0, len1 = 0; + UInt32 cutValue = _cutValue; + UInt32 maxLen = _maxLen; + for (;;) + { + UInt32 delta = pos - curMatch; + if (cutValue-- == 0 || delta >= _cyclicBufferSize) + { + *ptr0 = *ptr1 = kEmptyHashValue; + break; + } + { + CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); + const Byte *pb = cur - delta; + UInt32 len = (len0 < len1 ? len0 : len1); + if (pb[len] == cur[len]) + { + if (++len != lenLimit && pb[len] == cur[len]) + while (++len != lenLimit) + if (pb[len] != cur[len]) + break; + if (maxLen < len) + { + *distances++ = maxLen = len; + *distances++ = delta - 1; + if (len == lenLimit) + { + *ptr1 = pair[0]; + *ptr0 = pair[1]; + break; + } + } + } + if (pb[len] < cur[len]) + { + *ptr1 = curMatch; + ptr1 = pair + 1; + curMatch = *ptr1; + len1 = len; + } + else + { + *ptr0 = curMatch; + ptr0 = pair; + curMatch = *ptr0; + len0 = len; + } + } + } + pos++; + _cyclicBufferPos++; + cur++; + { + UInt32 num = (UInt32)(distances - _distances); + *_distances = num - 1; + _distances += num; + limit -= num; + } + } + while (limit > 0 && --size != 0); + *posRes = pos; + return limit; +} + +#endif + +void BtGetMatches(CMatchFinderMt *p, UInt32 *distances) +{ + UInt32 numProcessed = 0; + UInt32 curPos = 2; + UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2); + distances[1] = p->hashNumAvail; + while (curPos < limit) + { + if (p->hashBufPos == p->hashBufPosLimit) + { + MatchFinderMt_GetNextBlock_Hash(p); + distances[1] = numProcessed + p->hashNumAvail; + if (p->hashNumAvail >= p->numHashBytes) + continue; + for (; p->hashNumAvail != 0; p->hashNumAvail--) + distances[curPos++] = 0; + break; + } + { + UInt32 size = p->hashBufPosLimit - p->hashBufPos; + UInt32 lenLimit = p->matchMaxLen; + UInt32 pos = p->pos; + UInt32 cyclicBufferPos = p->cyclicBufferPos; + if (lenLimit >= p->hashNumAvail) + lenLimit = p->hashNumAvail; + { + UInt32 size2 = p->hashNumAvail - lenLimit + 1; + if (size2 < size) + size = size2; + size2 = p->cyclicBufferSize - cyclicBufferPos; + if (size2 < size) + size = size2; + } + #ifndef MFMT_GM_INLINE + while (curPos < limit && size-- != 0) + { + UInt32 *startDistances = distances + curPos; + UInt32 num = (UInt32)(GetMatchesSpec1(lenLimit, pos - p->hashBuf[p->hashBufPos++], + pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue, + startDistances + 1, p->numHashBytes - 1) - startDistances); + *startDistances = num - 1; + curPos += num; + cyclicBufferPos++; + pos++; + p->buffer++; + } + #else + { + UInt32 posRes; + curPos = limit - GetMatchesSpecN(lenLimit, pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue, + distances + curPos, p->numHashBytes - 1, p->hashBuf + p->hashBufPos, (Int32)(limit - curPos) , size, &posRes); + p->hashBufPos += posRes - pos; + cyclicBufferPos += posRes - pos; + p->buffer += posRes - pos; + pos = posRes; + } + #endif + + numProcessed += pos - p->pos; + p->hashNumAvail -= pos - p->pos; + p->pos = pos; + if (cyclicBufferPos == p->cyclicBufferSize) + cyclicBufferPos = 0; + p->cyclicBufferPos = cyclicBufferPos; + } + } + distances[0] = curPos; +} + +void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex) +{ + CMtSync *sync = &p->hashSync; + if (!sync->needStart) + { + CriticalSection_Enter(&sync->cs); + sync->csWasEntered = True; + } + + BtGetMatches(p, p->btBuf + (globalBlockIndex & kMtBtNumBlocksMask) * kMtBtBlockSize); + + if (p->pos > kMtMaxValForNormalize - kMtBtBlockSize) + { + UInt32 subValue = p->pos - p->cyclicBufferSize; + MatchFinder_Normalize3(subValue, p->son, p->cyclicBufferSize * 2); + p->pos -= subValue; + } + + if (!sync->needStart) + { + CriticalSection_Leave(&sync->cs); + sync->csWasEntered = False; + } +} + +void BtThreadFunc(CMatchFinderMt *mt) +{ + CMtSync *p = &mt->btSync; + for (;;) + { + UInt32 blockIndex = 0; + Event_Wait(&p->canStart); + Event_Set(&p->wasStarted); + for (;;) + { + if (p->exit) + return; + if (p->stopWriting) + { + p->numProcessedBlocks = blockIndex; + MtSync_StopWriting(&mt->hashSync); + Event_Set(&p->wasStopped); + break; + } + Semaphore_Wait(&p->freeSemaphore); + BtFillBlock(mt, blockIndex++); + Semaphore_Release1(&p->filledSemaphore); + } + } +} + +void MatchFinderMt_Construct(CMatchFinderMt *p) +{ + p->hashBuf = 0; + MtSync_Construct(&p->hashSync); + MtSync_Construct(&p->btSync); +} + +void MatchFinderMt_FreeMem(CMatchFinderMt *p, ISzAlloc *alloc) +{ + alloc->Free(alloc, p->hashBuf); + p->hashBuf = 0; +} + +void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAlloc *alloc) +{ + MtSync_Destruct(&p->hashSync); + MtSync_Destruct(&p->btSync); + MatchFinderMt_FreeMem(p, alloc); +} + +#define kHashBufferSize (kMtHashBlockSize * kMtHashNumBlocks) +#define kBtBufferSize (kMtBtBlockSize * kMtBtNumBlocks) + +static unsigned MY_STD_CALL HashThreadFunc2(void *p) { HashThreadFunc((CMatchFinderMt *)p); return 0; } +static unsigned MY_STD_CALL BtThreadFunc2(void *p) +{ + Byte allocaDummy[0x180]; + int i = 0; + for (i = 0; i < 16; i++) + allocaDummy[i] = (Byte)i; + BtThreadFunc((CMatchFinderMt *)p); + return 0; +} + +SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore, + UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAlloc *alloc) +{ + CMatchFinder *mf = p->MatchFinder; + p->historySize = historySize; + if (kMtBtBlockSize <= matchMaxLen * 4) + return SZ_ERROR_PARAM; + if (p->hashBuf == 0) + { + p->hashBuf = (UInt32 *)alloc->Alloc(alloc, (kHashBufferSize + kBtBufferSize) * sizeof(UInt32)); + if (p->hashBuf == 0) + return SZ_ERROR_MEM; + p->btBuf = p->hashBuf + kHashBufferSize; + } + keepAddBufferBefore += (kHashBufferSize + kBtBufferSize); + keepAddBufferAfter += kMtHashBlockSize; + if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc)) + return SZ_ERROR_MEM; + + RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p, kMtHashNumBlocks)); + RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p, kMtBtNumBlocks)); + return SZ_OK; +} + +/* Call it after ReleaseStream / SetStream */ +void MatchFinderMt_Init(CMatchFinderMt *p) +{ + CMatchFinder *mf = p->MatchFinder; + p->btBufPos = p->btBufPosLimit = 0; + p->hashBufPos = p->hashBufPosLimit = 0; + MatchFinder_Init(mf); + p->pointerToCurPos = MatchFinder_GetPointerToCurrentPos(mf); + p->btNumAvailBytes = 0; + p->lzPos = p->historySize + 1; + + p->hash = mf->hash; + p->fixedHashSize = mf->fixedHashSize; + p->crc = mf->crc; + + p->son = mf->son; + p->matchMaxLen = mf->matchMaxLen; + p->numHashBytes = mf->numHashBytes; + p->pos = mf->pos; + p->buffer = mf->buffer; + p->cyclicBufferPos = mf->cyclicBufferPos; + p->cyclicBufferSize = mf->cyclicBufferSize; + p->cutValue = mf->cutValue; +} + +/* ReleaseStream is required to finish multithreading */ +void MatchFinderMt_ReleaseStream(CMatchFinderMt *p) +{ + MtSync_StopWriting(&p->btSync); + /* p->MatchFinder->ReleaseStream(); */ +} + +void MatchFinderMt_Normalize(CMatchFinderMt *p) +{ + MatchFinder_Normalize3(p->lzPos - p->historySize - 1, p->hash, p->fixedHashSize); + p->lzPos = p->historySize + 1; +} + +void MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p) +{ + UInt32 blockIndex; + MtSync_GetNextBlock(&p->btSync); + blockIndex = ((p->btSync.numProcessedBlocks - 1) & kMtBtNumBlocksMask); + p->btBufPosLimit = p->btBufPos = blockIndex * kMtBtBlockSize; + p->btBufPosLimit += p->btBuf[p->btBufPos++]; + p->btNumAvailBytes = p->btBuf[p->btBufPos++]; + if (p->lzPos >= kMtMaxValForNormalize - kMtBtBlockSize) + MatchFinderMt_Normalize(p); +} + +const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p) +{ + return p->pointerToCurPos; +} + +#define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p); + +UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p) +{ + GET_NEXT_BLOCK_IF_REQUIRED; + return p->btNumAvailBytes; +} + +Byte MatchFinderMt_GetIndexByte(CMatchFinderMt *p, Int32 index) +{ + return p->pointerToCurPos[index]; +} + +UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances) +{ + UInt32 hash2Value, curMatch2; + UInt32 *hash = p->hash; + const Byte *cur = p->pointerToCurPos; + UInt32 lzPos = p->lzPos; + MT_HASH2_CALC + + curMatch2 = hash[hash2Value]; + hash[hash2Value] = lzPos; + + if (curMatch2 >= matchMinPos) + if (cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) + { + *distances++ = 2; + *distances++ = lzPos - curMatch2 - 1; + } + return distances; +} + +UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances) +{ + UInt32 hash2Value, hash3Value, curMatch2, curMatch3; + UInt32 *hash = p->hash; + const Byte *cur = p->pointerToCurPos; + UInt32 lzPos = p->lzPos; + MT_HASH3_CALC + + curMatch2 = hash[ hash2Value]; + curMatch3 = hash[kFix3HashSize + hash3Value]; + + hash[ hash2Value] = + hash[kFix3HashSize + hash3Value] = + lzPos; + + if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) + { + distances[1] = lzPos - curMatch2 - 1; + if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2]) + { + distances[0] = 3; + return distances + 2; + } + distances[0] = 2; + distances += 2; + } + if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0]) + { + *distances++ = 3; + *distances++ = lzPos - curMatch3 - 1; + } + return distances; +} + +/* +UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances) +{ + UInt32 hash2Value, hash3Value, hash4Value, curMatch2, curMatch3, curMatch4; + UInt32 *hash = p->hash; + const Byte *cur = p->pointerToCurPos; + UInt32 lzPos = p->lzPos; + MT_HASH4_CALC + + curMatch2 = hash[ hash2Value]; + curMatch3 = hash[kFix3HashSize + hash3Value]; + curMatch4 = hash[kFix4HashSize + hash4Value]; + + hash[ hash2Value] = + hash[kFix3HashSize + hash3Value] = + hash[kFix4HashSize + hash4Value] = + lzPos; + + if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) + { + distances[1] = lzPos - curMatch2 - 1; + if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2]) + { + distances[0] = (cur[(ptrdiff_t)curMatch2 - lzPos + 3] == cur[3]) ? 4 : 3; + return distances + 2; + } + distances[0] = 2; + distances += 2; + } + if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0]) + { + distances[1] = lzPos - curMatch3 - 1; + if (cur[(ptrdiff_t)curMatch3 - lzPos + 3] == cur[3]) + { + distances[0] = 4; + return distances + 2; + } + distances[0] = 3; + distances += 2; + } + + if (curMatch4 >= matchMinPos) + if ( + cur[(ptrdiff_t)curMatch4 - lzPos] == cur[0] && + cur[(ptrdiff_t)curMatch4 - lzPos + 3] == cur[3] + ) + { + *distances++ = 4; + *distances++ = lzPos - curMatch4 - 1; + } + return distances; +} +*/ + +#define INCREASE_LZ_POS p->lzPos++; p->pointerToCurPos++; + +UInt32 MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *distances) +{ + const UInt32 *btBuf = p->btBuf + p->btBufPos; + UInt32 len = *btBuf++; + p->btBufPos += 1 + len; + p->btNumAvailBytes--; + { + UInt32 i; + for (i = 0; i < len; i += 2) + { + *distances++ = *btBuf++; + *distances++ = *btBuf++; + } + } + INCREASE_LZ_POS + return len; +} + +UInt32 MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *distances) +{ + const UInt32 *btBuf = p->btBuf + p->btBufPos; + UInt32 len = *btBuf++; + p->btBufPos += 1 + len; + + if (len == 0) + { + if (p->btNumAvailBytes-- >= 4) + len = (UInt32)(p->MixMatchesFunc(p, p->lzPos - p->historySize, distances) - (distances)); + } + else + { + /* Condition: there are matches in btBuf with length < p->numHashBytes */ + UInt32 *distances2; + p->btNumAvailBytes--; + distances2 = p->MixMatchesFunc(p, p->lzPos - btBuf[1], distances); + do + { + *distances2++ = *btBuf++; + *distances2++ = *btBuf++; + } + while ((len -= 2) != 0); + len = (UInt32)(distances2 - (distances)); + } + INCREASE_LZ_POS + return len; +} + +#define SKIP_HEADER2_MT do { GET_NEXT_BLOCK_IF_REQUIRED +#define SKIP_HEADER_MT(n) SKIP_HEADER2_MT if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash; +#define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += p->btBuf[p->btBufPos] + 1; } while (--num != 0); + +void MatchFinderMt0_Skip(CMatchFinderMt *p, UInt32 num) +{ + SKIP_HEADER2_MT { p->btNumAvailBytes--; + SKIP_FOOTER_MT +} + +void MatchFinderMt2_Skip(CMatchFinderMt *p, UInt32 num) +{ + SKIP_HEADER_MT(2) + UInt32 hash2Value; + MT_HASH2_CALC + hash[hash2Value] = p->lzPos; + SKIP_FOOTER_MT +} + +void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num) +{ + SKIP_HEADER_MT(3) + UInt32 hash2Value, hash3Value; + MT_HASH3_CALC + hash[kFix3HashSize + hash3Value] = + hash[ hash2Value] = + p->lzPos; + SKIP_FOOTER_MT +} + +/* +void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num) +{ + SKIP_HEADER_MT(4) + UInt32 hash2Value, hash3Value, hash4Value; + MT_HASH4_CALC + hash[kFix4HashSize + hash4Value] = + hash[kFix3HashSize + hash3Value] = + hash[ hash2Value] = + p->lzPos; + SKIP_FOOTER_MT +} +*/ + +void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable) +{ + vTable->Init = (Mf_Init_Func)MatchFinderMt_Init; + vTable->GetIndexByte = (Mf_GetIndexByte_Func)MatchFinderMt_GetIndexByte; + vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes; + vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos; + vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches; + switch(p->MatchFinder->numHashBytes) + { + case 2: + p->GetHeadsFunc = GetHeads2; + p->MixMatchesFunc = (Mf_Mix_Matches)0; + vTable->Skip = (Mf_Skip_Func)MatchFinderMt0_Skip; + vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches; + break; + case 3: + p->GetHeadsFunc = GetHeads3; + p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2; + vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip; + break; + default: + /* case 4: */ + p->GetHeadsFunc = p->MatchFinder->bigHash ? GetHeads4b : GetHeads4; + /* p->GetHeadsFunc = GetHeads4; */ + p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3; + vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip; + break; + /* + default: + p->GetHeadsFunc = GetHeads5; + p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4; + vTable->Skip = (Mf_Skip_Func)MatchFinderMt4_Skip; + break; + */ + } +} diff --git a/lzma/LzFindMt.h b/lzma/LzFindMt.h new file mode 100755 index 0000000..17ed237 --- /dev/null +++ b/lzma/LzFindMt.h @@ -0,0 +1,105 @@ +/* LzFindMt.h -- multithreaded Match finder for LZ algorithms +2009-02-07 : Igor Pavlov : Public domain */ + +#ifndef __LZ_FIND_MT_H +#define __LZ_FIND_MT_H + +#include "LzFind.h" +#include "Threads.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define kMtHashBlockSize (1 << 13) +#define kMtHashNumBlocks (1 << 3) +#define kMtHashNumBlocksMask (kMtHashNumBlocks - 1) + +#define kMtBtBlockSize (1 << 14) +#define kMtBtNumBlocks (1 << 6) +#define kMtBtNumBlocksMask (kMtBtNumBlocks - 1) + +typedef struct _CMtSync +{ + Bool wasCreated; + Bool needStart; + Bool exit; + Bool stopWriting; + + CThread thread; + CAutoResetEvent canStart; + CAutoResetEvent wasStarted; + CAutoResetEvent wasStopped; + CSemaphore freeSemaphore; + CSemaphore filledSemaphore; + Bool csWasInitialized; + Bool csWasEntered; + CCriticalSection cs; + UInt32 numProcessedBlocks; +} CMtSync; + +typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distances); + +/* kMtCacheLineDummy must be >= size_of_CPU_cache_line */ +#define kMtCacheLineDummy 128 + +typedef void (*Mf_GetHeads)(const Byte *buffer, UInt32 pos, + UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc); + +typedef struct _CMatchFinderMt +{ + /* LZ */ + const Byte *pointerToCurPos; + UInt32 *btBuf; + UInt32 btBufPos; + UInt32 btBufPosLimit; + UInt32 lzPos; + UInt32 btNumAvailBytes; + + UInt32 *hash; + UInt32 fixedHashSize; + UInt32 historySize; + const UInt32 *crc; + + Mf_Mix_Matches MixMatchesFunc; + + /* LZ + BT */ + CMtSync btSync; + Byte btDummy[kMtCacheLineDummy]; + + /* BT */ + UInt32 *hashBuf; + UInt32 hashBufPos; + UInt32 hashBufPosLimit; + UInt32 hashNumAvail; + + CLzRef *son; + UInt32 matchMaxLen; + UInt32 numHashBytes; + UInt32 pos; + Byte *buffer; + UInt32 cyclicBufferPos; + UInt32 cyclicBufferSize; /* it must be historySize + 1 */ + UInt32 cutValue; + + /* BT + Hash */ + CMtSync hashSync; + /* Byte hashDummy[kMtCacheLineDummy]; */ + + /* Hash */ + Mf_GetHeads GetHeadsFunc; + CMatchFinder *MatchFinder; +} CMatchFinderMt; + +void MatchFinderMt_Construct(CMatchFinderMt *p); +void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAlloc *alloc); +SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore, + UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAlloc *alloc); +void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable); +void MatchFinderMt_ReleaseStream(CMatchFinderMt *p); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lzma/Threads.c b/lzma/Threads.c new file mode 100755 index 0000000..1b8203f --- /dev/null +++ b/lzma/Threads.c @@ -0,0 +1,582 @@ +/* Threads.c */ + +#include "Threads.h" + +#ifdef ENV_BEOS +#include +#else +#include +#include +#endif + +#include + +#if defined(__linux__) +#define PTHREAD_MUTEX_ERRORCHECK PTHREAD_MUTEX_ERRORCHECK_NP +#endif + +#ifdef ENV_BEOS + +/* TODO : optimize the code and verify the returned values */ + +WRes Thread_Create(CThread *thread, THREAD_FUNC_RET_TYPE (THREAD_FUNC_CALL_TYPE *startAddress)(void *), LPVOID parameter) +{ + thread->_tid = spawn_thread((int32 (*)(void *))startAddress, "CThread", B_LOW_PRIORITY, parameter); + if (thread->_tid >= B_OK) { + resume_thread(thread->_tid); + } else { + thread->_tid = B_BAD_THREAD_ID; + } + thread->_created = 1; + return 0; // SZ_OK; +} + +WRes Thread_Wait(CThread *thread) +{ + int ret; + + if (thread->_created == 0) + return EINVAL; + + if (thread->_tid >= B_OK) + { + status_t exit_value; + wait_for_thread(thread->_tid, &exit_value); + thread->_tid = B_BAD_THREAD_ID; + } else { + return EINVAL; + } + + thread->_created = 0; + + return 0; +} + +WRes Thread_Close(CThread *thread) +{ + if (!thread->_created) return SZ_OK; + + thread->_tid = B_BAD_THREAD_ID; + thread->_created = 0; + return SZ_OK; +} + + +WRes Event_Create(CEvent *p, BOOL manualReset, int initialSignaled) +{ + p->_index_waiting = 0; + p->_manual_reset = manualReset; + p->_state = (initialSignaled ? TRUE : FALSE); + p->_created = 1; + p->_sem = create_sem(1,"event"); + return 0; +} + +WRes Event_Set(CEvent *p) { + int index; + acquire_sem(p->_sem); + p->_state = TRUE; + for(index = 0 ; index < p->_index_waiting ; index++) + { + send_data(p->_waiting[index], '7zCN', NULL, 0); + } + p->_index_waiting = 0; + release_sem(p->_sem); + return 0; +} + +WRes Event_Reset(CEvent *p) { + acquire_sem(p->_sem); + p->_state = FALSE; + release_sem(p->_sem); + return 0; +} + +WRes Event_Wait(CEvent *p) { + acquire_sem(p->_sem); + while (p->_state == FALSE) + { + thread_id sender; + p->_waiting[p->_index_waiting++] = find_thread(NULL); + release_sem(p->_sem); + /* int msg = */ receive_data(&sender, NULL, 0); + acquire_sem(p->_sem); + } + if (p->_manual_reset == FALSE) + { + p->_state = FALSE; + } + release_sem(p->_sem); + return 0; +} + +WRes Event_Close(CEvent *p) { + if (p->_created) + { + p->_created = 0; + delete_sem(p->_sem); + } + return 0; +} + +WRes Semaphore_Create(CSemaphore *p, UInt32 initiallyCount, UInt32 maxCount) +{ + p->_index_waiting = 0; + p->_count = initiallyCount; + p->_maxCount = maxCount; + p->_created = 1; + p->_sem = create_sem(1,"sem"); + return 0; +} + +WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 releaseCount) +{ + UInt32 newCount; + int index; + + if (releaseCount < 1) return EINVAL; + + acquire_sem(p->_sem); + newCount = p->_count + releaseCount; + if (newCount > p->_maxCount) + { + release_sem(p->_sem); + return EINVAL; + } + p->_count = newCount; + for(index = 0 ; index < p->_index_waiting ; index++) + { + send_data(p->_waiting[index], '7zCN', NULL, 0); + } + p->_index_waiting = 0; + release_sem(p->_sem); + return 0; +} + +WRes Semaphore_Wait(CSemaphore *p) { + acquire_sem(p->_sem); + while (p->_count < 1) + { + thread_id sender; + p->_waiting[p->_index_waiting++] = find_thread(NULL); + release_sem(p->_sem); + /* int msg = */ receive_data(&sender, NULL, 0); + acquire_sem(p->_sem); + } + p->_count--; + release_sem(p->_sem); + return 0; +} + +WRes Semaphore_Close(CSemaphore *p) { + if (p->_created) + { + p->_created = 0; + delete_sem(p->_sem); + } + return 0; +} + +WRes CriticalSection_Init(CCriticalSection * lpCriticalSection) +{ + lpCriticalSection->_sem = create_sem(1,"cc"); + return 0; +} + +#else /* !ENV_BEOS */ + +WRes Thread_Create(CThread *thread, THREAD_FUNC_RET_TYPE (THREAD_FUNC_CALL_TYPE *startAddress)(void *), LPVOID parameter) +{ + pthread_attr_t attr; + int ret; + + thread->_created = 0; + + ret = pthread_attr_init(&attr); + if (ret) return ret; + + ret = pthread_attr_setdetachstate(&attr,PTHREAD_CREATE_JOINABLE); + if (ret) return ret; + + ret = pthread_create(&thread->_tid, &attr, (void * (*)(void *))startAddress, parameter); + + /* ret2 = */ pthread_attr_destroy(&attr); + + if (ret) return ret; + + thread->_created = 1; + + return 0; // SZ_OK; +} + +WRes Thread_Wait(CThread *thread) +{ + void *thread_return; + int ret; + + if (thread->_created == 0) + return EINVAL; + + ret = pthread_join(thread->_tid,&thread_return); + thread->_created = 0; + + return ret; +} + +WRes Thread_Close(CThread *thread) +{ + if (!thread->_created) return SZ_OK; + + pthread_detach(thread->_tid); + thread->_tid = 0; + thread->_created = 0; + return SZ_OK; +} + +#ifdef DEBUG_SYNCHRO + +#include + +static void dump_error(int ligne,int ret,const char *text,void *param) +{ + printf("\n##T%d#ERROR2 (l=%d) %s : param=%p ret = %d (%s)##\n",(int)pthread_self(),ligne,text,param,ret,strerror(ret)); + // abort(); +} + +WRes Event_Create(CEvent *p, BOOL manualReset, int initialSignaled) +{ + int ret; + pthread_mutexattr_t mutexattr; + memset(&mutexattr,0,sizeof(mutexattr)); + ret = pthread_mutexattr_init(&mutexattr); + if (ret != 0) dump_error(__LINE__,ret,"Event_Create::pthread_mutexattr_init",&mutexattr); + ret = pthread_mutexattr_settype(&mutexattr,PTHREAD_MUTEX_ERRORCHECK); + if (ret != 0) dump_error(__LINE__,ret,"Event_Create::pthread_mutexattr_settype",&mutexattr); + ret = pthread_mutex_init(&p->_mutex,&mutexattr); + if (ret != 0) dump_error(__LINE__,ret,"Event_Create::pthread_mutexattr_init",&p->_mutex); + if (ret == 0) + { + ret = pthread_cond_init(&p->_cond,0); + if (ret != 0) dump_error(__LINE__,ret,"Event_Create::pthread_cond_init",&p->_cond); + p->_manual_reset = manualReset; + p->_state = (initialSignaled ? TRUE : FALSE); + p->_created = 1; + } + return ret; +} + +WRes Event_Set(CEvent *p) { + int ret = pthread_mutex_lock(&p->_mutex); + if (ret != 0) dump_error(__LINE__,ret,"ES::pthread_mutex_lock",&p->_mutex); + if (ret == 0) + { + p->_state = TRUE; + ret = pthread_cond_broadcast(&p->_cond); + if (ret != 0) dump_error(__LINE__,ret,"ES::pthread_cond_broadcast",&p->_cond); + if (ret == 0) + { + ret = pthread_mutex_unlock(&p->_mutex); + if (ret != 0) dump_error(__LINE__,ret,"ES::pthread_mutex_unlock",&p->_mutex); + } + } + return ret; +} + +WRes Event_Reset(CEvent *p) { + int ret = pthread_mutex_lock(&p->_mutex); + if (ret != 0) dump_error(__LINE__,ret,"ER::pthread_mutex_lock",&p->_mutex); + if (ret == 0) + { + p->_state = FALSE; + ret = pthread_mutex_unlock(&p->_mutex); + if (ret != 0) dump_error(__LINE__,ret,"ER::pthread_mutex_unlock",&p->_mutex); + } + return ret; +} + +WRes Event_Wait(CEvent *p) { + int ret = pthread_mutex_lock(&p->_mutex); + if (ret != 0) dump_error(__LINE__,ret,"EW::pthread_mutex_lock",&p->_mutex); + if (ret == 0) + { + while ((p->_state == FALSE) && (ret == 0)) + { + ret = pthread_cond_wait(&p->_cond, &p->_mutex); + if (ret != 0) dump_error(__LINE__,ret,"EW::pthread_cond_wait",&p->_mutex); + } + if (ret == 0) + { + if (p->_manual_reset == FALSE) + { + p->_state = FALSE; + } + ret = pthread_mutex_unlock(&p->_mutex); + if (ret != 0) dump_error(__LINE__,ret,"EW::pthread_mutex_unlock",&p->_mutex); + } + } + return ret; +} + +WRes Event_Close(CEvent *p) { + if (p->_created) + { + int ret; + p->_created = 0; + ret = pthread_mutex_destroy(&p->_mutex); + if (ret != 0) dump_error(__LINE__,ret,"EC::pthread_mutex_destroy",&p->_mutex); + ret = pthread_cond_destroy(&p->_cond); + if (ret != 0) dump_error(__LINE__,ret,"EC::pthread_cond_destroy",&p->_cond); + } + return 0; +} + +WRes Semaphore_Create(CSemaphore *p, UInt32 initiallyCount, UInt32 maxCount) +{ + int ret; + pthread_mutexattr_t mutexattr; + memset(&mutexattr,0,sizeof(mutexattr)); + ret = pthread_mutexattr_init(&mutexattr); + if (ret != 0) dump_error(__LINE__,ret,"SemC::pthread_mutexattr_init",&mutexattr); + ret = pthread_mutexattr_settype(&mutexattr,PTHREAD_MUTEX_ERRORCHECK); + if (ret != 0) dump_error(__LINE__,ret,"SemC::pthread_mutexattr_settype",&mutexattr); + ret = pthread_mutex_init(&p->_mutex,&mutexattr); + if (ret != 0) dump_error(__LINE__,ret,"SemC::pthread_mutexattr_init",&p->_mutex); + if (ret == 0) + { + ret = pthread_cond_init(&p->_cond,0); + if (ret != 0) dump_error(__LINE__,ret,"SemC::pthread_cond_init",&p->_mutex); + p->_count = initiallyCount; + p->_maxCount = maxCount; + p->_created = 1; + } + return ret; +} + +WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 releaseCount) +{ + int ret; + if (releaseCount < 1) return EINVAL; + + ret = pthread_mutex_lock(&p->_mutex); + if (ret != 0) dump_error(__LINE__,ret,"SemR::pthread_mutex_lock",&p->_mutex); + if (ret == 0) + { + UInt32 newCount = p->_count + releaseCount; + if (newCount > p->_maxCount) + { + ret = pthread_mutex_unlock(&p->_mutex); + if (ret != 0) dump_error(__LINE__,ret,"SemR::pthread_mutex_unlock",&p->_mutex); + return EINVAL; + } + p->_count = newCount; + ret = pthread_cond_broadcast(&p->_cond); + if (ret != 0) dump_error(__LINE__,ret,"SemR::pthread_cond_broadcast",&p->_cond); + if (ret == 0) + { + ret = pthread_mutex_unlock(&p->_mutex); + if (ret != 0) dump_error(__LINE__,ret,"SemR::pthread_mutex_unlock",&p->_mutex); + } + } + return ret; +} + +WRes Semaphore_Wait(CSemaphore *p) { + int ret = pthread_mutex_lock(&p->_mutex); + if (ret != 0) dump_error(__LINE__,ret,"SemW::pthread_mutex_lock",&p->_mutex); + if (ret == 0) + { + while ((p->_count < 1) && (ret == 0)) + { + ret = pthread_cond_wait(&p->_cond, &p->_mutex); + if (ret != 0) dump_error(__LINE__,ret,"SemW::pthread_cond_wait",&p->_mutex); + } + if (ret == 0) + { + p->_count--; + ret = pthread_mutex_unlock(&p->_mutex); + if (ret != 0) dump_error(__LINE__,ret,"SemW::pthread_mutex_unlock",&p->_mutex); + } + } + return ret; +} + +WRes Semaphore_Close(CSemaphore *p) { + if (p->_created) + { + int ret; + p->_created = 0; + ret = pthread_mutex_destroy(&p->_mutex); + if (ret != 0) dump_error(__LINE__,ret,"Semc::pthread_mutex_destroy",&p->_mutex); + ret = pthread_cond_destroy(&p->_cond); + if (ret != 0) dump_error(__LINE__,ret,"Semc::pthread_cond_destroy",&p->_cond); + } + return 0; +} + +WRes CriticalSection_Init(CCriticalSection * lpCriticalSection) +{ + if (lpCriticalSection) + { + int ret; + pthread_mutexattr_t mutexattr; + memset(&mutexattr,0,sizeof(mutexattr)); + ret = pthread_mutexattr_init(&mutexattr); + if (ret != 0) dump_error(__LINE__,ret,"CS I::pthread_mutexattr_init",&mutexattr); + ret = pthread_mutexattr_settype(&mutexattr,PTHREAD_MUTEX_ERRORCHECK); + if (ret != 0) dump_error(__LINE__,ret,"CS I::pthread_mutexattr_settype",&mutexattr); + ret = pthread_mutex_init(&lpCriticalSection->_mutex,&mutexattr); + if (ret != 0) dump_error(__LINE__,ret,"CS I::pthread_mutexattr_init",&lpCriticalSection->_mutex); + return ret; + } + return EINTR; +} + +void CriticalSection_Enter(CCriticalSection * lpCriticalSection) +{ + if (lpCriticalSection) + { + int ret = pthread_mutex_lock(&(lpCriticalSection->_mutex)); + if (ret != 0) dump_error(__LINE__,ret,"CS::pthread_mutex_lock",&(lpCriticalSection->_mutex)); + } +} + +void CriticalSection_Leave(CCriticalSection * lpCriticalSection) +{ + if (lpCriticalSection) + { + int ret = pthread_mutex_unlock(&(lpCriticalSection->_mutex)); + if (ret != 0) dump_error(__LINE__,ret,"CS::pthread_mutex_unlock",&(lpCriticalSection->_mutex)); + } +} + +void CriticalSection_Delete(CCriticalSection * lpCriticalSection) +{ + if (lpCriticalSection) + { + int ret = pthread_mutex_destroy(&(lpCriticalSection->_mutex)); + if (ret != 0) dump_error(__LINE__,ret,"CS::pthread_mutex_destroy",&(lpCriticalSection->_mutex)); + } +} + +#else + +WRes Event_Create(CEvent *p, BOOL manualReset, int initialSignaled) +{ + pthread_mutex_init(&p->_mutex,0); + pthread_cond_init(&p->_cond,0); + p->_manual_reset = manualReset; + p->_state = (initialSignaled ? TRUE : FALSE); + p->_created = 1; + return 0; +} + +WRes Event_Set(CEvent *p) { + pthread_mutex_lock(&p->_mutex); + p->_state = TRUE; + pthread_cond_broadcast(&p->_cond); + pthread_mutex_unlock(&p->_mutex); + return 0; +} + +WRes Event_Reset(CEvent *p) { + pthread_mutex_lock(&p->_mutex); + p->_state = FALSE; + pthread_mutex_unlock(&p->_mutex); + return 0; +} + +WRes Event_Wait(CEvent *p) { + pthread_mutex_lock(&p->_mutex); + while (p->_state == FALSE) + { + pthread_cond_wait(&p->_cond, &p->_mutex); + } + if (p->_manual_reset == FALSE) + { + p->_state = FALSE; + } + pthread_mutex_unlock(&p->_mutex); + return 0; +} + +WRes Event_Close(CEvent *p) { + if (p->_created) + { + p->_created = 0; + pthread_mutex_destroy(&p->_mutex); + pthread_cond_destroy(&p->_cond); + } + return 0; +} + +WRes Semaphore_Create(CSemaphore *p, UInt32 initiallyCount, UInt32 maxCount) +{ + pthread_mutex_init(&p->_mutex,0); + pthread_cond_init(&p->_cond,0); + p->_count = initiallyCount; + p->_maxCount = maxCount; + p->_created = 1; + return 0; +} + +WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 releaseCount) +{ + UInt32 newCount; + + if (releaseCount < 1) return EINVAL; + + pthread_mutex_lock(&p->_mutex); + + newCount = p->_count + releaseCount; + if (newCount > p->_maxCount) + { + pthread_mutex_unlock(&p->_mutex); + return EINVAL; + } + p->_count = newCount; + pthread_cond_broadcast(&p->_cond); + pthread_mutex_unlock(&p->_mutex); + return 0; +} + +WRes Semaphore_Wait(CSemaphore *p) { + pthread_mutex_lock(&p->_mutex); + while (p->_count < 1) + { + pthread_cond_wait(&p->_cond, &p->_mutex); + } + p->_count--; + pthread_mutex_unlock(&p->_mutex); + return 0; +} + +WRes Semaphore_Close(CSemaphore *p) { + if (p->_created) + { + p->_created = 0; + pthread_mutex_destroy(&p->_mutex); + pthread_cond_destroy(&p->_cond); + } + return 0; +} + +WRes CriticalSection_Init(CCriticalSection * lpCriticalSection) +{ + return pthread_mutex_init(&(lpCriticalSection->_mutex),0); +} + +#endif /* DEBUG_SYNCHRO */ + +#endif /* ENV_BEOS */ + +WRes ManualResetEvent_Create(CManualResetEvent *p, int initialSignaled) + { return Event_Create(p, TRUE, initialSignaled); } + +WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p) + { return ManualResetEvent_Create(p, 0); } + +WRes AutoResetEvent_Create(CAutoResetEvent *p, int initialSignaled) + { return Event_Create(p, FALSE, initialSignaled); } +WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p) + { return AutoResetEvent_Create(p, 0); } + diff --git a/lzma/Threads.h b/lzma/Threads.h new file mode 100755 index 0000000..07b05be --- /dev/null +++ b/lzma/Threads.h @@ -0,0 +1,123 @@ +/* Threads.h -- multithreading library +2008-11-22 : Igor Pavlov : Public domain */ + +#ifndef __7Z_THRESDS_H +#define __7Z_THRESDS_H + +#include "Types.h" +#include "windows.h" + +#ifdef ENV_BEOS +#include +#define MAX_THREAD 256 +#else +#include +#endif + +/* #define DEBUG_SYNCHRO 1 */ + +typedef struct _CThread +{ +#ifdef ENV_BEOS + thread_id _tid; +#else + pthread_t _tid; +#endif + int _created; + +} CThread; + +#define Thread_Construct(thread) (thread)->_created = 0 +#define Thread_WasCreated(thread) ((thread)->_created != 0) + +typedef unsigned THREAD_FUNC_RET_TYPE; +#define THREAD_FUNC_CALL_TYPE MY_STD_CALL +#define THREAD_FUNC_DECL THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE + +typedef THREAD_FUNC_RET_TYPE (THREAD_FUNC_CALL_TYPE * THREAD_FUNC_TYPE)(void *); + +WRes Thread_Create(CThread *thread, THREAD_FUNC_TYPE startAddress, LPVOID parameter); +WRes Thread_Wait(CThread *thread); +WRes Thread_Close(CThread *thread); + +typedef struct _CEvent +{ + int _created; + int _manual_reset; + int _state; +#ifdef ENV_BEOS + thread_id _waiting[MAX_THREAD]; + int _index_waiting; + sem_id _sem; +#else + pthread_mutex_t _mutex; + pthread_cond_t _cond; +#endif +} CEvent; + +typedef CEvent CAutoResetEvent; +typedef CEvent CManualResetEvent; + +#define Event_Construct(event) (event)->_created = 0 +#define Event_IsCreated(event) ((event)->_created) + +WRes ManualResetEvent_Create(CManualResetEvent *event, int initialSignaled); +WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *event); +WRes AutoResetEvent_Create(CAutoResetEvent *event, int initialSignaled); +WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *event); +WRes Event_Set(CEvent *event); +WRes Event_Reset(CEvent *event); +WRes Event_Wait(CEvent *event); +WRes Event_Close(CEvent *event); + + +typedef struct _CSemaphore +{ + int _created; + UInt32 _count; + UInt32 _maxCount; +#ifdef ENV_BEOS + thread_id _waiting[MAX_THREAD]; + int _index_waiting; + sem_id _sem; +#else + pthread_mutex_t _mutex; + pthread_cond_t _cond; +#endif +} CSemaphore; + +#define Semaphore_Construct(p) (p)->_created = 0 + +WRes Semaphore_Create(CSemaphore *p, UInt32 initiallyCount, UInt32 maxCount); +WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num); +#define Semaphore_Release1(p) Semaphore_ReleaseN(p, 1) +WRes Semaphore_Wait(CSemaphore *p); +WRes Semaphore_Close(CSemaphore *p); + +typedef struct { +#ifdef ENV_BEOS + sem_id _sem; +#else + pthread_mutex_t _mutex; +#endif +} CCriticalSection; + +WRes CriticalSection_Init(CCriticalSection *p); +#ifdef ENV_BEOS +#define CriticalSection_Delete(p) delete_sem((p)->_sem) +#define CriticalSection_Enter(p) acquire_sem((p)->_sem) +#define CriticalSection_Leave(p) release_sem((p)->_sem) +#else +#ifdef DEBUG_SYNCHRO +void CriticalSection_Delete(CCriticalSection *); +void CriticalSection_Enter(CCriticalSection *); +void CriticalSection_Leave(CCriticalSection *); +#else +#define CriticalSection_Delete(p) pthread_mutex_destroy(&((p)->_mutex)) +#define CriticalSection_Enter(p) pthread_mutex_lock(&((p)->_mutex)) +#define CriticalSection_Leave(p) pthread_mutex_unlock(&((p)->_mutex)) +#endif +#endif + +#endif + diff --git a/lzma/basetyps.h b/lzma/basetyps.h new file mode 100644 index 0000000..d761e74 --- /dev/null +++ b/lzma/basetyps.h @@ -0,0 +1,19 @@ +#ifndef _BASETYPS_H +#define _BASETYPS_H + +#ifdef ENV_HAVE_GCCVISIBILITYPATCH + #define DLLEXPORT __attribute__ ((visibility("default"))) + #else + #define DLLEXPORT + #endif + +#ifdef __cplusplus +#define STDAPI extern "C" DLLEXPORT HRESULT +#else +#define STDAPI extern DLLEXPORT HRESULT +#endif /* __cplusplus */ + +typedef GUID IID; +typedef GUID CLSID; +#endif + diff --git a/lzma/windows.h b/lzma/windows.h new file mode 100644 index 0000000..ef0ca3d --- /dev/null +++ b/lzma/windows.h @@ -0,0 +1,194 @@ +/* + windows.h - main header file for the Win32 API + + Written by Anders Norlander + + This file is part of a free library for the Win32 API. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +*/ +#ifndef _WINDOWS_H +#define _WINDOWS_H + +#include + +/* BEGIN #include */ + +#include "Common/MyWindows.h" // FIXED + +#ifndef CONST +#define CONST const +#endif + +#undef MAX_PATH +#define MAX_PATH 4096 /* Linux : 4096 - Windows : 260 */ + +#ifndef FALSE +#define FALSE 0 +#endif +#ifndef TRUE +#define TRUE 1 +#endif + +#define WINAPI + +#undef BOOL +typedef int BOOL; + +/* BEGIN #include */ +/* BEGIN */ +#define NO_ERROR 0L +#define ERROR_ALREADY_EXISTS EEXIST +#define ERROR_FILE_EXISTS EEXIST +#define ERROR_INVALID_HANDLE EBADF +#define ERROR_PATH_NOT_FOUND ENOENT +#define ERROR_DISK_FULL ENOSPC +#define ERROR_NO_MORE_FILES 0x100123 // FIXME + +/* see Common/WyWindows.h +#define S_OK ((HRESULT)0x00000000L) +#define S_FALSE ((HRESULT)0x00000001L) +#define E_INVALIDARG ((HRESULT)0x80070057L) +#define E_NOTIMPL ((HRESULT)0x80004001L) +#define E_NOINTERFACE ((HRESULT)0x80004002L) +#define E_ABORT ((HRESULT)0x80004004L) +#define E_FAIL ((HRESULT)0x80004005L) +#define E_OUTOFMEMORY ((HRESULT)0x8007000EL) +#define STG_E_INVALIDFUNCTION ((HRESULT)0x80030001L) +#define SUCCEEDED(Status) ((HRESULT)(Status) >= 0) +#define FAILED(Status) ((HRESULT)(Status)<0) +*/ +#ifndef VOID +#define VOID void +#endif +typedef void *PVOID,*LPVOID; +typedef WCHAR *LPWSTR; +typedef CHAR *LPSTR; +typedef TCHAR *LPTSTR; + +#ifdef UNICODE +/* + * P7ZIP_TEXT is a private macro whose specific use is to force the expansion of a + * macro passed as an argument to the macro TEXT. DO NOT use this + * macro within your programs. It's name and function could change without + * notice. + */ +#define P7ZIP_TEXT(q) L##q +#else +#define P7ZIP_TEXT(q) q +#endif +/* + * UNICODE a constant string when UNICODE is defined, else returns the string + * unmodified. + * The corresponding macros _TEXT() and _T() for mapping _UNICODE strings + * passed to C runtime functions are defined in mingw/tchar.h + */ +#define TEXT(q) P7ZIP_TEXT(q) + +typedef BYTE BOOLEAN; + +/* BEGIN #include */ +#ifndef __int64 +#define __int64 long long +#endif +typedef unsigned __int64 UINT64; +typedef __int64 INT64; +/* END #include */ + +#define FILE_ATTRIBUTE_READONLY 1 +#define FILE_ATTRIBUTE_HIDDEN 2 +#define FILE_ATTRIBUTE_SYSTEM 4 +#define FILE_ATTRIBUTE_DIRECTORY 16 +#define FILE_ATTRIBUTE_ARCHIVE 32 +#define FILE_ATTRIBUTE_DEVICE 64 +#define FILE_ATTRIBUTE_NORMAL 128 +#define FILE_ATTRIBUTE_TEMPORARY 256 +#define FILE_ATTRIBUTE_SPARSE_FILE 512 +#define FILE_ATTRIBUTE_REPARSE_POINT 1024 +#define FILE_ATTRIBUTE_COMPRESSED 2048 +#define FILE_ATTRIBUTE_OFFLINE 0x1000 +#define FILE_ATTRIBUTE_ENCRYPTED 0x4000 +#define FILE_ATTRIBUTE_UNIX_EXTENSION 0x8000 /* trick for Unix */ + +/* END */ + +#include +#include + +/* END #include */ + +/* END #include */ + +/* BEGIN #include */ + +#define WAIT_OBJECT_0 0 +#define INFINITE 0xFFFFFFFF + +typedef struct _SYSTEMTIME { + WORD wYear; + WORD wMonth; + WORD wDayOfWeek; + WORD wDay; + WORD wHour; + WORD wMinute; + WORD wSecond; + WORD wMilliseconds; +} SYSTEMTIME; + +#ifdef __cplusplus +extern "C" { +#endif + +BOOL WINAPI DosDateTimeToFileTime(WORD,WORD,FILETIME *); +BOOL WINAPI FileTimeToDosDateTime(CONST FILETIME *,WORD *, WORD *); +BOOL WINAPI FileTimeToLocalFileTime(CONST FILETIME *,FILETIME *); +BOOL WINAPI FileTimeToSystemTime(CONST FILETIME *,SYSTEMTIME *); +BOOL WINAPI LocalFileTimeToFileTime(CONST FILETIME *,FILETIME *); +VOID WINAPI GetSystemTime(SYSTEMTIME *); +BOOL WINAPI SystemTimeToFileTime(const SYSTEMTIME*,FILETIME *); + +DWORD WINAPI GetTickCount(VOID); + +#ifdef __cplusplus +} +#endif +/* END #include */ + +/* BEGIN #include */ + +#define CP_ACP 0 +#define CP_OEMCP 1 +#define CP_UTF8 65001 + +/* #include */ +#include +struct IEnumSTATPROPSTG; + +typedef struct tagSTATPROPSTG { + LPOLESTR lpwstrName; + PROPID propid; + VARTYPE vt; +} STATPROPSTG; + +#ifdef __cplusplus +extern "C" const IID IID_ISequentialStream; +struct ISequentialStream : public IUnknown +{ + STDMETHOD(QueryInterface)(REFIID,PVOID*) PURE; + STDMETHOD_(ULONG,AddRef)(void) PURE; + STDMETHOD_(ULONG,Release)(void) PURE; + STDMETHOD(Read)(void*,ULONG,ULONG*) PURE; + STDMETHOD(Write)(void const*,ULONG,ULONG*) PURE; +}; +#else +extern const IID IID_ISequentialStream; +#endif /* __cplusplus */ + + +/* END #include */ + +#endif + diff --git a/lzma_compress.c b/lzma_compress.c index 8d0b2e6..e0b204a 100644 --- a/lzma_compress.c +++ b/lzma_compress.c @@ -46,11 +46,19 @@ lzma_stats(int show) { } +void +lzma_props(algo_props_t *data, int level, ssize_t chunksize) { + data->compress_mt_capable = 1; + data->decompress_mt_capable = 0; + data->buf_extra = 0; + data->c_max_threads = 2; +} + /* * The two functions below are not thread-safe, by design. */ int -lzma_init(void **data, int *level, ssize_t chunksize) +lzma_init(void **data, int *level, int nthreads, ssize_t chunksize) { if (!p) { p = (CLzmaEncProps *)slab_alloc(NULL, sizeof (CLzmaEncProps)); @@ -99,6 +107,7 @@ lzma_init(void **data, int *level, ssize_t chunksize) } if (*level > 9) *level = 9; p->level = *level; + p->numThreads = nthreads; LzmaEncProps_Normalize(p); slab_cache_add(p->litprob_sz); } diff --git a/main.c b/main.c index f64c70c..ef11cb9 100644 --- a/main.c +++ b/main.c @@ -45,10 +45,6 @@ #include #include #include -#include - -/* Needed for CLzmaEncprops. */ -#include /* * We use 5MB chunks by default. @@ -71,6 +67,7 @@ static compress_func_ptr _decompress_func; static init_func_ptr _init_func; static deinit_func_ptr _deinit_func; static stats_func_ptr _stats_func; +static props_func_ptr _props_func; static int main_cancel; static int adapt_mode = 0; @@ -336,10 +333,12 @@ start_decompress(const char *filename, const char *to_filename) ssize_t chunksize, compressed_chunksize; struct cmp_data **dary, *tdat; pthread_t writer_thr; + algo_props_t props; err = 0; flags = 0; thread = 0; + init_algo_props(&props); /* * Open files and do sanity checks. @@ -405,14 +404,10 @@ start_decompress(const char *filename, const char *to_filename) compressed_chunksize = chunksize + sizeof (chunksize) + sizeof (uint64_t) + sizeof (chunksize) + zlib_buf_extra(chunksize); - /* - * Adjust for LZ4 overflow size if LZ4 was selected. - * TODO: A more generic way to handle this for various algos. I'm too - * lazy to do this at present. - */ - if (strncmp(algo, "lz4", 3) == 0) { - if (chunksize + lz4_buf_extra(chunksize) > compressed_chunksize) { - compressed_chunksize += (chunksize + lz4_buf_extra(chunksize) - + if (_props_func) { + _props_func(&props, level, chunksize); + if (chunksize + props.buf_extra > compressed_chunksize) { + compressed_chunksize += (chunksize + props.buf_extra - compressed_chunksize); } } @@ -424,8 +419,13 @@ start_decompress(const char *filename, const char *to_filename) nprocs = sysconf(_SC_NPROCESSORS_ONLN); if (nthreads > 0 && nthreads < nprocs) nprocs = nthreads; + else + nthreads = nprocs; - fprintf(stderr, "Scaling to %d threads\n", nprocs); + set_threadcounts(&props, &nthreads, nprocs, DECOMPRESS_THREADS); + fprintf(stderr, "Scaling to %d thread", nthreads * props.nthreads); + if (nprocs > 1) fprintf(stderr, "s"); + fprintf(stderr, "\n"); slab_cache_add(compressed_chunksize + CHDR_SZ); slab_cache_add(chunksize); slab_cache_add(sizeof (struct cmp_data)); @@ -463,7 +463,7 @@ start_decompress(const char *filename, const char *to_filename) sem_init(&(tdat->cmp_done_sem), 0, 0); sem_init(&(tdat->write_done_sem), 0, 1); if (_init_func) { - if (_init_func(&(tdat->data), &(tdat->level), chunksize) != 0) { + if (_init_func(&(tdat->data), &(tdat->level), props.nthreads, chunksize) != 0) { UNCOMP_BAIL; } } @@ -826,6 +826,7 @@ start_compress(const char *filename, uint64_t chunksize, int level) pthread_t writer_thr; uchar_t *cread_buf, *pos; rabin_context_t *rctx; + algo_props_t props; /* * Compressed buffer size must include zlib/dedup scratch space and @@ -841,15 +842,12 @@ start_compress(const char *filename, uint64_t chunksize, int level) */ compressed_chunksize = chunksize + sizeof (chunksize) + sizeof (uint64_t) + sizeof (chunksize) + zlib_buf_extra(chunksize); + init_algo_props(&props); - /* - * Adjust for LZ4 overflow size if LZ4 was selected. - * TODO: A more generic way to handle this for various algos. I'm too - * lazy to do this at present. - */ - if (strncmp(algo, "lz4", 3) == 0) { - if (chunksize + lz4_buf_extra(chunksize) > compressed_chunksize) { - compressed_chunksize += (chunksize + lz4_buf_extra(chunksize) - + if (_props_func) { + _props_func(&props, level, chunksize); + if (chunksize + props.buf_extra > compressed_chunksize) { + compressed_chunksize += (chunksize + props.buf_extra - compressed_chunksize); } } @@ -868,6 +866,12 @@ start_compress(const char *filename, uint64_t chunksize, int level) slab_cache_add(compressed_chunksize + CHDR_SZ); slab_cache_add(sizeof (struct cmp_data)); + nprocs = sysconf(_SC_NPROCESSORS_ONLN); + if (nthreads > 0 && nthreads < nprocs) + nprocs = nthreads; + else + nthreads = nprocs; + /* A host of sanity checks. */ if (!pipe_mode) { if ((uncompfd = open(filename, O_RDWR, 0)) == -1) @@ -933,11 +937,8 @@ start_compress(const char *filename, uint64_t chunksize, int level) } } - nprocs = sysconf(_SC_NPROCESSORS_ONLN); - if (nthreads > 0 && nthreads < nprocs) - nprocs = nthreads; - - fprintf(stderr, "Scaling to %d thread", nprocs); + set_threadcounts(&props, &nthreads, nprocs, COMPRESS_THREADS); + fprintf(stderr, "Scaling to %d thread", nthreads * props.nthreads); if (nprocs > 1) fprintf(stderr, "s"); fprintf(stderr, "\n"); @@ -981,7 +982,7 @@ start_compress(const char *filename, uint64_t chunksize, int level) sem_init(&(tdat->cmp_done_sem), 0, 0); sem_init(&(tdat->write_done_sem), 0, 1); if (_init_func) { - if (_init_func(&(tdat->data), &(tdat->level), chunksize) != 0) { + if (_init_func(&(tdat->data), &(tdat->level), props.nthreads, chunksize) != 0) { COMP_BAIL; } } @@ -1226,6 +1227,7 @@ init_algo(const char *algo, int bail) /* Copy given string into known length buffer to avoid memcmp() overruns. */ strncpy(algorithm, algo, 8); + _props_func = NULL; if (memcmp(algorithm, "zlib", 4) == 0) { _compress_func = zlib_compress; _decompress_func = zlib_decompress; @@ -1240,6 +1242,7 @@ init_algo(const char *algo, int bail) _init_func = lzma_init; _deinit_func = lzma_deinit; _stats_func = lzma_stats; + _props_func = lzma_props; rv = 0; } else if (memcmp(algorithm, "bzip2", 5) == 0) { @@ -1272,6 +1275,7 @@ init_algo(const char *algo, int bail) _init_func = lz4_init; _deinit_func = lz4_deinit; _stats_func = lz4_stats; + _props_func = lz4_props; rv = 0; } else if (memcmp(algorithm, "none", 4) == 0) { diff --git a/none_compress.c b/none_compress.c index 54c95ce..9f84297 100644 --- a/none_compress.c +++ b/none_compress.c @@ -37,7 +37,7 @@ none_stats(int show) } int -none_init(void **data, int *level, ssize_t chunksize) +none_init(void **data, int *level, int nthreads, ssize_t chunksize) { return (0); } diff --git a/pcompress.h b/pcompress.h index 5a120bf..ab088ee 100644 --- a/pcompress.h +++ b/pcompress.h @@ -97,15 +97,18 @@ extern int lz4_decompress(void *src, size_t srclen, void *dst, extern int none_decompress(void *src, size_t srclen, void *dst, size_t *dstlen, int level, uchar_t chdr, void *data); -extern int adapt_init(void **data, int *level, ssize_t chunksize); -extern int adapt2_init(void **data, int *level, ssize_t chunksize); -extern int lzma_init(void **data, int *level, ssize_t chunksize); -extern int ppmd_init(void **data, int *level, ssize_t chunksize); -extern int bzip2_init(void **data, int *level, ssize_t chunksize); -extern int zlib_init(void **data, int *level, ssize_t chunksize); -extern int lz_fx_init(void **data, int *level, ssize_t chunksize); -extern int lz4_init(void **data, int *level, ssize_t chunksize); -extern int none_init(void **data, int *level, ssize_t chunksize); +extern int adapt_init(void **data, int *level, int nthreads, ssize_t chunksize); +extern int adapt2_init(void **data, int *level, int nthreads, ssize_t chunksize); +extern int lzma_init(void **data, int *level, int nthreads, ssize_t chunksize); +extern int ppmd_init(void **data, int *level, int nthreads, ssize_t chunksize); +extern int bzip2_init(void **data, int *level, int nthreads, ssize_t chunksize); +extern int zlib_init(void **data, int *level, int nthreads, ssize_t chunksize); +extern int lz_fx_init(void **data, int *level, int nthreads, ssize_t chunksize); +extern int lz4_init(void **data, int *level, int nthreads, ssize_t chunksize); +extern int none_init(void **data, int *level, int nthreads, ssize_t chunksize); + +extern void lzma_props(algo_props_t *data, int level, ssize_t chunksize); +extern void lz4_props(algo_props_t *data, int level, ssize_t chunksize); extern int zlib_deinit(void **data); extern int adapt_deinit(void **data); diff --git a/ppmd_compress.c b/ppmd_compress.c index 63b6c06..35e377b 100644 --- a/ppmd_compress.c +++ b/ppmd_compress.c @@ -62,7 +62,7 @@ ppmd_stats(int show) } int -ppmd_init(void **data, int *level, ssize_t chunksize) +ppmd_init(void **data, int *level, int nthreads, ssize_t chunksize) { CPpmd8 *_ppmd; diff --git a/utils.c b/utils.c index 33606bf..a50f244 100644 --- a/utils.c +++ b/utils.c @@ -244,3 +244,45 @@ Write(int fd, const void *buf, size_t count) return (count - rem); } +/* + * Thread sizing. We want a balanced combination of chunk threads and compression + * algorithm threads that best fit the available/allowed number of processors. + */ +void +set_threadcounts(algo_props_t *props, int *nthreads, int nprocs, algo_threads_type_t typ) { + int mt_capable; + + if (typ == COMPRESS_THREADS) + mt_capable = props->compress_mt_capable; + else + mt_capable = props->decompress_mt_capable; + + if (mt_capable) { + int nthreads1, p_max; + + if (nprocs == 3) { + props->nthreads = 1; + *nthreads = 3; + return; + } + + if (typ == COMPRESS_THREADS) + p_max = props->c_max_threads; + else + p_max = props->d_max_threads; + + nthreads1 = 1; + props->nthreads = 1; + while (nthreads1 < *nthreads || props->nthreads < p_max) { + if ((props->nthreads+1) * nthreads1 <= nprocs && props->nthreads < p_max) { + props->nthreads++; + + } else if (props->nthreads * (nthreads1+1) <= nprocs && nthreads1 < *nthreads) { + nthreads1++; + } else { + break; + } + } + *nthreads = nthreads1; + } +} diff --git a/utils.h b/utils.h index a3f00c8..05c4c8e 100644 --- a/utils.h +++ b/utils.h @@ -101,6 +101,20 @@ typedef ssize_t bsize_t; #define DEBUG_STAT_EN(...) #endif +typedef struct { + uint32_t buf_extra; + int compress_mt_capable; + int decompress_mt_capable; + int nthreads; + int c_max_threads; + int d_max_threads; +} algo_props_t; + +typedef enum { + COMPRESS_THREADS = 1, + DECOMPRESS_THREADS +} algo_threads_type_t; + extern void err_exit(int show_errno, const char *format, ...); extern const char *get_execname(const char *); extern int parse_numeric(ssize_t *val, const char *str); @@ -109,15 +123,18 @@ extern ssize_t Read(int fd, void *buf, size_t count); extern ssize_t Read_Adjusted(int fd, uchar_t *buf, size_t count, ssize_t *rabin_count, void *ctx); extern ssize_t Write(int fd, const void *buf, size_t count); +extern void set_threadcounts(algo_props_t *props, int *nthreads, int nprocs, + algo_threads_type_t typ); /* Pointer type for compress and decompress functions. */ typedef int (*compress_func_ptr)(void *src, size_t srclen, void *dst, size_t *destlen, int level, uchar_t chdr, void *data); /* Pointer type for algo specific init/deinit/stats functions. */ -typedef int (*init_func_ptr)(void **data, int *level, ssize_t chunksize); +typedef int (*init_func_ptr)(void **data, int *level, int nthreads, ssize_t chunksize); typedef int (*deinit_func_ptr)(void **data); typedef void (*stats_func_ptr)(int show); +typedef void (*props_func_ptr)(algo_props_t *data, int level, ssize_t chunksize); /* @@ -154,6 +171,17 @@ hash6432shift(uint64_t key) return (uint32_t) key; } +static void +init_algo_props(algo_props_t *props) +{ + props->buf_extra = 0; + props->compress_mt_capable = 0; + props->decompress_mt_capable = 0; + props->nthreads = 1; + props->c_max_threads = 1; + props->d_max_threads = 1; +} + #ifdef __cplusplus } #endif diff --git a/zlib_compress.c b/zlib_compress.c index f7ce52a..1ba8ead 100644 --- a/zlib_compress.c +++ b/zlib_compress.c @@ -54,7 +54,7 @@ zlib_buf_extra(ssize_t buflen) } int -zlib_init(void **data, int *level, ssize_t chunksize) +zlib_init(void **data, int *level, int nthreads, ssize_t chunksize) { z_stream *zs; int ret;