init files

This commit is contained in:
AArt1256 2025-11-13 19:07:39 +03:00
commit 8197a022bd
1409 changed files with 139317 additions and 0 deletions

View file

@ -0,0 +1,101 @@
/*
* dictionary.c - dictionary implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdio.h>
#include <stdlib.h>
#include "format.h"
#include "lib.h"
/**
 * Read a dictionary file into a freshly allocated buffer
 *
 * At most BLOCK_SIZE bytes are kept: when the file is larger than that, only
 * its final BLOCK_SIZE bytes are read.
 *
 * @param pszDictionaryFilename path of the dictionary file, or NULL to load nothing
 * @param ppDictionaryData receives the malloc()ed dictionary buffer, or NULL when no filename was given
 * @param pDictionaryDataSize receives the number of bytes read into the buffer, or 0 when no filename was given
 *
 * @return LZSA_OK on success, LZSA_ERROR_MEMORY if the buffer cannot be allocated,
 *         LZSA_ERROR_DICTIONARY if the file cannot be opened. The output parameters
 *         are only written on success.
 */
int lzsa_dictionary_load(const char *pszDictionaryFilename, void **ppDictionaryData, int *pDictionaryDataSize) {
   unsigned char *pBuffer = NULL;
   int nBytesRead = 0;

   if (pszDictionaryFilename != NULL) {
      FILE *pFile;

      pBuffer = (unsigned char *)malloc(BLOCK_SIZE);
      if (pBuffer == NULL)
         return LZSA_ERROR_MEMORY;

      pFile = fopen(pszDictionaryFilename, "rb");
      if (pFile == NULL) {
         free(pBuffer);
         return LZSA_ERROR_DICTIONARY;
      }

      /* Measure the file by seeking to its end */
      fseek(pFile, 0, SEEK_END);
#ifdef _WIN32
      __int64 nFileLength = _ftelli64(pFile);
#else
      off_t nFileLength = ftello(pFile);
#endif

      /* Small files are read from the start; oversized ones from BLOCK_SIZE bytes before the end */
      if (nFileLength <= BLOCK_SIZE)
         fseek(pFile, 0, SEEK_SET);
      else
         fseek(pFile, -BLOCK_SIZE, SEEK_END);

      nBytesRead = (int)fread(pBuffer, 1, BLOCK_SIZE, pFile);
      if (nBytesRead < 0)
         nBytesRead = 0;

      fclose(pFile);
   }

   *ppDictionaryData = pBuffer;
   *pDictionaryDataSize = nBytesRead;
   return LZSA_OK;
}
/**
 * Release a dictionary buffer and clear the caller's pointer to it
 *
 * @param ppDictionaryData address of the dictionary pointer; a pointer that is
 *                         already NULL is left untouched
 */
void lzsa_dictionary_free(void **ppDictionaryData) {
   void *pData = *ppDictionaryData;

   if (pData == NULL)
      return;

   free(pData);
   *ppDictionaryData = NULL;
}

View file

@ -0,0 +1,64 @@
/*
* dictionary.h - dictionary definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _DICTIONARY_H
#define _DICTIONARY_H
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* Load dictionary contents
*
* The contents are returned in a buffer allocated by this function; release it
* with lzsa_dictionary_free().
*
* @param pszDictionaryFilename name of dictionary file, or NULL for none
* @param ppDictionaryData receives a pointer to the loaded dictionary contents; set to NULL when no filename is given
* @param pDictionaryDataSize receives the size of the loaded dictionary contents in bytes; set to 0 when no filename is given
*
* @return LZSA_OK for success, or an error value from lzsa_status_t
*/
int lzsa_dictionary_load(const char *pszDictionaryFilename, void **ppDictionaryData, int *pDictionaryDataSize);
/**
* Free dictionary contents
*
* @param ppDictionaryData pointer to pointer to dictionary contents; the pointed-to pointer is reset to NULL after its buffer is freed
*/
void lzsa_dictionary_free(void **ppDictionaryData);
#ifdef __cplusplus
}
#endif
#endif /* _DICTIONARY_H */

View file

@ -0,0 +1,224 @@
/*
* expand_block_v1.c - LZSA1 block decompressor implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdlib.h>
#include <string.h>
#include "format.h"
#include "expand_block_v1.h"
/* FORCE_INLINE: always-inline request for the small helpers below; uses the MSVC keyword when _MSC_VER is defined and the __attribute__ spelling otherwise */
#ifdef _MSC_VER
#define FORCE_INLINE __forceinline
#else /* _MSC_VER */
#define FORCE_INLINE __attribute__((always_inline))
#endif /* _MSC_VER */
/**
 * Read the extended literals length that follows a token
 *
 * The first extra byte is added to the count already held in *nLiterals.
 * An extra byte of 250 replaces the count with 256 plus one more byte; an
 * extra byte of 249 replaces it with a 16-bit value stored low byte first.
 *
 * @param ppInBlock pointer to the current read position, advanced on success only
 * @param pInBlockEnd end of the compressed data
 * @param nLiterals literals count to extend (may already be modified when -1 is returned)
 *
 * @return 0 for success, -1 if the compressed data ends prematurely
 */
static inline FORCE_INLINE int lzsa_build_literals_len_v1(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, unsigned int *nLiterals) {
   const unsigned char *pCursor = *ppInBlock;
   unsigned int nExtra;

   if (pCursor >= pInBlockEnd)
      return -1;

   nExtra = *pCursor++;
   *nLiterals += nExtra;

   switch (nExtra) {
   case 250:
      /* One more byte: length is 256 + byte */
      if (pCursor >= pInBlockEnd)
         return -1;
      *nLiterals = 256 + (unsigned int)*pCursor++;
      break;

   case 249:
      /* Two more bytes: 16-bit length, low byte first */
      if ((pCursor + 1) >= pInBlockEnd)
         return -1;
      *nLiterals = (unsigned int)pCursor[0] | (((unsigned int)pCursor[1]) << 8);
      pCursor += 2;
      break;

   default:
      break;
   }

   *ppInBlock = pCursor;
   return 0;
}
/**
 * Read the extended match length that follows a match offset
 *
 * The first extra byte is added to the length already held in *nMatchLen.
 * An extra byte of 239 replaces the length with 256 plus one more byte; an
 * extra byte of 238 replaces it with a 16-bit value stored low byte first.
 *
 * @param ppInBlock pointer to the current read position, advanced on success only
 * @param pInBlockEnd end of the compressed data
 * @param nMatchLen match length to extend (may already be modified when -1 is returned)
 *
 * @return 0 for success, -1 if the compressed data ends prematurely
 */
static inline FORCE_INLINE int lzsa_build_match_len_v1(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, unsigned int *nMatchLen) {
   const unsigned char *pCursor = *ppInBlock;
   unsigned int nExtra;

   if (pCursor >= pInBlockEnd)
      return -1;

   nExtra = *pCursor++;
   *nMatchLen += nExtra;

   if (nExtra == 239) {
      /* One more byte: length is 256 + byte */
      if (pCursor >= pInBlockEnd)
         return -1;
      *nMatchLen = 256 + (unsigned int)*pCursor++;
   }
   else if (nExtra == 238) {
      /* Two more bytes: 16-bit length, low byte first */
      if ((pCursor + 1) >= pInBlockEnd)
         return -1;
      *nMatchLen = (unsigned int)pCursor[0] | (((unsigned int)pCursor[1]) << 8);
      pCursor += 2;
   }

   *ppInBlock = pCursor;
   return 0;
}
/**
* Decompress one LZSA1 data block
*
* @param pInBlock pointer to compressed data
* @param nBlockSize size of compressed data, in bytes
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
* @param nBlockMaxSize maximum number of bytes that may be stored starting at nOutDataOffset; writes are bounded by pOutData + nOutDataOffset + nBlockMaxSize
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_decompressor_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
const unsigned char *pInBlockEnd = pInBlock + nBlockSize;
unsigned char *pCurOutData = pOutData + nOutDataOffset;
/* The output limit is measured from the first write position, not from pOutData */
const unsigned char *pOutDataEnd = pCurOutData + nBlockMaxSize;
/* The fast paths below copy fixed-size chunks (up to 18 bytes) and are only taken while writing before this point */
const unsigned char *pOutDataFastEnd = pOutDataEnd - 18;
while (pInBlock < pInBlockEnd) {
const unsigned char token = *pInBlock++;
/* Literals count comes from token bits 4-6 */
unsigned int nLiterals = (unsigned int)((token & 0x70) >> 4);
if (nLiterals != LITERALS_RUN_LEN_V1 && (pInBlock + 8) <= pInBlockEnd && pCurOutData < pOutDataFastEnd) {
/* Fast path: always copy 8 bytes, but only advance by the real literals count */
memcpy(pCurOutData, pInBlock, 8);
pInBlock += nLiterals;
pCurOutData += nLiterals;
}
else {
/* Slow path: read an extended count if needed, then copy with exact bounds checks */
if (nLiterals == LITERALS_RUN_LEN_V1) {
if (lzsa_build_literals_len_v1(&pInBlock, pInBlockEnd, &nLiterals))
return -1;
}
if (nLiterals != 0) {
if ((pInBlock + nLiterals) <= pInBlockEnd &&
(pCurOutData + nLiterals) <= pOutDataEnd) {
memcpy(pCurOutData, pInBlock, nLiterals);
pInBlock += nLiterals;
pCurOutData += nLiterals;
}
else {
return -1;
}
}
}
if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */
unsigned int nMatchOffset;
/* Offset bytes are stored inverted: low byte always, high byte only when token bit 7 is set */
nMatchOffset = ((unsigned int)(*pInBlock++)) ^ 0xff;
if (token & 0x80) {
nMatchOffset |= (((unsigned int)(*pInBlock++)) << 8) ^ 0xff00;
}
nMatchOffset++;
const unsigned char *pSrc = pCurOutData - nMatchOffset;
/* A match source before the start of the output buffer is an error */
if (pSrc >= pOutData) {
/* Match length comes from token bits 0-3 */
unsigned int nMatchLen = (unsigned int)(token & 0x0f);
if (nMatchLen != MATCH_RUN_LEN_V1 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd && (pSrc + 18) <= pOutDataEnd) {
/* Fast path: copy 18 bytes in three chunks, then advance by the actual match length */
memcpy(pCurOutData, pSrc, 8);
memcpy(pCurOutData + 8, pSrc + 8, 8);
memcpy(pCurOutData + 16, pSrc + 16, 2);
pCurOutData += (MIN_MATCH_SIZE_V1 + nMatchLen);
}
else {
nMatchLen += MIN_MATCH_SIZE_V1;
if (nMatchLen == (MATCH_RUN_LEN_V1 + MIN_MATCH_SIZE_V1)) {
if (lzsa_build_match_len_v1(&pInBlock, pInBlockEnd, &nMatchLen))
return -1;
/* An extended match length of zero stops decoding of this block */
if (nMatchLen == 0)
break;
}
if ((pSrc + nMatchLen) <= pOutDataEnd) {
if ((pCurOutData + nMatchLen) <= pOutDataEnd) {
/* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */
if (nMatchOffset >= 16 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) {
/* Source and destination are at least 16 bytes apart: copy in 16-byte chunks; the last chunk may write past the match but stays before pOutDataFastEnd */
const unsigned char *pCopySrc = pSrc;
unsigned char *pCopyDst = pCurOutData;
const unsigned char *pCopyEndDst = pCurOutData + nMatchLen;
do {
memcpy(pCopyDst, pCopySrc, 16);
pCopySrc += 16;
pCopyDst += 16;
} while (pCopyDst < pCopyEndDst);
pCurOutData += nMatchLen;
}
else {
while (nMatchLen) {
*pCurOutData++ = *pSrc++;
nMatchLen--;
}
}
}
else {
return -1;
}
}
else {
return -1;
}
}
}
else {
return -1;
}
}
}
/* Number of bytes written by this call */
return (int)(pCurOutData - (pOutData + nOutDataOffset));
}

View file

@ -0,0 +1,49 @@
/*
* expand_block_v1.h - LZSA1 block decompressor definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _EXPAND_BLOCK_V1_H
#define _EXPAND_BLOCK_V1_H
/**
* Decompress one LZSA1 data block
*
* @param pInBlock pointer to compressed data
* @param nBlockSize size of compressed data, in bytes
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
* @param nBlockMaxSize maximum number of bytes that may be stored starting at nOutDataOffset (the implementation bounds writes at pOutData + nOutDataOffset + nBlockMaxSize)
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_decompressor_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
#endif /* _EXPAND_BLOCK_V1_H */

View file

@ -0,0 +1,249 @@
/*
* expand_block_v2.c - LZSA2 block decompressor implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdlib.h>
#include <string.h>
#include "format.h"
#include "expand_block_v2.h"
/* FORCE_INLINE: always-inline request for the small helpers below; uses the MSVC keyword when _MSC_VER is defined and the __attribute__ spelling otherwise */
#ifdef _MSC_VER
#define FORCE_INLINE __forceinline
#else /* _MSC_VER */
#define FORCE_INLINE __attribute__((always_inline))
#endif /* _MSC_VER */
/**
 * Fetch the next 4-bit value from the compressed stream
 *
 * Nibbles are consumed in pairs: every other call reads a new byte and
 * returns its high nibble; the call after that returns the low nibble of the
 * same byte without touching the input.
 *
 * @param ppInBlock pointer to the current read position, advanced when a new byte is fetched
 * @param pInBlockEnd end of the compressed data
 * @param nCurNibbles nibble toggle state, flipped on every call (even a failing one)
 * @param nibbles storage for the byte whose nibbles are being handed out
 * @param nValue receives the nibble
 *
 * @return 0 for success, non-zero if a new byte is needed but the input is exhausted
 */
static inline FORCE_INLINE unsigned int lzsa_get_nibble_v2(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, int *nCurNibbles, unsigned char *nibbles, unsigned int *nValue) {
   *nCurNibbles ^= 1;

   if (*nCurNibbles == 0) {
      /* Low nibble of the byte fetched by the previous call */
      *nValue = (unsigned int)((*nibbles) & 0x0f);
      return 0;
   }

   const unsigned char *pCursor = *ppInBlock;
   if (pCursor >= pInBlockEnd)
      return -1;

   /* Fetch a fresh byte and hand out its high nibble first */
   *nibbles = *pCursor++;
   *ppInBlock = pCursor;
   *nValue = ((unsigned int)((*nibbles) & 0xf0)) >> 4;
   return 0;
}
/**
 * Read an extended literals or match length
 *
 * A nibble is added to *nLength. When that nibble is 15, one more byte is
 * added; if the total is then 257 the length is replaced by a 16-bit value
 * stored high byte first, and if the total is 256 the length becomes 0.
 *
 * @param ppInBlock pointer to the current read position
 * @param pInBlockEnd end of the compressed data
 * @param nCurNibbles nibble toggle state shared with lzsa_get_nibble_v2()
 * @param nibbles nibble storage shared with lzsa_get_nibble_v2()
 * @param nLength length to extend (may already be modified when -1 is returned)
 *
 * @return 0 for success, -1 if the compressed data ends prematurely
 */
static inline FORCE_INLINE int lzsa_build_len_v2(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, int *nCurNibbles, unsigned char *nibbles, unsigned int *nLength) {
   unsigned int nNibble;

   if (lzsa_get_nibble_v2(ppInBlock, pInBlockEnd, nCurNibbles, nibbles, &nNibble))
      return -1;

   *nLength += nNibble;
   if (nNibble != 15)
      return 0;

   /* Nibble saturated: the length continues in a full byte */
   const unsigned char *pCursor = *ppInBlock;
   if (pCursor >= pInBlockEnd)
      return -1;

   *nLength += (unsigned int)*pCursor++;

   if (*nLength == 257) {
      /* 16-bit length, high byte first */
      if ((pCursor + 1) >= pInBlockEnd)
         return -1;
      *nLength = (((unsigned int)pCursor[0]) << 8) | (unsigned int)pCursor[1];
      pCursor += 2;
   }
   else if (*nLength == 256) {
      *nLength = 0;
   }

   *ppInBlock = pCursor;
   return 0;
}
/**
 * Decompress one LZSA2 data block
 *
 * @param pInBlock pointer to compressed data
 * @param nBlockSize size of compressed data, in bytes
 * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
 * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
 * @param nBlockMaxSize maximum number of bytes that may be stored starting at nOutDataOffset; writes are bounded by pOutData + nOutDataOffset + nBlockMaxSize
 *
 * @return size of decompressed data in bytes, or -1 for error
 */
int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
   const unsigned char *pInBlockEnd = pInBlock + nBlockSize;
   unsigned char *pCurOutData = pOutData + nOutDataOffset;
   const unsigned char *pOutDataEnd = pCurOutData + nBlockMaxSize;
   /* The fast paths copy fixed-size chunks and are only taken while writing before this point */
   const unsigned char *pOutDataFastEnd = pOutDataEnd - 20;
   int nCurNibbles = 0;
   unsigned char nibbles = 0;
   int nMatchOffset = 0;

   while (pInBlock < pInBlockEnd) {
      const unsigned char token = *pInBlock++;
      unsigned int nLiterals = (unsigned int)((token >> LITERALS_LEN_POS_V2) & LITERALS_RUN_LEN_V2);

      if (nLiterals != LITERALS_RUN_LEN_V2 && (pInBlock + 4) <= pInBlockEnd && pCurOutData < pOutDataFastEnd) {
         /* Fast path: always copy 4 bytes, but only advance by the real literals count */
         memcpy(pCurOutData, pInBlock, 4);
         pInBlock += nLiterals;
         pCurOutData += nLiterals;
      }
      else {
         if (nLiterals == LITERALS_RUN_LEN_V2) {
            if (lzsa_build_len_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nLiterals))
               return -1;
         }

         if (nLiterals != 0) {
            if ((pInBlock + nLiterals) <= pInBlockEnd &&
                (pCurOutData + nLiterals) <= pOutDataEnd) {
               memcpy(pCurOutData, pInBlock, nLiterals);
               pInBlock += nLiterals;
               pCurOutData += nLiterals;
            }
            else {
               return -1;
            }
         }
      }

      if (pInBlock < pInBlockEnd) { /* The last token in the block does not include match information */
         /* Keep all three mode bits (0x80, 0x40, 0x20): the tests below look at
          * 0x20 and 0xe0, which a 0xc0 mask would always read as zero, making the
          * 13 bit branch unreachable and the "keep previous offset" case impossible. */
         const unsigned char nOffsetMode = token & 0xe0;
         unsigned int nValue;

         if ((nOffsetMode & 0x80) == 0x00) {
            if ((nOffsetMode & 0x20) == 0x00) {
               /* 5 bit offset */
               if (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nValue))
                  return -1;
               nMatchOffset = nValue << 1;
               nMatchOffset |= ((token & 0x40) >> 6);
               nMatchOffset ^= 0x1e;
               nMatchOffset++;
            }
            else {
               /* 13 bit offset */
               if (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nValue))
                  return -1;
               /* The nibble fetch may have consumed the last input byte */
               if (pInBlock >= pInBlockEnd)
                  return -1;
               nMatchOffset = (unsigned int)(*pInBlock++);
               nMatchOffset |= (nValue << 9);
               nMatchOffset |= (((unsigned int)(token & 0x40)) << 2);
               nMatchOffset ^= 0x1eff;
               nMatchOffset += (512 + 1);
            }
         }
         else if ((nOffsetMode & 0xc0) == 0x80) {
            /* 9 bit offset */
            nMatchOffset = (unsigned int)(*pInBlock++);
            nMatchOffset |= (((unsigned int)(token & 0x20)) << 3);
            nMatchOffset ^= 0x0ff;
            nMatchOffset++;
         }
         else if (nOffsetMode == 0xc0) {
            /* 16 bit offset, high byte first */
            nMatchOffset = (((unsigned int)(*pInBlock++)) << 8);
            if (pInBlock >= pInBlockEnd) return -1;
            nMatchOffset |= (unsigned int)(*pInBlock++);
            nMatchOffset ^= 0xffff;
            nMatchOffset++;
         }
         /* Otherwise all three mode bits are set: nMatchOffset keeps the value of the previous match */

         const unsigned char *pSrc = pCurOutData - nMatchOffset;
         /* A match source before the start of the output buffer is an error */
         if (pSrc < pOutData)
            return -1;

         unsigned int nMatchLen = (unsigned int)((token >> MATCH_LEN_POS_V2) & MATCH_RUN_LEN_V2);
         if (nMatchLen != MATCH_RUN_LEN_V2 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd && (pSrc + 10) <= pOutDataEnd) {
            /* Fast path: copy 10 bytes in two chunks, then advance by the actual match length */
            memcpy(pCurOutData, pSrc, 8);
            memcpy(pCurOutData + 8, pSrc + 8, 2);
            pCurOutData += (MIN_MATCH_SIZE_V2 + nMatchLen);
         }
         else {
            nMatchLen += MIN_MATCH_SIZE_V2;
            if (nMatchLen == (MATCH_RUN_LEN_V2 + MIN_MATCH_SIZE_V2)) {
               if (lzsa_build_len_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nMatchLen))
                  return -1;
               /* An extended match length of zero stops decoding of this block */
               if (nMatchLen == 0)
                  break;
            }

            if ((pSrc + nMatchLen) > pOutDataEnd || (pCurOutData + nMatchLen) > pOutDataEnd)
               return -1;

            /* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */
            if (nMatchOffset >= 16 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) {
               /* Source and destination are at least 16 bytes apart: copy in 16-byte chunks;
                * the last chunk may write past the match but stays before pOutDataFastEnd */
               const unsigned char *pCopySrc = pSrc;
               unsigned char *pCopyDst = pCurOutData;
               const unsigned char *pCopyEndDst = pCurOutData + nMatchLen;

               do {
                  memcpy(pCopyDst, pCopySrc, 16);
                  pCopySrc += 16;
                  pCopyDst += 16;
               } while (pCopyDst < pCopyEndDst);

               pCurOutData += nMatchLen;
            }
            else {
               while (nMatchLen) {
                  *pCurOutData++ = *pSrc++;
                  nMatchLen--;
               }
            }
         }
      }
   }

   /* Number of bytes written by this call */
   return (int)(pCurOutData - (pOutData + nOutDataOffset));
}

View file

@ -0,0 +1,49 @@
/*
* expand_block_v2.h - LZSA2 block decompressor definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _EXPAND_BLOCK_V2_H
#define _EXPAND_BLOCK_V2_H
/**
* Decompress one LZSA2 data block
*
* @param pInBlock pointer to compressed data
* @param nBlockSize size of compressed data, in bytes
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
* @param nBlockMaxSize maximum number of bytes that may be stored starting at nOutDataOffset (the implementation bounds writes at pOutData + nOutDataOffset + nBlockMaxSize)
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
#endif /* _EXPAND_BLOCK_V2_H */

View file

@ -0,0 +1,76 @@
/*
* expand_context.c - decompressor context implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdlib.h>
#include <string.h>
#include "expand_context.h"
#include "expand_block_v1.h"
#include "expand_block_v2.h"
#include "lib.h"
/**
 * Expand a single compressed block, dispatching on the format version
 *
 * When LZSA_FLAG_RAW_BACKWARD is set, lzsa_reverse_buffer() is applied to the
 * input before decoding and again afterwards, and to the decoded output when
 * decoding succeeded.
 *
 * @param pInBlock pointer to compressed data
 * @param nBlockSize size of compressed data, in bytes
 * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
 * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
 * @param nBlockMaxSize output size limit, handed unchanged to the version-specific decompressor
 * @param nFormatVersion version of format to use (1-2); any other value is an error
 * @param nFlags compression flags (LZSA_FLAG_xxx)
 *
 * @return size of decompressed data in bytes, or -1 for error
 */
int lzsa_decompressor_expand_block(unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize, const int nFormatVersion, const int nFlags) {
   const int nIsBackward = (nFlags & LZSA_FLAG_RAW_BACKWARD) ? 1 : 0;
   int nResult;

   if (nIsBackward)
      lzsa_reverse_buffer(pInBlock, nBlockSize);

   switch (nFormatVersion) {
   case 1:
      nResult = lzsa_decompressor_expand_block_v1(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize);
      break;
   case 2:
      nResult = lzsa_decompressor_expand_block_v2(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize);
      break;
   default:
      nResult = -1;
      break;
   }

   if (nIsBackward) {
      /* Output first (only if decoding worked), then the input buffer */
      if (nResult != -1)
         lzsa_reverse_buffer(pOutData + nOutDataOffset, nResult);
      lzsa_reverse_buffer(pInBlock, nBlockSize);
   }

   return nResult;
}

View file

@ -0,0 +1,61 @@
/*
* expand_context.h - decompressor context definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _EXPAND_CONTEXT_H
#define _EXPAND_CONTEXT_H
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* Decompress one data block
*
* @param pInBlock pointer to compressed data (not const: lzsa_reverse_buffer() is applied to it before and after decoding when LZSA_FLAG_RAW_BACKWARD is set)
* @param nBlockSize size of compressed data, in bytes
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
* @param nBlockMaxSize maximum number of bytes that may be stored starting at nOutDataOffset; passed unchanged to the LZSA1/LZSA2 block decompressor
* @param nFormatVersion version of format to use (1-2)
* @param nFlags compression flags (LZSA_FLAG_xxx)
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_decompressor_expand_block(unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize, const int nFormatVersion, const int nFlags);
#ifdef __cplusplus
}
#endif
#endif /* _EXPAND_CONTEXT_H */

View file

@ -0,0 +1,163 @@
/*
* expand_inmem.c - in-memory decompression implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdlib.h>
#include <string.h>
#include "expand_inmem.h"
#include "lib.h"
#include "frame.h"
/* Number of decompressed bytes budgeted for each non-empty frame when computing the maximum decompressed size */
#define BLOCK_SIZE 65536
/**
 * Compute an upper bound on the decompressed size of a compressed stream
 *
 * Walks the frame headers without decompressing anything and budgets
 * BLOCK_SIZE bytes for every frame with a non-zero data size. The walk stops
 * at the first frame whose data size is zero or at the end of the data.
 *
 * @param pFileData compressed data
 * @param nFileSize compressed size in bytes
 *
 * @return maximum decompressed size, or (size_t)-1 if the header or a frame
 *         is invalid or truncated
 */
size_t lzsa_get_max_decompressed_size_inmem(const unsigned char *pFileData, size_t nFileSize) {
   const unsigned char *pCursor = pFileData;
   const unsigned char *pLimit = pFileData + nFileSize;
   int nFormatVersion = 0;
   size_t nUpperBound = 0;
   const int nHeaderSize = lzsa_get_header_size();

   /* The stream header must be complete and valid */
   if ((pCursor + nHeaderSize) > pLimit)
      return (size_t)-1;
   if (lzsa_decode_header(pCursor, nHeaderSize, &nFormatVersion) != 0)
      return (size_t)-1;
   pCursor += nHeaderSize;

   while (pCursor < pLimit) {
      unsigned int nFrameDataSize = 0;
      int nIsUncompressed = 0;
      const int nFrameSize = lzsa_get_frame_size();

      /* The frame header must be complete and valid */
      if ((pCursor + nFrameSize) > pLimit)
         return (size_t)-1;
      if (lzsa_decode_frame(pCursor, nFrameSize, &nFrameDataSize, &nIsUncompressed) != 0)
         return (size_t)-1;
      pCursor += nFrameSize;

      /* A frame without data ends the walk */
      if (nFrameDataSize == 0)
         break;

      /* Budget a full block before checking that the frame data is all there */
      nUpperBound += BLOCK_SIZE;

      if ((pCursor + nFrameDataSize) > pLimit)
         return (size_t)-1;
      pCursor += nFrameDataSize;
   }

   return nUpperBound;
}
/**
 * Decompress data in memory
 *
 * With LZSA_FLAG_RAW_BLOCK the input is treated as a single headerless block
 * and *pFormatVersion must already hold the format version to use. Otherwise
 * the stream header is decoded (filling in *pFormatVersion) and frames are
 * processed until a frame with a zero data size or the end of the input.
 *
 * @param pFileData compressed data
 * @param pOutBuffer buffer for decompressed data
 * @param nFileSize compressed size in bytes
 * @param nMaxOutBufferSize maximum capacity of decompression buffer
 * @param nFlags compression flags (LZSA_FLAG_xxx)
 * @param pFormatVersion pointer to format version, updated if this function is successful
 *
 * @return actual decompressed size, or (size_t)-1 for error
 */
size_t lzsa_decompress_inmem(unsigned char *pFileData, unsigned char *pOutBuffer, size_t nFileSize, size_t nMaxOutBufferSize, const unsigned int nFlags, int *pFormatVersion) {
   unsigned char *pCurFileData = pFileData;
   const unsigned char *pEndFileData = pCurFileData + nFileSize;
   unsigned char *pCurOutBuffer = pOutBuffer;
   const unsigned char *pEndOutBuffer = pCurOutBuffer + nMaxOutBufferSize;
   int nPreviousBlockSize = 0;
   const int nHeaderSize = lzsa_get_header_size();

   if (nFlags & LZSA_FLAG_RAW_BLOCK) {
      return (size_t)lzsa_decompressor_expand_block(pFileData, (int)nFileSize, pOutBuffer, 0, (int)nMaxOutBufferSize, *pFormatVersion, nFlags);
   }

   /* Check header */
   if ((pCurFileData + nHeaderSize) > pEndFileData ||
       lzsa_decode_header(pCurFileData, nHeaderSize, pFormatVersion) != 0)
      return (size_t)-1;

   pCurFileData += nHeaderSize;

   while (pCurFileData < pEndFileData) {
      unsigned int nBlockDataSize = 0;
      int nIsUncompressed = 0;
      const int nFrameSize = lzsa_get_frame_size();

      /* Decode frame header */
      if ((pCurFileData + nFrameSize) > pEndFileData ||
          lzsa_decode_frame(pCurFileData, nFrameSize, &nBlockDataSize, &nIsUncompressed) != 0)
         return (size_t)-1;
      pCurFileData += nFrameSize;

      if (!nBlockDataSize)
         break;

      /* The frame's data must lie entirely inside the input (compared as sizes so no pointer is formed past the end) */
      if (nBlockDataSize > (size_t)(pEndFileData - pCurFileData))
         return (size_t)-1;

      if (!nIsUncompressed) {
         int nDecompressedSize;

         /* Decompress block. The block decompressor places its write limit at
          * pOutData + nOutDataOffset + nBlockMaxSize, i.e. relative to the current
          * write position, so only the space still free in the output buffer is
          * passed. Adding nPreviousBlockSize here would move the limit that many
          * bytes past the end of the buffer. */
         nDecompressedSize = lzsa_decompressor_expand_block(pCurFileData, (int)nBlockDataSize, pCurOutBuffer - nPreviousBlockSize, nPreviousBlockSize, (int)(pEndOutBuffer - pCurOutBuffer), *pFormatVersion, nFlags);
         if (nDecompressedSize < 0)
            return (size_t)-1;

         pCurOutBuffer += nDecompressedSize;
         nPreviousBlockSize = nDecompressedSize;
      }
      else {
         /* Copy uncompressed block */
         if (nBlockDataSize > (size_t)(pEndOutBuffer - pCurOutBuffer))
            return (size_t)-1;

         memcpy(pCurOutBuffer, pCurFileData, nBlockDataSize);
         pCurOutBuffer += nBlockDataSize;
         /* NOTE(review): nPreviousBlockSize is not updated here, so a following compressed
          * block can only reference the last compressed block's worth of history, which no
          * longer ends at the bytes just copied - confirm against the compressor whether
          * matches may point into an uncompressed block. */
      }

      pCurFileData += nBlockDataSize;
   }

   /* Return the full size; going through int would truncate large outputs */
   return (size_t)(pCurOutBuffer - pOutBuffer);
}

View file

@ -0,0 +1,70 @@
/*
* expand_inmem.h - in-memory decompression definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _EXPAND_INMEM_H
#define _EXPAND_INMEM_H
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* Get maximum decompressed size of compressed data
*
* Only the file header and the frame headers are examined; no data is
* decompressed. Every data block found is counted as one full BLOCK_SIZE.
*
* @param pFileData compressed data
* @param nFileSize compressed size in bytes
*
* @return maximum decompressed size, or (size_t)-1 if the file header or a frame is invalid or truncated
*/
size_t lzsa_get_max_decompressed_size_inmem(const unsigned char *pFileData, size_t nFileSize);
/**
* Decompress data in memory
*
* @param pFileData compressed data
* @param pOutBuffer buffer for decompressed data
* @param nFileSize compressed size in bytes
* @param nMaxOutBufferSize maximum capacity of decompression buffer
* @param nFlags compression flags (LZSA_FLAG_xxx); with LZSA_FLAG_RAW_BLOCK the input is expanded as one block without file or frame headers
* @param pFormatVersion pointer to format version; read as the version to decode with for a raw block, otherwise set from the file header
*
* @return actual decompressed size, or -1 for error (as the return type is unsigned, compare against (size_t)-1)
*/
size_t lzsa_decompress_inmem(unsigned char *pFileData, unsigned char *pOutBuffer, size_t nFileSize, size_t nMaxOutBufferSize, const unsigned int nFlags, int *pFormatVersion);
#ifdef __cplusplus
}
#endif
#endif /* _EXPAND_INMEM_H */

View file

@ -0,0 +1,236 @@
/*
* expand_streaming.c - streaming decompression implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdlib.h>
#include <string.h>
#include "expand_streaming.h"
#include "format.h"
#include "frame.h"
#include "lib.h"
/*-------------- File API -------------- */
/**
 * Decompress a file into another file
 *
 * Opens the input for reading and the output for writing, loads the optional
 * dictionary, runs the streaming decompressor and finally releases everything
 * in reverse order of acquisition.
 *
 * @param pszInFilename name of input(compressed) file to decompress
 * @param pszOutFilename name of output(decompressed) file to generate
 * @param pszDictionaryFilename name of dictionary file, or NULL for none
 * @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
 * @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
 * @param pOriginalSize pointer to returned output(decompressed) size, passed on to lzsa_decompress_stream()
 * @param pCompressedSize pointer to returned input(compressed) size, passed on to lzsa_decompress_stream()
 *
 * @return LZSA_OK for success, LZSA_ERROR_SRC if the input cannot be opened, LZSA_ERROR_DST if the output cannot be opened, or another error value from lzsa_status_t
 */
lzsa_status_t lzsa_decompress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, int nFormatVersion,
                                   long long *pOriginalSize, long long *pCompressedSize) {
   lzsa_stream_t inStream, outStream;
   void *pDictionaryData = NULL;
   int nDictionaryDataSize = 0;
   lzsa_status_t nResult;

   if (lzsa_filestream_open(&inStream, pszInFilename, "rb") < 0)
      return LZSA_ERROR_SRC;

   if (lzsa_filestream_open(&outStream, pszOutFilename, "wb") >= 0) {
      nResult = lzsa_dictionary_load(pszDictionaryFilename, &pDictionaryData, &nDictionaryDataSize);

      if (!nResult) {
         /* Dictionary is ready (or absent): do the actual work, then drop the dictionary */
         nResult = lzsa_decompress_stream(&inStream, &outStream, pDictionaryData, nDictionaryDataSize, nFlags, nFormatVersion, pOriginalSize, pCompressedSize);
         lzsa_dictionary_free(&pDictionaryData);
      }

      outStream.close(&outStream);
   }
   else {
      nResult = LZSA_ERROR_DST;
   }

   inStream.close(&inStream);
   return nResult;
}
/*-------------- Streaming API -------------- */
/**
 * Decompress stream
 *
 * Unless LZSA_FLAG_RAW_BLOCK is set, the stream must start with a file header
 * and every block is preceded by a frame header. In raw block mode a single
 * block of at most BLOCK_SIZE bytes is read and expanded.
 *
 * The output buffer holds two halves of BLOCK_SIZE bytes each: the upper half
 * receives the current block, the lower half holds the previous block's output
 * (or the dictionary before the first block) so it can be referenced.
 *
 * @param pInStream input(compressed) stream to decompress
 * @param pOutStream output(decompressed) stream to write to
 * @param pDictionaryData dictionary contents, or NULL for none
 * @param nDictionaryDataSize size of dictionary contents, or 0. Values <= 0 mean no dictionary; if larger than BLOCK_SIZE only the last BLOCK_SIZE bytes are used
 * @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
 * @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
 * @param pOriginalSize pointer to returned output(decompressed) size; written once the work buffers are allocated, also when a later block fails
 * @param pCompressedSize pointer to returned input(compressed) size; written under the same conditions as pOriginalSize
 *
 * @return LZSA_OK for success, or an error value from lzsa_status_t
 */
lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, const unsigned int nFlags, int nFormatVersion,
                                     long long *pOriginalSize, long long *pCompressedSize) {
   long long nOriginalSize = 0LL, nCompressedSize = 0LL;
   unsigned char cFrameData[16];
   unsigned char *pInBlock;
   unsigned char *pOutData;

   if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
      const int nHeaderSize = lzsa_get_header_size();

      memset(cFrameData, 0, sizeof(cFrameData));
      if (pInStream->read(pInStream, cFrameData, nHeaderSize) != nHeaderSize) {
         return LZSA_ERROR_SRC;
      }

      if (lzsa_decode_header(cFrameData, nHeaderSize, &nFormatVersion) < 0) {
         return LZSA_ERROR_FORMAT;
      }

      nCompressedSize += (long long)nHeaderSize;
   }

   pInBlock = (unsigned char*)malloc(BLOCK_SIZE);
   if (!pInBlock) {
      return LZSA_ERROR_MEMORY;
   }

   pOutData = (unsigned char*)malloc(BLOCK_SIZE * 2);
   if (!pOutData) {
      free(pInBlock);
      pInBlock = NULL;
      return LZSA_ERROR_MEMORY;
   }

   int nDecompressionError = 0;
   int nPrevDecompressedSize = 0;
   int nNumBlocks = 0;

   while (!pInStream->eof(pInStream) && !nDecompressionError) {
      unsigned int nBlockSize = 0;
      int nIsUncompressed = 0;
      int nDecompressedSize = 0;

      if (nPrevDecompressedSize != 0) {
         /* Move the previous block's output so that it ends right where the next block's output starts */
         memcpy(pOutData + BLOCK_SIZE - nPrevDecompressedSize, pOutData + BLOCK_SIZE, nPrevDecompressedSize);
      }
      else if (nDictionaryDataSize > 0 && pDictionaryData) {
         /* Prime the window with the dictionary. Only BLOCK_SIZE bytes fit in front of the output
          * area, so a larger dictionary is cut down to its last BLOCK_SIZE bytes instead of being
          * copied to an address before the start of pOutData.
          * NOTE(review): keeping the tail assumes the compressor primes its window the same way -- confirm. */
         const unsigned char *pDictionaryTail = (const unsigned char *)pDictionaryData;

         nPrevDecompressedSize = nDictionaryDataSize;
         if (nPrevDecompressedSize > BLOCK_SIZE) {
            pDictionaryTail += nPrevDecompressedSize - BLOCK_SIZE;
            nPrevDecompressedSize = BLOCK_SIZE;
         }
         memcpy(pOutData + BLOCK_SIZE - nPrevDecompressedSize, pDictionaryTail, nPrevDecompressedSize);
      }

      if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
         const int nFrameSize = lzsa_get_frame_size();

         memset(cFrameData, 0, sizeof(cFrameData));
         if (pInStream->read(pInStream, cFrameData, nFrameSize) == nFrameSize) {
            if (lzsa_decode_frame(cFrameData, nFrameSize, &nBlockSize, &nIsUncompressed) < 0) {
               nDecompressionError = LZSA_ERROR_FORMAT;
               nBlockSize = 0;
            }

            nCompressedSize += (long long)nFrameSize;
         }
         else {
            nDecompressionError = LZSA_ERROR_SRC;
            nBlockSize = 0;
         }
      }
      else {
         /* A raw stream is exactly one block: ask for as much as fits the first time, nothing afterwards */
         if (!nNumBlocks)
            nBlockSize = BLOCK_SIZE;
         else
            nBlockSize = 0;
      }

      /* Terminal frame, second pass of a raw stream, or a frame error recorded above */
      if (nBlockSize == 0)
         break;

      if ((int)nBlockSize > BLOCK_SIZE) {
         nDecompressionError = LZSA_ERROR_FORMAT;
         break;
      }

      size_t nReadBytes = pInStream->read(pInStream, pInBlock, nBlockSize);
      if (nFlags & LZSA_FLAG_RAW_BLOCK) {
         /* A raw block has no size field: whatever could be read is the block */
         nBlockSize = (unsigned int)nReadBytes;
      }

      if (nReadBytes != nBlockSize) {
         /* The frame header announced more data than the stream delivered: report it instead of ending silently with LZSA_OK */
         nDecompressionError = LZSA_ERROR_SRC;
         break;
      }

      nCompressedSize += (long long)nReadBytes;

      if (nIsUncompressed) {
         memcpy(pOutData + BLOCK_SIZE, pInBlock, nBlockSize);
         nDecompressedSize = nBlockSize;
      }
      else {
         nDecompressedSize = lzsa_decompressor_expand_block(pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE, nFormatVersion, nFlags);
         if (nDecompressedSize < 0) {
            nDecompressionError = LZSA_ERROR_DECOMPRESSION;
            break;
         }
      }

      if (nDecompressedSize != 0) {
         nOriginalSize += (long long)nDecompressedSize;

         if (pOutStream->write(pOutStream, pOutData + BLOCK_SIZE, nDecompressedSize) != nDecompressedSize)
            nDecompressionError = LZSA_ERROR_DST;
         nPrevDecompressedSize = nDecompressedSize;
      }

      nNumBlocks++;
   }

   free(pOutData);
   pOutData = NULL;

   free(pInBlock);
   pInBlock = NULL;

   *pOriginalSize = nOriginalSize;
   *pCompressedSize = nCompressedSize;
   return nDecompressionError;
}

View file

@ -0,0 +1,86 @@
/*
* expand_streaming.h - streaming decompression definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _EXPAND_STREAMING_H
#define _EXPAND_STREAMING_H
#include "stream.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Forward declaration of the status type; its enumerators (LZSA_OK, LZSA_ERROR_xxx) are defined in lib.h.
 * NOTE(review): ISO C does not permit naming an enum type before its enumerator list has been seen, so this
 * relies on a compiler extension -- confirm that every supported compiler accepts it. */
typedef enum _lzsa_status_t lzsa_status_t;
/*-------------- File API -------------- */
/**
* Decompress file
*
* The input file is opened for reading and the output file for writing before
* the dictionary is loaded; both are closed again before the function returns.
*
* @param pszInFilename name of input(compressed) file to decompress
* @param pszOutFilename name of output(decompressed) file to generate
* @param pszDictionaryFilename name of dictionary file, or NULL for none
* @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
* @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
* @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful
* @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful
*
* @return LZSA_OK for success, LZSA_ERROR_SRC if the input file cannot be opened, LZSA_ERROR_DST if the output file cannot be opened, or another error value from lzsa_status_t
*/
lzsa_status_t lzsa_decompress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, int nFormatVersion,
long long *pOriginalSize, long long *pCompressedSize);
/*-------------- Streaming API -------------- */
/**
* Decompress stream
*
* Unless LZSA_FLAG_RAW_BLOCK is set, a file header is read first and each block
* is preceded by a frame header. Blocks larger than BLOCK_SIZE are rejected
* with LZSA_ERROR_FORMAT.
*
* @param pInStream input(compressed) stream to decompress
* @param pOutStream output(decompressed) stream to write to
* @param pDictionaryData dictionary contents, or NULL for none
* @param nDictionaryDataSize size of dictionary contents, or 0
* @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
* @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
* @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful (it is also written when an error is reported after the file header was accepted and the work buffers were allocated)
* @param pCompressedSize pointer to returned input(compressed) size, updated under the same conditions as pOriginalSize
*
* @return LZSA_OK for success, or an error value from lzsa_status_t
*/
lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, const unsigned int nFlags, int nFormatVersion,
long long *pOriginalSize, long long *pCompressedSize);
#ifdef __cplusplus
}
#endif
#endif /* _EXPAND_STREAMING_H */

54
loader/tools/lzsa/src/format.h Executable file
View file

@ -0,0 +1,54 @@
/*
* format.h - byte stream format definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _FORMAT_H
#define _FORMAT_H
/* Match offset limits.
 * NOTE(review): presumably the smallest and largest back-reference distance, in bytes -- confirm against the block encoder. */
#define MIN_OFFSET 1
#define MAX_OFFSET 0xffff
/* NOTE(review): presumably the largest value a variable-length length field can carry -- confirm. */
#define MAX_VARLEN 0xffff
/* Size of one block, in bytes. The streaming decompressor allocates BLOCK_SIZE for its input buffer and
 * 2 * BLOCK_SIZE for its output buffer, and rejects frames that announce more than BLOCK_SIZE bytes. */
#define BLOCK_SIZE 65536
/* Constants suffixed _V1.
 * NOTE(review): presumably the token field limits of format version 1 -- confirm against the V1 block code. */
#define MIN_MATCH_SIZE_V1 3
#define LITERALS_RUN_LEN_V1 7
#define MATCH_RUN_LEN_V1 15
/* Constants suffixed _V2.
 * NOTE(review): presumably the token field limits and bit positions of format version 2 -- confirm against the V2 block code. */
#define MIN_MATCH_SIZE_V2 2
#define LITERALS_RUN_LEN_V2 3
#define MATCH_RUN_LEN_V2 7
#define LITERALS_LEN_POS_V2 0
#define MATCH_LEN_POS_V2 2
#endif /* _FORMAT_H */

View file

@ -0,0 +1,236 @@
/*
* frame.c - frame implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
/*
*
* Changes to implement lzsa2 as depacker/packer for the bitfire demo-framework
* 2021 by Tobias Bindhammer aka Bitbreaker/Performers^Oxyron tobias.bindhammer@uni-ulm.de
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "frame.h"
#define LZSA_ID_0 0x7b
#define LZSA_ID_1 0x9e
/**
 * Report how many bytes the compressed file header occupies
 *
 * @return file header size, in bytes
 */
int lzsa_get_header_size(void) {
   /* Two identification bytes followed by one format byte, as written by lzsa_encode_header() */
   const int nHeaderBytes = 3;

   return nHeaderBytes;
}
/**
 * Report how many bytes a block's frame header occupies
 *
 * @return frame header size, in bytes
 */
int lzsa_get_frame_size(void) {
   /* Matches the three bytes written by the block frame encoders and expected by lzsa_decode_frame() */
   const int nFrameBytes = 3;

   return nFrameBytes;
}
/**
 * Store a file address as two bytes, most significant byte first
 *
 * Only the low 16 bits of the address are kept.
 *
 * @param pFrameData encoding buffer
 * @param nMaxFrameDataSize max encoding buffer size, in bytes (2 are needed)
 * @param nAddress address to encode
 *
 * @return number of encoded bytes (2), or -1 if the buffer is too small
 */
int lzsa_encode_addr_be(unsigned char *pFrameData, const int nMaxFrameDataSize, int nAddress) {
   /* Guard clause: nothing is written unless both bytes fit */
   if (nMaxFrameDataSize < 2)
      return -1;

   pFrameData[1] = nAddress & 0xff;
   pFrameData[0] = (nAddress >> 8) & 0xff;
   return 2;
}
/**
 * Store a file address as two bytes, least significant byte first
 *
 * Only the low 16 bits of the address are kept.
 *
 * @param pFrameData encoding buffer
 * @param nMaxFrameDataSize max encoding buffer size, in bytes (2 are needed)
 * @param nAddress address to encode
 *
 * @return number of encoded bytes (2), or -1 if the buffer is too small
 */
int lzsa_encode_addr_le(unsigned char *pFrameData, const int nMaxFrameDataSize, int nAddress) {
   int nWritten = -1;

   if (nMaxFrameDataSize >= 2) {
      unsigned char *pOut = pFrameData;

      *pOut++ = nAddress & 0xff;          /* low byte first */
      *pOut++ = (nAddress >> 8) & 0xff;   /* then the high byte */
      nWritten = (int)(pOut - pFrameData);
   }

   return nWritten;
}
/**
 * Encode terminal frame header
 *
 * This implementation emits no bytes for the footer: the buffer is never
 * written to, and any non-negative buffer size is accepted.
 *
 * @param pFrameData encoding buffer (not accessed)
 * @param nMaxFrameDataSize max encoding buffer size, in bytes
 *
 * @return number of encoded bytes (always 0), or -1 if nMaxFrameDataSize is negative
 */
int lzsa_encode_footer_frame(unsigned char *pFrameData, const int nMaxFrameDataSize) {
   return (nMaxFrameDataSize < 0) ? -1 : 0;
}
/* XXX TODO code below can be removed */
/**
 * Encode uncompressed block frame header
 *
 * The header is the block size as a 23-bit little-endian value, with the top
 * bit of the third byte set to flag the block as uncompressed.
 *
 * @param pFrameData encoding buffer
 * @param nMaxFrameDataSize max encoding buffer size, in bytes (3 are needed)
 * @param nBlockDataSize uncompressed block's data size, in bytes (0 to 0x7fffff)
 *
 * @return number of encoded bytes (3), or -1 if the buffer is too small or the size is out of range
 */
int lzsa_encode_uncompressed_block_frame(unsigned char *pFrameData, const int nMaxFrameDataSize, const int nBlockDataSize) {
   /* Negative sizes must be rejected too: they would otherwise pass the upper-bound
    * check and be written out as an unrelated 23-bit value */
   if (nMaxFrameDataSize < 3 || nBlockDataSize < 0 || nBlockDataSize > 0x7fffff)
      return -1;

   pFrameData[0] = nBlockDataSize & 0xff;
   pFrameData[1] = (nBlockDataSize >> 8) & 0xff;
   pFrameData[2] = ((nBlockDataSize >> 16) & 0x7f) | 0x80;   /* Uncompressed block */
   return 3;
}
/**
 * Encode file header
 *
 * The header consists of the two identification bytes followed by one byte
 * that is 0x00 for format version 1 and 0x20 for format version 2.
 *
 * @param pFrameData encoding buffer
 * @param nMaxFrameDataSize max encoding buffer size, in bytes (3 are needed)
 * @param nFormatVersion version of format to record in the header (1 or 2)
 *
 * @return number of encoded bytes (3), or -1 if the buffer is too small or the version is not 1 or 2
 */
int lzsa_encode_header(unsigned char *pFrameData, const int nMaxFrameDataSize, int nFormatVersion) {
   unsigned char cVersionByte;

   if (nMaxFrameDataSize < 3)
      return -1;
   if (nFormatVersion != 1 && nFormatVersion != 2)
      return -1;

   if (nFormatVersion == 2)
      cVersionByte = 0x20;
   else
      cVersionByte = 0;

   pFrameData[0] = LZSA_ID_0;   /* Magic number */
   pFrameData[1] = LZSA_ID_1;
   pFrameData[2] = cVersionByte;
   return 3;
}
/**
 * Encode compressed block frame header
 *
 * The header is the block size as a 23-bit little-endian value; the top bit
 * of the third byte stays clear to flag the block as compressed.
 *
 * @param pFrameData encoding buffer
 * @param nMaxFrameDataSize max encoding buffer size, in bytes (3 are needed)
 * @param nBlockDataSize compressed block's data size, in bytes (0 to 0x7fffff)
 *
 * @return number of encoded bytes (3), or -1 if the buffer is too small or the size is out of range
 */
int lzsa_encode_compressed_block_frame(unsigned char *pFrameData, const int nMaxFrameDataSize, const int nBlockDataSize) {
   /* Negative sizes must be rejected too: they would otherwise pass the upper-bound
    * check and be written out as an unrelated 23-bit value */
   if (nMaxFrameDataSize < 3 || nBlockDataSize < 0 || nBlockDataSize > 0x7fffff)
      return -1;

   pFrameData[0] = nBlockDataSize & 0xff;
   pFrameData[1] = (nBlockDataSize >> 8) & 0xff;
   pFrameData[2] = (nBlockDataSize >> 16) & 0x7f;
   return 3;
}
/**
 * Decode file header
 *
 * Accepts exactly three bytes: the two identification bytes, then a format
 * byte whose low five bits are zero and whose top three bits are either 0x00
 * (format version 1) or 0x20 (format version 2).
 *
 * @param pFrameData data bytes
 * @param nFrameDataSize number of bytes to decode (must be 3)
 * @param nFormatVersion pointer to format version, set to 1 or 2 if this function succeeds
 *
 * @return 0 for success, or -1 for failure
 */
int lzsa_decode_header(const unsigned char *pFrameData, const int nFrameDataSize, int *nFormatVersion) {
   unsigned char cVersionBits;

   /* The size is checked first so that no byte is read from a buffer of the wrong length */
   if (nFrameDataSize != 3)
      return -1;

   if (pFrameData[0] != LZSA_ID_0 || pFrameData[1] != LZSA_ID_1)
      return -1;

   if (pFrameData[2] & 0x1f)
      return -1;

   cVersionBits = pFrameData[2] & 0xe0;
   if (cVersionBits != 0x00 && cVersionBits != 0x20)
      return -1;

   *nFormatVersion = cVersionBits ? 2 : 1;
   return 0;
}
/**
 * Decode frame header
 *
 * The three bytes form a little-endian 24-bit value: the low 23 bits are the
 * block size and the top bit flags an uncompressed block.
 *
 * @param pFrameData data bytes
 * @param nFrameDataSize number of bytes to decode (must be 3)
 * @param nBlockSize pointer to block size, updated if this function succeeds (set to 0 if this is the terminal frame)
 * @param nIsUncompressed pointer to uncompressed block flag (1 if the block is stored uncompressed, 0 otherwise), updated if this function succeeds
 *
 * @return 0 for success, or -1 for failure
 */
int lzsa_decode_frame(const unsigned char *pFrameData, const int nFrameDataSize, unsigned int *nBlockSize, int *nIsUncompressed) {
   unsigned int nFrameValue;

   if (nFrameDataSize != 3)
      return -1;

   nFrameValue = (unsigned int)pFrameData[0];
   nFrameValue |= ((unsigned int)pFrameData[1]) << 8;
   nFrameValue |= ((unsigned int)pFrameData[2]) << 16;

   *nIsUncompressed = (nFrameValue & 0x800000) ? 1 : 0;
   *nBlockSize = nFrameValue & 0x7fffff;
   return 0;
}

View file

@ -0,0 +1,144 @@
/*
* frame.h - frame definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _FRAME_H
#define _FRAME_H
#ifdef __cplusplus
extern "C" {
#endif
/**
* Get compressed file header size
*
* This is the byte count to pass to lzsa_decode_header(), which accepts no other size.
*
* @return file header size, in bytes
*/
int lzsa_get_header_size(void);
/**
* Get compressed frame header size
*
* This is the byte count to pass to lzsa_decode_frame(), which accepts no other size.
*
* @return frame header size, in bytes
*/
int lzsa_get_frame_size(void);
/**
* Encode file address big-endian
*
* Stores the low 16 bits of nAddress as two bytes, most significant byte first.
*
* @param pFrameData encoding buffer
* @param nMaxFrameDataSize max encoding buffer size, in bytes (at least 2 are required)
* @param nAddress address to encode
*
* @return number of encoded bytes, or -1 for failure
*/
int lzsa_encode_addr_be(unsigned char *pFrameData, const int nMaxFrameDataSize, int nAddress);
/**
* Encode file address little-endian
*
* Stores the low 16 bits of nAddress as two bytes, least significant byte first.
*
* @param pFrameData encoding buffer
* @param nMaxFrameDataSize max encoding buffer size, in bytes (at least 2 are required)
* @param nAddress address to encode
*
* @return number of encoded bytes, or -1 for failure
*/
int lzsa_encode_addr_le(unsigned char *pFrameData, const int nMaxFrameDataSize, int nAddress);
/**
* Encode file header
*
* @param pFrameData encoding buffer
* @param nMaxFrameDataSize max encoding buffer size, in bytes (at least 3 are required)
* @param nFormatVersion version of format to record in the header; only 1 and 2 are accepted
*
* @return number of encoded bytes, or -1 for failure
*/
int lzsa_encode_header(unsigned char *pFrameData, const int nMaxFrameDataSize, int nFormatVersion);
/**
* Encode compressed block frame header
*
* The size is written as three little-endian bytes with the top bit of the last byte clear.
*
* @param pFrameData encoding buffer
* @param nMaxFrameDataSize max encoding buffer size, in bytes (at least 3 are required)
* @param nBlockDataSize compressed block's data size, in bytes (sizes above 0x7fffff are rejected)
*
* @return number of encoded bytes, or -1 for failure
*/
int lzsa_encode_compressed_block_frame(unsigned char *pFrameData, const int nMaxFrameDataSize, const int nBlockDataSize);
/**
* Encode uncompressed block frame header
*
* The size is written as three little-endian bytes with the top bit of the last byte set,
* which is how lzsa_decode_frame() recognizes an uncompressed block.
*
* @param pFrameData encoding buffer
* @param nMaxFrameDataSize max encoding buffer size, in bytes (at least 3 are required)
* @param nBlockDataSize uncompressed block's data size, in bytes (sizes above 0x7fffff are rejected)
*
* @return number of encoded bytes, or -1 for failure
*/
int lzsa_encode_uncompressed_block_frame(unsigned char *pFrameData, const int nMaxFrameDataSize, const int nBlockDataSize);
/**
* Encode terminal frame header
*
* The implementation in frame.c writes no bytes at all: it returns 0 for any
* non-negative buffer size and never touches pFrameData.
*
* @param pFrameData encoding buffer
* @param nMaxFrameDataSize max encoding buffer size, in bytes
*
* @return number of encoded bytes, or -1 for failure
*/
int lzsa_encode_footer_frame(unsigned char *pFrameData, const int nMaxFrameDataSize);
/**
* Decode file header
*
* @param pFrameData data bytes
* @param nFrameDataSize number of bytes to decode (must be exactly 3)
* @param nFormatVersion pointer to format version, set to 1 or 2 if this function succeeds
*
* @return 0 for success, or -1 for failure
*/
int lzsa_decode_header(const unsigned char *pFrameData, const int nFrameDataSize, int *nFormatVersion);
/**
* Decode frame header
*
* @param pFrameData data bytes
* @param nFrameDataSize number of bytes to decode (must be exactly 3)
* @param nBlockSize pointer to block size, updated if this function succeeds (set to 0 if this is the terminal frame)
* @param nIsUncompressed pointer to uncompressed block flag (1 if the block is stored uncompressed, 0 if it is compressed), updated if this function succeeds
*
* @return 0 for success, or -1 for failure
*/
int lzsa_decode_frame(const unsigned char *pFrameData, const int nFrameDataSize, unsigned int *nBlockSize, int *nIsUncompressed);
#ifdef __cplusplus
}
#endif
#endif /* _FRAME_H */

95
loader/tools/lzsa/src/lib.h Executable file
View file

@ -0,0 +1,95 @@
/*
* lib.h - LZSA library definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _LIB_H
#define _LIB_H
#include "stream.h"
#include "dictionary.h"
#include "frame.h"
#include "format.h"
#include "shrink_context.h"
#include "shrink_streaming.h"
#include "shrink_inmem.h"
#include "expand_context.h"
#include "expand_streaming.h"
#include "expand_inmem.h"
#ifdef __cplusplus
extern "C" {
#endif
/** High level status for compression and decompression. LZSA_OK is 0, so a status can be tested for failure with a plain truth test. */
typedef enum _lzsa_status_t {
LZSA_OK = 0, /**< Success */
LZSA_ERROR_SRC, /**< Error opening or reading input */
LZSA_ERROR_DST, /**< Error opening or writing output */
LZSA_ERROR_DICTIONARY, /**< Error reading dictionary */
LZSA_ERROR_MEMORY, /**< Out of memory */
/* Compression-specific status codes */
LZSA_ERROR_COMPRESSION, /**< Internal compression error */
LZSA_ERROR_RAW_TOOLARGE, /**< Input is too large to be compressed to a raw block */
LZSA_ERROR_RAW_UNCOMPRESSED, /**< Input is incompressible and raw blocks don't support uncompressed data */
/* Decompression-specific status codes */
LZSA_ERROR_FORMAT, /**< Invalid input format or magic number when decompressing */
LZSA_ERROR_DECOMPRESSION /**< Internal decompression error */
} lzsa_status_t;
/* Compression flags (bit mask; the decompression entry points take the same nFlags values) */
#define LZSA_FLAG_FAVOR_RATIO (1<<0) /**< 1 to compress with the best ratio, 0 to trade some compression ratio for extra decompression speed */
#define LZSA_FLAG_RAW_BLOCK (1<<1) /**< 1 to emit a raw block, or to decompress one: the decompressors then expect no file header and no frame headers */
#define LZSA_FLAG_RAW_BACKWARD (1<<2) /**< 1 to compress or decompress raw block backward */
/**
 * Reverse the order of the bytes in a buffer, in place
 *
 * Buffers of fewer than two bytes are left untouched.
 *
 * @param pBuffer pointer to buffer whose contents are to be reversed
 * @param nBufferSize size of buffer in bytes
 */
static inline void lzsa_reverse_buffer(unsigned char *pBuffer, const int nBufferSize) {
   int nHead = 0;
   int nTail = nBufferSize - 1;

   /* Swap the outermost pair and walk both cursors inwards until they meet */
   while (nHead < nTail) {
      const unsigned char cSaved = pBuffer[nTail];

      pBuffer[nTail--] = pBuffer[nHead];
      pBuffer[nHead++] = cSaved;
   }
}
#ifdef __cplusplus
}
#endif
#endif /* _LIB_H */

View file

@ -0,0 +1,32 @@
# Object files
*.o
*.ko
*.obj
*.elf
# Precompiled Headers
*.gch
*.pch
# Libraries
*.lib
*.a
*.la
*.lo
# Shared objects (inc. Windows DLLs)
*.dll
*.so
*.so.*
*.dylib
# Executables
*.exe
*.out
*.app
*.i*86
*.x86_64
*.hex
# CMake files/directories
build/

View file

@ -0,0 +1,21 @@
# libdivsufsort Change Log
See full changelog at: https://github.com/y-256/libdivsufsort/commits
## [2.0.1] - 2010-11-11
### Fixed
* Wrong variable used in `divbwt` function
* Enclose some string variables with double quotation marks in include/CMakeLists.txt
* Fix typo in include/CMakeLists.txt
## 2.0.0 - 2008-08-23
### Changed
* Switch the build system to [CMake](http://www.cmake.org/)
* Improve the performance of the suffix-sorting algorithm
### Added
* OpenMP support
* 64-bit version of divsufsort
[Unreleased]: https://github.com/y-256/libdivsufsort/compare/2.0.1...HEAD
[2.0.1]: https://github.com/y-256/libdivsufsort/compare/2.0.0...2.0.1

View file

@ -0,0 +1,99 @@
### cmake file for building libdivsufsort Package ###
# NOTE(review): CMake 4.x refuses minimum versions below 3.5 -- confirm which
# CMake releases this vendored copy still has to configure with.
cmake_minimum_required(VERSION 2.4.4)
# Let include(<module>) below locate the project's own helper modules.
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules")
include(AppendCompilerFlags)
## Project information ##
project(libdivsufsort C)
set(PROJECT_VENDOR "Yuta Mori")
set(PROJECT_CONTACT "yuta.256@gmail.com")
set(PROJECT_URL "https://github.com/y-256/libdivsufsort")
set(PROJECT_DESCRIPTION "A lightweight suffix sorting library")
# Presumably defines the PROJECT_VERSION_* / LIBRARY_VERSION variables -- verify.
include(VERSION.cmake)
## CPack configuration ##
# Archive formats produced for binary and for source packages.
set(CPACK_GENERATOR "TGZ;TBZ2;ZIP")
set(CPACK_SOURCE_GENERATOR "TGZ;TBZ2;ZIP")
include(ProjectCPack)
## Project options ##
option(BUILD_SHARED_LIBS "Set to OFF to build static libraries" ON)
option(BUILD_EXAMPLES "Build examples" ON)
option(BUILD_DIVSUFSORT64 "Build libdivsufsort64" OFF)
option(USE_OPENMP "Use OpenMP for parallelization" OFF)
option(WITH_LFS "Enable Large File Support" ON)
## Installation directories ##
# Each directory is a cache entry the user may preset; when left empty it
# falls back to a default below CMAKE_INSTALL_PREFIX.
set(LIB_SUFFIX "" CACHE STRING "Define suffix of directory name (32 or 64)")
set(CMAKE_INSTALL_RUNTIMEDIR "" CACHE PATH "Specify the output directory for dll runtimes (default is bin)")
if(NOT CMAKE_INSTALL_RUNTIMEDIR)
set(CMAKE_INSTALL_RUNTIMEDIR "${CMAKE_INSTALL_PREFIX}/bin")
endif(NOT CMAKE_INSTALL_RUNTIMEDIR)
set(CMAKE_INSTALL_LIBDIR "" CACHE PATH "Specify the output directory for libraries (default is lib)")
if(NOT CMAKE_INSTALL_LIBDIR)
set(CMAKE_INSTALL_LIBDIR "${CMAKE_INSTALL_PREFIX}/lib${LIB_SUFFIX}")
endif(NOT CMAKE_INSTALL_LIBDIR)
set(CMAKE_INSTALL_INCLUDEDIR "" CACHE PATH "Specify the output directory for header files (default is include)")
if(NOT CMAKE_INSTALL_INCLUDEDIR)
set(CMAKE_INSTALL_INCLUDEDIR "${CMAKE_INSTALL_PREFIX}/include")
endif(NOT CMAKE_INSTALL_INCLUDEDIR)
set(CMAKE_INSTALL_PKGCONFIGDIR "" CACHE PATH "Specify the output directory for pkgconfig files (default is lib/pkgconfig)")
if(NOT CMAKE_INSTALL_PKGCONFIGDIR)
set(CMAKE_INSTALL_PKGCONFIGDIR "${CMAKE_INSTALL_LIBDIR}/pkgconfig")
endif(NOT CMAKE_INSTALL_PKGCONFIGDIR)
## Build type ##
# Default to Release when no build type was chosen; Debug builds additionally
# get verbose makefiles.
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release")
elseif(CMAKE_BUILD_TYPE STREQUAL "Debug")
set(CMAKE_VERBOSE_MAKEFILE ON)
endif(NOT CMAKE_BUILD_TYPE)
## Compiler options ##
# Warning, optimisation and OpenMP flags are chosen per compiler family
# (MSVC, Borland, GCC, anything else) and handed to append_c_compiler_flags.
if(MSVC)
append_c_compiler_flags("/W4" "VC" CMAKE_C_FLAGS)
append_c_compiler_flags("/Oi;/Ot;/Ox;/Oy" "VC" CMAKE_C_FLAGS_RELEASE)
if(USE_OPENMP)
append_c_compiler_flags("/openmp" "VC" CMAKE_C_FLAGS)
endif(USE_OPENMP)
elseif(BORLAND)
append_c_compiler_flags("-w" "BCC" CMAKE_C_FLAGS)
append_c_compiler_flags("-Oi;-Og;-Os;-Ov;-Ox" "BCC" CMAKE_C_FLAGS_RELEASE)
else(MSVC)
if(CMAKE_COMPILER_IS_GNUCC)
append_c_compiler_flags("-Wall" "GCC" CMAKE_C_FLAGS)
append_c_compiler_flags("-fomit-frame-pointer" "GCC" CMAKE_C_FLAGS_RELEASE)
if(USE_OPENMP)
append_c_compiler_flags("-fopenmp" "GCC" CMAKE_C_FLAGS)
endif(USE_OPENMP)
else(CMAKE_COMPILER_IS_GNUCC)
append_c_compiler_flags("-Wall" "UNKNOWN" CMAKE_C_FLAGS)
append_c_compiler_flags("-fomit-frame-pointer" "UNKNOWN" CMAKE_C_FLAGS_RELEASE)
if(USE_OPENMP)
# Unknown compiler: try the common spellings of the OpenMP switch.
append_c_compiler_flags("-fopenmp;-openmp;-omp" "UNKNOWN" CMAKE_C_FLAGS)
endif(USE_OPENMP)
endif(CMAKE_COMPILER_IS_GNUCC)
endif(MSVC)
## Add definitions ##
add_definitions(-DHAVE_CONFIG_H=1 -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS)
## Add subdirectories ##
add_subdirectory(pkgconfig)
add_subdirectory(include)
add_subdirectory(lib)
if(BUILD_EXAMPLES)
add_subdirectory(examples)
endif(BUILD_EXAMPLES)
## Add 'uninstall' target ##
# Instantiate the uninstall script from its template at configure time
# (@ONLY: only @VAR@ references are substituted) and run it through a
# custom target named "uninstall".
CONFIGURE_FILE(
"${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules/cmake_uninstall.cmake.in"
"${CMAKE_CURRENT_BINARY_DIR}/CMakeModules/cmake_uninstall.cmake"
IMMEDIATE @ONLY)
ADD_CUSTOM_TARGET(uninstall
"${CMAKE_COMMAND}" -P "${CMAKE_CURRENT_BINARY_DIR}/CMakeModules/cmake_uninstall.cmake")

View file

@ -0,0 +1,38 @@
include(CheckCSourceCompiles)
include(CheckCXXSourceCompiles)
# append_c_compiler_flags(<flags> <name> <result>)
# For every flag in the ;-separated list <flags>, test-compiles a trivial C
# program with that flag and, when the compile succeeds, appends the flag
# (space separated) to the variable named by <result>.
# The outcome of each probe is stored in HAVE_<NAME>_<FLAG>, where both parts
# are upper-cased and the characters "-+/ " are replaced by "_".
macro(append_c_compiler_flags _flags _name _result)
# Remember the caller's CMAKE_REQUIRED_FLAGS so it can be restored at the end.
set(SAFE_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
string(REGEX REPLACE "[-+/ ]" "_" cname "${_name}")
string(TOUPPER "${cname}" cname)
foreach(flag ${_flags})
# Drop the leading switch characters (e.g. "-" or "/") before sanitising.
string(REGEX REPLACE "^[-+/ ]+(.*)[-+/ ]*$" "\\1" flagname "${flag}")
string(REGEX REPLACE "[-+/ ]" "_" flagname "${flagname}")
string(TOUPPER "${flagname}" flagname)
set(have_flag "HAVE_${cname}_${flagname}")
# Probe with this single flag only.
set(CMAKE_REQUIRED_FLAGS "${flag}")
check_c_source_compiles("int main() { return 0; }" ${have_flag})
if(${have_flag})
set(${_result} "${${_result}} ${flag}")
endif(${have_flag})
endforeach(flag)
set(CMAKE_REQUIRED_FLAGS ${SAFE_CMAKE_REQUIRED_FLAGS})
endmacro(append_c_compiler_flags)
# append_cxx_compiler_flags(<flags> <name> <result>)
# C++ counterpart of append_c_compiler_flags: every flag of the ;-separated
# list <flags> is probed with a trivial C++ program and, when accepted,
# appended (space separated) to the variable named by <result>.
# Probe results are stored in HAVE_<NAME>_<FLAG> (upper-cased, "-+/ " -> "_").
macro(append_cxx_compiler_flags _flags _name _result)
# Remember the caller's CMAKE_REQUIRED_FLAGS so it can be restored at the end.
set(SAFE_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
string(REGEX REPLACE "[-+/ ]" "_" cname "${_name}")
string(TOUPPER "${cname}" cname)
foreach(flag ${_flags})
# Drop the leading switch characters (e.g. "-" or "/") before sanitising.
string(REGEX REPLACE "^[-+/ ]+(.*)[-+/ ]*$" "\\1" flagname "${flag}")
string(REGEX REPLACE "[-+/ ]" "_" flagname "${flagname}")
string(TOUPPER "${flagname}" flagname)
set(have_flag "HAVE_${cname}_${flagname}")
# Probe with this single flag only.
set(CMAKE_REQUIRED_FLAGS "${flag}")
check_cxx_source_compiles("int main() { return 0; }" ${have_flag})
if(${have_flag})
set(${_result} "${${_result}} ${flag}")
endif(${have_flag})
endforeach(flag)
set(CMAKE_REQUIRED_FLAGS ${SAFE_CMAKE_REQUIRED_FLAGS})
endmacro(append_cxx_compiler_flags)

View file

@ -0,0 +1,15 @@
include(CheckCSourceCompiles)
# check_function_keywords(<wordlist>)
# For every keyword in the ;-separated <wordlist>, test-compiles a small C
# program that uses the keyword as a function specifier and stores the outcome
# in HAVE_<KEYWORD> (keyword upper-cased, with "-+/ ()" replaced by "_").
# NOTE(review): `_result` is not a parameter of this macro, so ${_result} is
# resolved as an ordinary variable of the calling scope. If the caller does not
# define it, the first-match bookkeeping below is presumably inert and only the
# HAVE_<KEYWORD> results are usable -- confirm against the call sites.
macro(check_function_keywords _wordlist)
set(${_result} "")
foreach(flag ${_wordlist})
string(REGEX REPLACE "[-+/ ()]" "_" flagname "${flag}")
string(TOUPPER "${flagname}" flagname)
set(have_flag "HAVE_${flagname}")
check_c_source_compiles("${flag} void func(); void func() { } int main() { func(); return 0; }" ${have_flag})
# Intended to keep only the first keyword that compiles (see note above).
if(${have_flag} AND NOT ${_result})
set(${_result} "${flag}")
# break()
endif(${have_flag} AND NOT ${_result})
endforeach(flag)
endmacro(check_function_keywords)

View file

@ -0,0 +1,109 @@
## Checks for large file support ##
include(CheckIncludeFile)
include(CheckSymbolExists)
include(CheckTypeSize)
# check_lfs(<isenable>)
# Selects the types and functions to use for file offsets and sets:
#   LFS_OFF_T  offset type            LFS_FOPEN  fopen-like function
#   LFS_FSEEK  seek function          LFS_FTELL  tell function
#   LFS_PRID   printf conversion for LFS_OFF_T (a PRI* macro name or a quoted
#              string literal)
# When the variable named by <isenable> is true, three 64-bit capable variants
# are probed in order (off_t, off64_t, __int64); the first complete one wins.
# Otherwise, or when none is found, the plain long/fopen/fseek/ftell set is used.
macro(check_lfs _isenable)
set(LFS_OFF_T "")
set(LFS_FOPEN "")
set(LFS_FSEEK "")
set(LFS_FTELL "")
set(LFS_PRID "")
if(${_isenable})
# Probe with the large-file feature macros defined; the caller's
# CMAKE_REQUIRED_DEFINITIONS is restored after probing.
set(SAFE_CMAKE_REQUIRED_DEFINITIONS "${CMAKE_REQUIRED_DEFINITIONS}")
set(CMAKE_REQUIRED_DEFINITIONS ${CMAKE_REQUIRED_DEFINITIONS}
-D_LARGEFILE_SOURCE -D_LARGE_FILES -D_FILE_OFFSET_BITS=64
-D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS)
check_include_file("sys/types.h" HAVE_SYS_TYPES_H)
check_include_file("inttypes.h" HAVE_INTTYPES_H)
check_include_file("stddef.h" HAVE_STDDEF_H)
check_include_file("stdint.h" HAVE_STDINT_H)
# LFS type1: 8 <= sizeof(off_t), fseeko, ftello
check_type_size("off_t" SIZEOF_OFF_T)
if(SIZEOF_OFF_T GREATER 7)
check_symbol_exists("fseeko" "stdio.h" HAVE_FSEEKO)
check_symbol_exists("ftello" "stdio.h" HAVE_FTELLO)
if(HAVE_FSEEKO AND HAVE_FTELLO)
set(LFS_OFF_T "off_t")
set(LFS_FOPEN "fopen")
set(LFS_FSEEK "fseeko")
set(LFS_FTELL "ftello")
# Prefer PRIdMAX; otherwise pick a length modifier from the type sizes.
check_symbol_exists("PRIdMAX" "inttypes.h" HAVE_PRIDMAX)
if(HAVE_PRIDMAX)
set(LFS_PRID "PRIdMAX")
else(HAVE_PRIDMAX)
check_type_size("long" SIZEOF_LONG)
check_type_size("int" SIZEOF_INT)
if(SIZEOF_OFF_T GREATER SIZEOF_LONG)
set(LFS_PRID "\"lld\"")
elseif(SIZEOF_LONG GREATER SIZEOF_INT)
set(LFS_PRID "\"ld\"")
else(SIZEOF_OFF_T GREATER SIZEOF_LONG)
set(LFS_PRID "\"d\"")
endif(SIZEOF_OFF_T GREATER SIZEOF_LONG)
endif(HAVE_PRIDMAX)
endif(HAVE_FSEEKO AND HAVE_FTELLO)
endif(SIZEOF_OFF_T GREATER 7)
# LFS type2: 8 <= sizeof(off64_t), fopen64, fseeko64, ftello64
if(NOT LFS_OFF_T)
check_type_size("off64_t" SIZEOF_OFF64_T)
if(SIZEOF_OFF64_T GREATER 7)
check_symbol_exists("fopen64" "stdio.h" HAVE_FOPEN64)
check_symbol_exists("fseeko64" "stdio.h" HAVE_FSEEKO64)
check_symbol_exists("ftello64" "stdio.h" HAVE_FTELLO64)
if(HAVE_FOPEN64 AND HAVE_FSEEKO64 AND HAVE_FTELLO64)
set(LFS_OFF_T "off64_t")
set(LFS_FOPEN "fopen64")
set(LFS_FSEEK "fseeko64")
set(LFS_FTELL "ftello64")
# Same printf-conversion selection as for type1.
check_symbol_exists("PRIdMAX" "inttypes.h" HAVE_PRIDMAX)
if(HAVE_PRIDMAX)
set(LFS_PRID "PRIdMAX")
else(HAVE_PRIDMAX)
check_type_size("long" SIZEOF_LONG)
check_type_size("int" SIZEOF_INT)
if(SIZEOF_OFF64_T GREATER SIZEOF_LONG)
set(LFS_PRID "\"lld\"")
elseif(SIZEOF_LONG GREATER SIZEOF_INT)
set(LFS_PRID "\"ld\"")
else(SIZEOF_OFF64_T GREATER SIZEOF_LONG)
set(LFS_PRID "\"d\"")
endif(SIZEOF_OFF64_T GREATER SIZEOF_LONG)
endif(HAVE_PRIDMAX)
endif(HAVE_FOPEN64 AND HAVE_FSEEKO64 AND HAVE_FTELLO64)
endif(SIZEOF_OFF64_T GREATER 7)
endif(NOT LFS_OFF_T)
# LFS type3: 8 <= sizeof(__int64), _fseeki64, _ftelli64
if(NOT LFS_OFF_T)
check_type_size("__int64" SIZEOF___INT64)
if(SIZEOF___INT64 GREATER 7)
check_symbol_exists("_fseeki64" "stdio.h" HAVE__FSEEKI64)
check_symbol_exists("_ftelli64" "stdio.h" HAVE__FTELLI64)
if(HAVE__FSEEKI64 AND HAVE__FTELLI64)
set(LFS_OFF_T "__int64")
set(LFS_FOPEN "fopen")
set(LFS_FSEEK "_fseeki64")
set(LFS_FTELL "_ftelli64")
set(LFS_PRID "\"I64d\"")
endif(HAVE__FSEEKI64 AND HAVE__FTELLI64)
endif(SIZEOF___INT64 GREATER 7)
endif(NOT LFS_OFF_T)
set(CMAKE_REQUIRED_DEFINITIONS "${SAFE_CMAKE_REQUIRED_DEFINITIONS}")
endif(${_isenable})
# Fallback: no 64-bit variant found (or large file support not requested).
if(NOT LFS_OFF_T)
## not found
set(LFS_OFF_T "long")
set(LFS_FOPEN "fopen")
set(LFS_FSEEK "fseek")
set(LFS_FTELL "ftell")
set(LFS_PRID "\"ld\"")
endif(NOT LFS_OFF_T)
endmacro(check_lfs)

View file

@ -0,0 +1,38 @@
# Fills in the CPACK_* packaging variables from the PROJECT_* variables set by
# the including script, then loads the CPack module.
# If the cmake version includes cpack, use it
IF(EXISTS "${CMAKE_ROOT}/Modules/CPack.cmake")
SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "${PROJECT_DESCRIPTION}")
SET(CPACK_PACKAGE_VENDOR "${PROJECT_VENDOR}")
SET(CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_CURRENT_SOURCE_DIR}/README.md")
SET(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
SET(CPACK_PACKAGE_VERSION_MAJOR "${PROJECT_VERSION_MAJOR}")
SET(CPACK_PACKAGE_VERSION_MINOR "${PROJECT_VERSION_MINOR}")
SET(CPACK_PACKAGE_VERSION_PATCH "${PROJECT_VERSION_PATCH}")
# SET(CPACK_PACKAGE_INSTALL_DIRECTORY "${PROJECT_NAME} ${PROJECT_VERSION}")
SET(CPACK_SOURCE_PACKAGE_FILE_NAME "${PROJECT_NAME}-${PROJECT_VERSION_FULL}")
# Derive "<system>-<processor>" unless the caller already chose a name.
IF(NOT DEFINED CPACK_SYSTEM_NAME)
SET(CPACK_SYSTEM_NAME "${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}")
ENDIF(NOT DEFINED CPACK_SYSTEM_NAME)
# On Windows the name becomes win64-... or win32-..., depending on CMAKE_CL_64.
IF(${CPACK_SYSTEM_NAME} MATCHES Windows)
IF(CMAKE_CL_64)
SET(CPACK_SYSTEM_NAME win64-${CMAKE_SYSTEM_PROCESSOR})
ELSE(CMAKE_CL_64)
SET(CPACK_SYSTEM_NAME win32-${CMAKE_SYSTEM_PROCESSOR})
ENDIF(CMAKE_CL_64)
ENDIF(${CPACK_SYSTEM_NAME} MATCHES Windows)
# Binary package name = source package name + system name.
IF(NOT DEFINED CPACK_PACKAGE_FILE_NAME)
SET(CPACK_PACKAGE_FILE_NAME "${CPACK_SOURCE_PACKAGE_FILE_NAME}-${CPACK_SYSTEM_NAME}")
ENDIF(NOT DEFINED CPACK_PACKAGE_FILE_NAME)
SET(CPACK_PACKAGE_CONTACT "${PROJECT_CONTACT}")
IF(UNIX)
SET(CPACK_STRIP_FILES "")
SET(CPACK_SOURCE_STRIP_FILES "")
# SET(CPACK_PACKAGE_EXECUTABLES "ccmake" "CMake")
ENDIF(UNIX)
# Patterns of paths left out of source packages (VCS metadata, build
# directories, editor backup files).
SET(CPACK_SOURCE_IGNORE_FILES "/CVS/" "/build/" "/\\\\.build/" "/\\\\.svn/" "~$")
# include CPack model once all variables are set
INCLUDE(CPack)
ENDIF(EXISTS "${CMAKE_ROOT}/Modules/CPack.cmake")

View file

@ -0,0 +1,36 @@
# Uninstall script template: the @VAR@ placeholders are filled in when the
# template is configured. Removes every file listed in install_manifest.txt
# (each path prefixed with $ENV{DESTDIR}) and finally the manifest itself.
IF(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
MESSAGE(FATAL_ERROR "Cannot find install manifest: \"@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt\"")
ENDIF(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
FILE(READ "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt" files)
# One manifest line per list element.
STRING(REGEX REPLACE "\n" ";" files "${files}")
# Pass 1: record for each entry (by position) whether the file exists.
SET(NUM 0)
FOREACH(file ${files})
IF(EXISTS "$ENV{DESTDIR}${file}")
MESSAGE(STATUS "Looking for \"$ENV{DESTDIR}${file}\" - found")
SET(UNINSTALL_CHECK_${NUM} 1)
ELSE(EXISTS "$ENV{DESTDIR}${file}")
MESSAGE(STATUS "Looking for \"$ENV{DESTDIR}${file}\" - not found")
SET(UNINSTALL_CHECK_${NUM} 0)
ENDIF(EXISTS "$ENV{DESTDIR}${file}")
MATH(EXPR NUM "1 + ${NUM}")
ENDFOREACH(file)
# Pass 2: remove the files found in pass 1; any failed removal is fatal.
SET(NUM 0)
FOREACH(file ${files})
IF(${UNINSTALL_CHECK_${NUM}})
MESSAGE(STATUS "Uninstalling \"$ENV{DESTDIR}${file}\"")
EXEC_PROGRAM(
"@CMAKE_COMMAND@" ARGS "-E remove \"$ENV{DESTDIR}${file}\""
OUTPUT_VARIABLE rm_out
RETURN_VALUE rm_retval
)
IF(NOT "${rm_retval}" STREQUAL 0)
MESSAGE(FATAL_ERROR "Problem when removing \"$ENV{DESTDIR}${file}\"")
ENDIF(NOT "${rm_retval}" STREQUAL 0)
ENDIF(${UNINSTALL_CHECK_${NUM}})
MATH(EXPR NUM "1 + ${NUM}")
ENDFOREACH(file)
FILE(REMOVE "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")

View file

@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2003 Yuta Mori All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View file

@ -0,0 +1,140 @@
# libdivsufsort
libdivsufsort is a software library that implements a lightweight suffix array construction algorithm.
## News
* 2015-03-21: The project has moved from [Google Code](http://code.google.com/p/libdivsufsort/) to [GitHub](https://github.com/y-256/libdivsufsort)
## Introduction
This library provides a simple and efficient C API to construct a suffix array and a Burrows-Wheeler transformed string from a given string over a constant-size alphabet.
The algorithm runs in O(n log n) worst-case time using only 5n+O(1) bytes of memory space, where n is the length of
the string.
## Build requirements
* An ANSI C Compiler (e.g. GNU GCC)
* [CMake](http://www.cmake.org/ "CMake") version 2.4.4 or newer
* CMake-supported build tool
## Building on GNU/Linux
1. Get the source code from GitHub. You can either
* use git to clone the repository
```
git clone https://github.com/y-256/libdivsufsort.git
```
* or download a [zip file](../../archive/master.zip) directly
2. Create a `build` directory in the package source directory.
```shell
$ cd libdivsufsort
$ mkdir build
$ cd build
```
3. Configure the package for your system.
If you want to install to a different location, change the -DCMAKE_INSTALL_PREFIX option.
```shell
$ cmake -DCMAKE_BUILD_TYPE="Release" \
-DCMAKE_INSTALL_PREFIX="/usr/local" ..
```
4. Compile the package.
```shell
$ make
```
5. (Optional) Install the library and header files.
```shell
$ sudo make install
```
## API
```c
/* Data types */
typedef int32_t saint_t;
typedef int32_t saidx_t;
typedef uint8_t sauchar_t;
/*
* Constructs the suffix array of a given string.
* @param T[0..n-1] The input string.
* @param SA[0..n-1] The output array of suffixes.
* @param n The length of the given string.
* @return 0 if no error occurred, -1 or -2 otherwise.
*/
saint_t
divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n);
/*
* Constructs the burrows-wheeler transformed string of a given string.
* @param T[0..n-1] The input string.
* @param U[0..n-1] The output string. (can be T)
* @param A[0..n-1] The temporary array. (can be NULL)
* @param n The length of the given string.
* @return The primary index if no error occurred, -1 or -2 otherwise.
*/
saidx_t
divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n);
```
## Example Usage
```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <divsufsort.h>
int main() {
// input data
char *Text = "abracadabra";
int n = strlen(Text);
int i, j;
// allocate
int *SA = (int *)malloc(n * sizeof(int));
// sort
divsufsort((unsigned char *)Text, SA, n);
// output
for(i = 0; i < n; ++i) {
printf("SA[%2d] = %2d: ", i, SA[i]);
for(j = SA[i]; j < n; ++j) {
printf("%c", Text[j]);
}
printf("$\n");
}
// deallocate
free(SA);
return 0;
}
```
See the [examples](examples) directory for a few other examples.
## Benchmarks
See [Benchmarks](https://github.com/y-256/libdivsufsort/blob/wiki/SACA_Benchmarks.md) page for details.
## License
libdivsufsort is released under the [MIT license](LICENSE "MIT license").
> The MIT License (MIT)
>
> Copyright (c) 2003 Yuta Mori All rights reserved.
>
> Permission is hereby granted, free of charge, to any person obtaining a copy
> of this software and associated documentation files (the "Software"), to deal
> in the Software without restriction, including without limitation the rights
> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> copies of the Software, and to permit persons to whom the Software is
> furnished to do so, subject to the following conditions:
>
> The above copyright notice and this permission notice shall be included in all
> copies or substantial portions of the Software.
>
> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> SOFTWARE.
## Author
* Yuta Mori

View file

@ -0,0 +1,23 @@
# Project (package) version components.
set(PROJECT_VERSION_MAJOR "2")
set(PROJECT_VERSION_MINOR "0")
set(PROJECT_VERSION_PATCH "2")
set(PROJECT_VERSION_EXTRA "-1")
# PROJECT_VERSION is "<major>.<minor>"; PROJECT_VERSION_FULL adds patch and extra.
set(PROJECT_VERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}")
set(PROJECT_VERSION_FULL "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}${PROJECT_VERSION_EXTRA}")
# Library version numbers, kept separately from the project version above.
set(LIBRARY_VERSION "3.0.1")
set(LIBRARY_SOVERSION "3")
## Git revision number ##
# Inside a git checkout, replace PROJECT_VERSION_FULL with the output of
# `git describe --tags HEAD` (a leading "v" and surrounding whitespace removed).
# Errors from git are silenced; the static value above is then kept.
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git")
execute_process(COMMAND git describe --tags HEAD
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
OUTPUT_VARIABLE GIT_DESCRIBE_TAGS ERROR_QUIET)
if(GIT_DESCRIBE_TAGS)
string(REGEX REPLACE "^v(.*)" "\\1" GIT_REVISION "${GIT_DESCRIBE_TAGS}")
string(STRIP "${GIT_REVISION}" GIT_REVISION)
if(GIT_REVISION)
set(PROJECT_VERSION_FULL "${GIT_REVISION}")
endif(GIT_REVISION)
endif(GIT_DESCRIBE_TAGS)
endif(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git")

View file

@ -0,0 +1,11 @@
## Add definitions ##
# Large-file feature macros for the example programs.
add_definitions(-D_LARGEFILE_SOURCE -D_LARGE_FILES -D_FILE_OFFSET_BITS=64)
## Targets ##
# Headers are looked up in the sibling include/ directory of both the source
# and the build tree; libraries in the build tree's lib/ directory.
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/../include"
"${CMAKE_CURRENT_BINARY_DIR}/../include")
link_directories("${CMAKE_CURRENT_BINARY_DIR}/../lib")
# One executable per example source file, each linked against divsufsort.
foreach(src suftest mksary sasearch bwt unbwt)
add_executable(${src} ${src}.c)
target_link_libraries(${src} divsufsort)
endforeach(src)

View file

@ -0,0 +1,220 @@
/*
* bwt.c for libdivsufsort
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#if HAVE_CONFIG_H
# include "config.h"
#endif
#include <stdio.h>
#if HAVE_STRING_H
# include <string.h>
#endif
#if HAVE_STDLIB_H
# include <stdlib.h>
#endif
#if HAVE_MEMORY_H
# include <memory.h>
#endif
#if HAVE_STDDEF_H
# include <stddef.h>
#endif
#if HAVE_STRINGS_H
# include <strings.h>
#endif
#if HAVE_SYS_TYPES_H
# include <sys/types.h>
#endif
#if HAVE_IO_H && HAVE_FCNTL_H
# include <io.h>
# include <fcntl.h>
#endif
#include <time.h>
#include <divsufsort.h>
#include "lfs.h"
/*
 * Writes the low 32 bits of n to fp as four bytes, least significant byte
 * first. Returns the number of bytes written, i.e. 4 on success.
 */
static
size_t
write_int(FILE *fp, saidx_t n) {
  unsigned char bytes[4];
  int k;

  for(k = 0; k < 4; ++k) {
    bytes[k] = (unsigned char)((n >> (8 * k)) & 0xff);
  }
  return fwrite(bytes, sizeof(unsigned char), 4, fp);
}
/*
 * Prints the program banner and usage text to stderr, then terminates the
 * process with the given exit status. Does not return.
 */
static
void
print_help(const char *progname, int status) {
  FILE *const out = stderr;
  const char *const version = divsufsort_version();

  fprintf(out, "bwt, a burrows-wheeler transform program, version %s.\n", version);
  fprintf(out, "usage: %s [-b num] INFILE OUTFILE\n", progname);
  fputs(" -b num set block size to num MiB [1..512] (default: 32)\n\n", out);
  exit(status);
}
int
main(int argc, const char *argv[]) {
FILE *fp, *ofp;
const char *fname, *ofname;
sauchar_t *T;
saidx_t *SA;
LFS_OFF_T n;
size_t m;
saidx_t pidx;
clock_t start,finish;
saint_t i, blocksize = 32, needclose = 3;
/* Check arguments. */
if((argc == 1) ||
(strcmp(argv[1], "-h") == 0) ||
(strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
if((argc != 3) && (argc != 5)) { print_help(argv[0], EXIT_FAILURE); }
i = 1;
if(argc == 5) {
if(strcmp(argv[i], "-b") != 0) { print_help(argv[0], EXIT_FAILURE); }
blocksize = atoi(argv[i + 1]);
if(blocksize < 0) { blocksize = 1; }
else if(512 < blocksize) { blocksize = 512; }
i += 2;
}
blocksize <<= 20;
/* Open a file for reading. */
if(strcmp(argv[i], "-") != 0) {
#if HAVE_FOPEN_S
if(fopen_s(&fp, fname = argv[i], "rb") != 0) {
#else
if((fp = LFS_FOPEN(fname = argv[i], "rb")) == NULL) {
#endif
fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname);
perror(NULL);
exit(EXIT_FAILURE);
}
} else {
#if HAVE__SETMODE && HAVE__FILENO
if(_setmode(_fileno(stdin), _O_BINARY) == -1) {
fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
perror(NULL);
exit(EXIT_FAILURE);
}
#endif
fp = stdin;
fname = "stdin";
needclose ^= 1;
}
i += 1;
/* Open a file for writing. */
if(strcmp(argv[i], "-") != 0) {
#if HAVE_FOPEN_S
if(fopen_s(&ofp, ofname = argv[i], "wb") != 0) {
#else
if((ofp = LFS_FOPEN(ofname = argv[i], "wb")) == NULL) {
#endif
fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], ofname);
perror(NULL);
exit(EXIT_FAILURE);
}
} else {
#if HAVE__SETMODE && HAVE__FILENO
if(_setmode(_fileno(stdout), _O_BINARY) == -1) {
fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
perror(NULL);
exit(EXIT_FAILURE);
}
#endif
ofp = stdout;
ofname = "stdout";
needclose ^= 2;
}
/* Get the file size. */
if(LFS_FSEEK(fp, 0, SEEK_END) == 0) {
n = LFS_FTELL(fp);
rewind(fp);
if(n < 0) {
fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], fname);
perror(NULL);
exit(EXIT_FAILURE);
}
if(0x20000000L < n) { n = 0x20000000L; }
if((blocksize == 0) || (n < blocksize)) { blocksize = (saidx_t)n; }
} else if(blocksize == 0) { blocksize = 32 << 20; }
/* Allocate 5blocksize bytes of memory. */
T = (sauchar_t *)malloc(blocksize * sizeof(sauchar_t));
SA = (saidx_t *)malloc(blocksize * sizeof(saidx_t));
if((T == NULL) || (SA == NULL)) {
fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
exit(EXIT_FAILURE);
}
/* Write the blocksize. */
if(write_int(ofp, blocksize) != 4) {
fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
perror(NULL);
exit(EXIT_FAILURE);
}
fprintf(stderr, " BWT (blocksize %" PRIdSAINT_T ") ... ", blocksize);
start = clock();
for(n = 0; 0 < (m = fread(T, sizeof(sauchar_t), blocksize, fp)); n += m) {
/* Burrows-Wheeler Transform. */
pidx = divbwt(T, T, SA, m);
if(pidx < 0) {
fprintf(stderr, "%s (bw_transform): %s.\n",
argv[0],
(pidx == -1) ? "Invalid arguments" : "Cannot allocate memory");
exit(EXIT_FAILURE);
}
/* Write the bwted data. */
if((write_int(ofp, pidx) != 4) ||
(fwrite(T, sizeof(sauchar_t), m, ofp) != m)) {
fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
perror(NULL);
exit(EXIT_FAILURE);
}
}
if(ferror(fp)) {
fprintf(stderr, "%s: Cannot read from `%s': ", argv[0], fname);
perror(NULL);
exit(EXIT_FAILURE);
}
finish = clock();
fprintf(stderr, "%" PRIdOFF_T " bytes: %.4f sec\n",
n, (double)(finish - start) / (double)CLOCKS_PER_SEC);
/* Close files */
if(needclose & 1) { fclose(fp); }
if(needclose & 2) { fclose(ofp); }
/* Deallocate memory. */
free(SA);
free(T);
return 0;
}

View file

@ -0,0 +1,193 @@
/*
* mksary.c for libdivsufsort
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#if HAVE_CONFIG_H
# include "config.h"
#endif
#include <stdio.h>
#if HAVE_STRING_H
# include <string.h>
#endif
#if HAVE_STDLIB_H
# include <stdlib.h>
#endif
#if HAVE_MEMORY_H
# include <memory.h>
#endif
#if HAVE_STDDEF_H
# include <stddef.h>
#endif
#if HAVE_STRINGS_H
# include <strings.h>
#endif
#if HAVE_SYS_TYPES_H
# include <sys/types.h>
#endif
#if HAVE_IO_H && HAVE_FCNTL_H
# include <io.h>
# include <fcntl.h>
#endif
#include <time.h>
#include <divsufsort.h>
#include "lfs.h"
/*
 * Prints the program banner and usage text to stderr, then terminates the
 * process with the given exit status. Does not return.
 */
static
void
print_help(const char *progname, int status) {
  FILE *const out = stderr;
  const char *const version = divsufsort_version();

  fprintf(out, "mksary, a simple suffix array builder, version %s.\n", version);
  fprintf(out, "usage: %s INFILE OUTFILE\n\n", progname);
  exit(status);
}
/*
 * mksary example: reads the whole of INFILE into memory, builds its suffix
 * array with divsufsort() and writes the array to OUTFILE as raw saidx_t
 * values in host byte order.
 *
 * "-" as INFILE / OUTFILE selects stdin / stdout; the input must be seekable
 * because its size is determined with fseek/ftell.
 * Returns 0 on success; exits with EXIT_FAILURE on any usage, I/O, size or
 * allocation error.
 */
int
main(int argc, const char *argv[]) {
  FILE *fp, *ofp;
  const char *fname, *ofname;
  sauchar_t *T;
  saidx_t *SA;
  LFS_OFF_T n;
  size_t nalloc;
  clock_t start, finish;
  saint_t needclose = 3; /* bit 0 = fp, bit 1 = ofp */

  /* Check arguments. */
  if((argc == 1) ||
     (strcmp(argv[1], "-h") == 0) ||
     (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
  if(argc != 3) { print_help(argv[0], EXIT_FAILURE); }

  /* Open a file for reading. */
  if(strcmp(argv[1], "-") != 0) {
#if HAVE_FOPEN_S
    if(fopen_s(&fp, fname = argv[1], "rb") != 0) {
#else
    if((fp = LFS_FOPEN(fname = argv[1], "rb")) == NULL) {
#endif
      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
  } else {
#if HAVE__SETMODE && HAVE__FILENO
    if(_setmode(_fileno(stdin), _O_BINARY) == -1) {
      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
#endif
    fp = stdin;
    fname = "stdin";
    needclose ^= 1;
  }

  /* Open a file for writing. */
  if(strcmp(argv[2], "-") != 0) {
#if HAVE_FOPEN_S
    if(fopen_s(&ofp, ofname = argv[2], "wb") != 0) {
#else
    if((ofp = LFS_FOPEN(ofname = argv[2], "wb")) == NULL) {
#endif
      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], ofname);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
  } else {
#if HAVE__SETMODE && HAVE__FILENO
    if(_setmode(_fileno(stdout), _O_BINARY) == -1) {
      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
#endif
    ofp = stdout;
    ofname = "stdout";
    needclose ^= 2;
  }

  /* Get the file size. */
  if(LFS_FSEEK(fp, 0, SEEK_END) == 0) {
    n = LFS_FTELL(fp);
    /* Report an ftell failure before rewind() can disturb errno. */
    if(n < 0) {
      fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], fname);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
    rewind(fp);
    /* n is cast to saidx_t below, so it has to stay under 0x7fffffff. */
    if(0x7fffffff <= n) {
      fprintf(stderr, "%s: Input file `%s' is too big.\n", argv[0], fname);
      exit(EXIT_FAILURE);
    }
  } else {
    fprintf(stderr, "%s: Cannot fseek `%s': ", argv[0], fname);
    perror(NULL);
    exit(EXIT_FAILURE);
  }

  /* Allocate 5n bytes of memory. At least one element is requested because
     malloc(0) may return NULL, which would make an empty input look like an
     out-of-memory condition. */
  nalloc = (0 < n) ? (size_t)n : 1;
  T = (sauchar_t *)malloc(nalloc * sizeof(sauchar_t));
  SA = (saidx_t *)malloc(nalloc * sizeof(saidx_t));
  if((T == NULL) || (SA == NULL)) {
    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
    exit(EXIT_FAILURE);
  }

  /* Read n bytes of data. */
  if(fread(T, sizeof(sauchar_t), (size_t)n, fp) != (size_t)n) {
    fprintf(stderr, "%s: %s `%s': ",
      argv[0],
      (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
      fname);
    perror(NULL);
    exit(EXIT_FAILURE);
  }
  if(needclose & 1) { fclose(fp); }

  /* Construct the suffix array. */
  fprintf(stderr, "%s: %" PRIdOFF_T " bytes ... ", fname, n);
  start = clock();
  if(divsufsort(T, SA, (saidx_t)n) != 0) {
    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
    exit(EXIT_FAILURE);
  }
  finish = clock();
  fprintf(stderr, "%.4f sec\n", (double)(finish - start) / (double)CLOCKS_PER_SEC);

  /* Write the suffix array. */
  if(fwrite(SA, sizeof(saidx_t), (size_t)n, ofp) != (size_t)n) {
    fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
    perror(NULL);
    exit(EXIT_FAILURE);
  }
  /* Buffered output is only known to be written once the stream has been
     flushed, so a failing fclose/fflush is a write error. */
  if(((needclose & 2) ? fclose(ofp) : fflush(ofp)) != 0) {
    fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
    perror(NULL);
    exit(EXIT_FAILURE);
  }

  /* Deallocate memory. */
  free(SA);
  free(T);

  return 0;
}

View file

@ -0,0 +1,165 @@
/*
* sasearch.c for libdivsufsort
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#if HAVE_CONFIG_H
# include "config.h"
#endif
#include <stdio.h>
#if HAVE_STRING_H
# include <string.h>
#endif
#if HAVE_STDLIB_H
# include <stdlib.h>
#endif
#if HAVE_MEMORY_H
# include <memory.h>
#endif
#if HAVE_STDDEF_H
# include <stddef.h>
#endif
#if HAVE_STRINGS_H
# include <strings.h>
#endif
#if HAVE_SYS_TYPES_H
# include <sys/types.h>
#endif
#if HAVE_IO_H && HAVE_FCNTL_H
# include <io.h>
# include <fcntl.h>
#endif
#include <divsufsort.h>
#include "lfs.h"
/*
 * Prints the program banner and usage text to stderr, then terminates the
 * process with the given exit status. Does not return.
 */
static
void
print_help(const char *progname, int status) {
  FILE *const out = stderr;
  const char *const version = divsufsort_version();

  fprintf(out, "sasearch, a simple SA-based full-text search tool, version %s\n", version);
  fprintf(out, "usage: %s PATTERN FILE SAFILE\n\n", progname);
  exit(status);
}
/*
 * sasearch example: loads FILE and its suffix array SAFILE (raw saidx_t
 * values, one per byte of FILE), looks PATTERN up with sa_search() and prints
 * one matching SA entry per line on stdout.
 *
 * Returns 0 on success; exits with EXIT_FAILURE on any usage, I/O, size or
 * allocation error.
 */
int
main(int argc, const char *argv[]) {
  FILE *fp;
  const char *P;
  sauchar_t *T;
  saidx_t *SA;
  LFS_OFF_T n;
  size_t Psize, nalloc;
  saidx_t i, size, left;

  if((argc == 1) ||
     (strcmp(argv[1], "-h") == 0) ||
     (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
  if(argc != 4) { print_help(argv[0], EXIT_FAILURE); }

  P = argv[1];
  Psize = strlen(P);

  /* Open a file for reading. */
#if HAVE_FOPEN_S
  if(fopen_s(&fp, argv[2], "rb") != 0) {
#else
  if((fp = LFS_FOPEN(argv[2], "rb")) == NULL) {
#endif
    fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], argv[2]);
    perror(NULL);
    exit(EXIT_FAILURE);
  }

  /* Get the file size. */
  if(LFS_FSEEK(fp, 0, SEEK_END) == 0) {
    n = LFS_FTELL(fp);
    /* Report an ftell failure before rewind() can disturb errno. */
    if(n < 0) {
      fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], argv[2]);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
    rewind(fp);
    /* n is cast to saidx_t for sa_search(); refuse sizes that would not
       survive the cast (same bound as the mksary example). */
    if(0x7fffffff <= n) {
      fprintf(stderr, "%s: Input file `%s' is too big.\n", argv[0], argv[2]);
      exit(EXIT_FAILURE);
    }
  } else {
    fprintf(stderr, "%s: Cannot fseek `%s': ", argv[0], argv[2]);
    perror(NULL);
    exit(EXIT_FAILURE);
  }

  /* Allocate 5n bytes of memory. At least one element is requested because
     malloc(0) may return NULL, which would make an empty input look like an
     out-of-memory condition. */
  nalloc = (0 < n) ? (size_t)n : 1;
  T = (sauchar_t *)malloc(nalloc * sizeof(sauchar_t));
  SA = (saidx_t *)malloc(nalloc * sizeof(saidx_t));
  if((T == NULL) || (SA == NULL)) {
    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
    exit(EXIT_FAILURE);
  }

  /* Read n bytes of data. */
  if(fread(T, sizeof(sauchar_t), (size_t)n, fp) != (size_t)n) {
    fprintf(stderr, "%s: %s `%s': ",
      argv[0],
      (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
      argv[2]);
    perror(NULL);
    exit(EXIT_FAILURE);
  }
  fclose(fp);

  /* Open the SA file for reading. */
#if HAVE_FOPEN_S
  if(fopen_s(&fp, argv[3], "rb") != 0) {
#else
  if((fp = LFS_FOPEN(argv[3], "rb")) == NULL) {
#endif
    fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], argv[3]);
    perror(NULL);
    exit(EXIT_FAILURE);
  }

  /* Read n * sizeof(saidx_t) bytes of data. */
  /* NOTE(review): the SA entries are used as read from the file; whether
     sa_search() tolerates out-of-range values is not visible here. */
  if(fread(SA, sizeof(saidx_t), (size_t)n, fp) != (size_t)n) {
    fprintf(stderr, "%s: %s `%s': ",
      argv[0],
      (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
      argv[3]);
    perror(NULL);
    exit(EXIT_FAILURE);
  }
  fclose(fp);

  /* Search and print */
  size = sa_search(T, (saidx_t)n,
                   (const sauchar_t *)P, (saidx_t)Psize,
                   SA, (saidx_t)n, &left);
  for(i = 0; i < size; ++i) {
    fprintf(stdout, "%" PRIdSAIDX_T "\n", SA[left + i]);
  }

  /* Deallocate memory. */
  free(SA);
  free(T);

  return 0;
}

View file

@ -0,0 +1,164 @@
/*
* suftest.c for libdivsufsort
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#if HAVE_CONFIG_H
# include "config.h"
#endif
#include <stdio.h>
#if HAVE_STRING_H
# include <string.h>
#endif
#if HAVE_STDLIB_H
# include <stdlib.h>
#endif
#if HAVE_MEMORY_H
# include <memory.h>
#endif
#if HAVE_STDDEF_H
# include <stddef.h>
#endif
#if HAVE_STRINGS_H
# include <strings.h>
#endif
#if HAVE_SYS_TYPES_H
# include <sys/types.h>
#endif
#if HAVE_IO_H && HAVE_FCNTL_H
# include <io.h>
# include <fcntl.h>
#endif
#include <time.h>
#include <divsufsort.h>
#include "lfs.h"
/**
 * Writes the suftest banner (including the library version) and the usage
 * line to stderr, then terminates the process.
 * @param progname The program name shown in the usage line.
 * @param status The exit status handed to exit().
 */
static void
print_help(const char *progname, int status)
{
  FILE *const err = stderr;

  fprintf(err,
          "suftest, a suffixsort tester, version %s.\n",
          divsufsort_version());
  fprintf(err, "usage: %s FILE\n\n", progname);

  exit(status);
}
/**
 * suftest entry point: loads FILE (or stdin when FILE is "-"), builds its
 * suffix array with divsufsort(), reports the elapsed CPU time and verifies
 * the result with sufcheck().
 *
 * Usage: suftest FILE
 *
 * @param argc The argument count.
 * @param argv The argument vector: program name, FILE.
 * @return 0 on success; every failure path terminates via exit(EXIT_FAILURE).
 */
int
main(int argc, const char *argv[]) {
  FILE *fp;
  const char *fname;
  sauchar_t *T;
  saidx_t *SA;
  LFS_OFF_T n;
  clock_t start, finish;
  saint_t needclose = 1;  /* bit 0 set: fp was opened here and must be closed */

  /* Check arguments. */
  if((argc == 1) ||
     (strcmp(argv[1], "-h") == 0) ||
     (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
  if(argc != 2) { print_help(argv[0], EXIT_FAILURE); }

  /* Open a file for reading. */
  if(strcmp(argv[1], "-") != 0) {
#if HAVE_FOPEN_S
    if(fopen_s(&fp, fname = argv[1], "rb") != 0) {
#else
    if((fp = LFS_FOPEN(fname = argv[1], "rb")) == NULL) {
#endif
      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
  } else {
#if HAVE__SETMODE && HAVE__FILENO
    if(_setmode(_fileno(stdin), _O_BINARY) == -1) {
      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
#endif
    fp = stdin;
    fname = "stdin";
    needclose = 0;
  }

  /* Get the file size. */
  if(LFS_FSEEK(fp, 0, SEEK_END) == 0) {
    n = LFS_FTELL(fp);
    /* Report an ftell failure before rewind() gets a chance to disturb errno. */
    if(n < 0) {
      fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], fname);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
    rewind(fp);
    /* The length is cast to saidx_t below, so it must stay under this bound. */
    if(0x7fffffff <= n) {
      fprintf(stderr, "%s: Input file `%s' is too big.\n", argv[0], fname);
      exit(EXIT_FAILURE);
    }
  } else {
    fprintf(stderr, "%s: Cannot fseek `%s': ", argv[0], fname);
    perror(NULL);
    exit(EXIT_FAILURE);
  }

  /* Allocate n bytes for the text plus n suffix-array entries. */
  T = (sauchar_t *)malloc((size_t)n * sizeof(sauchar_t));
  SA = (saidx_t *)malloc((size_t)n * sizeof(saidx_t));
  if((T == NULL) || (SA == NULL)) {
    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
    exit(EXIT_FAILURE);
  }

  /* Read n bytes of data. */
  if(fread(T, sizeof(sauchar_t), (size_t)n, fp) != (size_t)n) {
    /* Use fname (not argv[1]) so stdin is named consistently with the other
       diagnostics instead of being printed as "-". */
    fprintf(stderr, "%s: %s `%s': ",
      argv[0],
      (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
      fname);
    perror(NULL);
    exit(EXIT_FAILURE);
  }
  if(needclose & 1) { fclose(fp); }

  /* Construct the suffix array. */
  fprintf(stderr, "%s: %" PRIdOFF_T " bytes ... ", fname, n);
  start = clock();
  if(divsufsort(T, SA, (saidx_t)n) != 0) {
    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
    exit(EXIT_FAILURE);
  }
  finish = clock();
  fprintf(stderr, "%.4f sec\n", (double)(finish - start) / (double)CLOCKS_PER_SEC);

  /* Check the suffix array. */
  if(sufcheck(T, SA, (saidx_t)n, 1) != 0) { exit(EXIT_FAILURE); }

  /* Deallocate memory. */
  free(SA);
  free(T);

  return 0;
}

View file

@ -0,0 +1,207 @@
/*
* unbwt.c for libdivsufsort
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#if HAVE_CONFIG_H
# include "config.h"
#endif
#include <stdio.h>
#if HAVE_STRING_H
# include <string.h>
#endif
#if HAVE_STDLIB_H
# include <stdlib.h>
#endif
#if HAVE_MEMORY_H
# include <memory.h>
#endif
#if HAVE_STDDEF_H
# include <stddef.h>
#endif
#if HAVE_STRINGS_H
# include <strings.h>
#endif
#if HAVE_SYS_TYPES_H
# include <sys/types.h>
#endif
#if HAVE_IO_H && HAVE_FCNTL_H
# include <io.h>
# include <fcntl.h>
#endif
#include <time.h>
#include <divsufsort.h>
#include "lfs.h"
/**
 * Reads a 32-bit little-endian integer from a stream.
 * @param fp The input stream.
 * @param n Receives the decoded value; left untouched unless 4 bytes were read.
 * @return The number of bytes actually read (4 on success).
 */
static
size_t
read_int(FILE *fp, saidx_t *n) {
  unsigned char c[4];
  size_t m = fread(c, sizeof(unsigned char), 4, fp);
  if(m == 4) {
    /* Assemble in unsigned arithmetic: `c[3] << 24` would promote to int and
       overflow (undefined behaviour) whenever the top bit of c[3] is set. */
    unsigned long u = ((unsigned long)c[0] <<  0) | ((unsigned long)c[1] <<  8) |
                      ((unsigned long)c[2] << 16) | ((unsigned long)c[3] << 24);
    if(u & 0x80000000UL) {
      /* Top bit set: produce the negative two's-complement value explicitly
         rather than relying on an out-of-range unsigned-to-signed cast. */
      *n = (saidx_t)((long)(u - 0x80000000UL) - 0x7fffffffL - 1L);
    } else {
      *n = (saidx_t)u;
    }
  }
  return m;
}
/**
 * Writes the unbwt banner (including the library version) and the usage
 * line to stderr, then terminates the process.
 * @param progname The program name shown in the usage line.
 * @param status The exit status handed to exit().
 */
static void
print_help(const char *progname, int status)
{
  FILE *const err = stderr;

  fprintf(err,
          "unbwt, an inverse burrows-wheeler transform program, version %s.\n",
          divsufsort_version());
  fprintf(err, "usage: %s INFILE OUTFILE\n\n", progname);

  exit(status);
}
/**
 * unbwt entry point: reads a stream made of a 4-byte blocksize header followed
 * by (4-byte primary index, BWT block) records, inverts each block with
 * inverse_bw_transform() and writes the result to OUTFILE.
 * "-" selects stdin / stdout for INFILE / OUTFILE respectively.
 *
 * Usage: unbwt INFILE OUTFILE
 *
 * @param argc The argument count.
 * @param argv The argument vector: program name, INFILE, OUTFILE.
 * @return 0 on success; every failure path terminates via exit(EXIT_FAILURE).
 */
int
main(int argc, const char *argv[]) {
  FILE *fp, *ofp;
  const char *fname, *ofname;
  sauchar_t *T;
  saidx_t *A;
  LFS_OFF_T n;
  size_t m;
  saidx_t pidx;
  clock_t start, finish;
  /* needclose: bit 0 = close fp at the end, bit 1 = close ofp at the end. */
  saint_t err, blocksize, needclose = 3;

  /* Check arguments. */
  if((argc == 1) ||
     (strcmp(argv[1], "-h") == 0) ||
     (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
  if(argc != 3) { print_help(argv[0], EXIT_FAILURE); }

  /* Open a file for reading. */
  if(strcmp(argv[1], "-") != 0) {
#if HAVE_FOPEN_S
    if(fopen_s(&fp, fname = argv[1], "rb") != 0) {
#else
    if((fp = LFS_FOPEN(fname = argv[1], "rb")) == NULL) {
#endif
      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
  } else {
#if HAVE__SETMODE && HAVE__FILENO
    if(_setmode(_fileno(stdin), _O_BINARY) == -1) {
      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
#endif
    fp = stdin;
    fname = "stdin";
    needclose ^= 1;
  }

  /* Open a file for writing. */
  if(strcmp(argv[2], "-") != 0) {
#if HAVE_FOPEN_S
    if(fopen_s(&ofp, ofname = argv[2], "wb") != 0) {
#else
    if((ofp = LFS_FOPEN(ofname = argv[2], "wb")) == NULL) {
#endif
      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], ofname);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
  } else {
#if HAVE__SETMODE && HAVE__FILENO
    if(_setmode(_fileno(stdout), _O_BINARY) == -1) {
      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
#endif
    ofp = stdout;
    ofname = "stdout";
    needclose ^= 2;
  }

  /* Read the blocksize. */
  if(read_int(fp, &blocksize) != 4) {
    fprintf(stderr, "%s: Cannot read from `%s': ", argv[0], fname);
    perror(NULL);
    exit(EXIT_FAILURE);
  }

  /* The blocksize comes straight from the input file. Reject values that are
     negative or whose byte count would wrap size_t, since either would yield
     buffers smaller than the block that is read and transformed below. */
  if((blocksize < 0) ||
     ((size_t)blocksize > (size_t)-1 / sizeof(saidx_t))) {
    fprintf(stderr, "%s: Invalid blocksize in `%s'.\n", argv[0], fname);
    exit(EXIT_FAILURE);
  }

  /* Allocate 5blocksize bytes of memory. */
  T = (sauchar_t *)malloc((size_t)blocksize * sizeof(sauchar_t));
  A = (saidx_t *)malloc((size_t)blocksize * sizeof(saidx_t));
  if((T == NULL) || (A == NULL)) {
    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "UnBWT (blocksize %" PRIdSAINT_T ") ... ", blocksize);
  start = clock();
  /* Each iteration consumes one record; n accumulates the decoded byte count. */
  for(n = 0; (m = read_int(fp, &pidx)) != 0; n += m) {
    /* Read blocksize bytes of data (the final block may be shorter). */
    if((m != 4) || ((m = fread(T, sizeof(sauchar_t), (size_t)blocksize, fp)) == 0)) {
      fprintf(stderr, "%s: %s `%s': ",
        argv[0],
        (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
        fname);
      perror(NULL);
      exit(EXIT_FAILURE);
    }

    /* Inverse Burrows-Wheeler Transform. */
    if((err = inverse_bw_transform(T, T, A, m, pidx)) != 0) {
      fprintf(stderr, "%s (reverseBWT): %s.\n",
        argv[0],
        (err == -1) ? "Invalid data" : "Cannot allocate memory");
      exit(EXIT_FAILURE);
    }

    /* Write m bytes of data. */
    if(fwrite(T, sizeof(sauchar_t), m, ofp) != m) {
      fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
  }
  if(ferror(fp)) {
    fprintf(stderr, "%s: Cannot read from `%s': ", argv[0], fname);
    perror(NULL);
    exit(EXIT_FAILURE);
  }
  finish = clock();
  fprintf(stderr, "%" PRIdOFF_T " bytes: %.4f sec\n",
    n, (double)(finish - start) / (double)CLOCKS_PER_SEC);

  /* Flush buffered output explicitly so that a late write failure (e.g. a
     full disk) is reported instead of being lost in an unchecked fclose(). */
  if(fflush(ofp) != 0) {
    fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
    perror(NULL);
    exit(EXIT_FAILURE);
  }

  /* Close files */
  if(needclose & 1) { fclose(fp); }
  if(needclose & 2) { fclose(ofp); }

  /* Deallocate memory. */
  free(A);
  free(T);

  return 0;
}

View file

@ -0,0 +1,162 @@
# Configure-time probing of headers, symbols and keywords; the results feed
# the generated lfs.h and config.h below.
include(CheckIncludeFiles)
include(CheckIncludeFile)
include(CheckSymbolExists)
include(CheckTypeSize)
# NOTE(review): CheckFunctionKeywords and CheckLFS are presumably project-local
# modules providing check_function_keywords() and check_lfs() - confirm.
include(CheckFunctionKeywords)
include(CheckLFS)
## Checks for header files ##
check_include_file("inttypes.h" HAVE_INTTYPES_H)
check_include_file("memory.h" HAVE_MEMORY_H)
check_include_file("stddef.h" HAVE_STDDEF_H)
check_include_file("stdint.h" HAVE_STDINT_H)
check_include_file("stdlib.h" HAVE_STDLIB_H)
check_include_file("string.h" HAVE_STRING_H)
check_include_file("strings.h" HAVE_STRINGS_H)
check_include_file("sys/types.h" HAVE_SYS_TYPES_H)
# INCFILE holds the #include line for fixed-width integer types; inttypes.h is
# preferred, stdint.h is the fallback, and it is empty when neither exists.
if(HAVE_INTTYPES_H)
set(INCFILE "#include <inttypes.h>")
elseif(HAVE_STDINT_H)
set(INCFILE "#include <stdint.h>")
else(HAVE_INTTYPES_H)
set(INCFILE "")
endif(HAVE_INTTYPES_H)
## create configuration files from .cmake file ##
# The Windows I/O probes and lfs.h are only needed when BUILD_EXAMPLES is on.
if(BUILD_EXAMPLES)
## Checks for WinIO ##
if(WIN32)
check_include_file("io.h" HAVE_IO_H)
check_include_file("fcntl.h" HAVE_FCNTL_H)
check_symbol_exists("_setmode" "io.h;fcntl.h" HAVE__SETMODE)
# Only look for the un-prefixed setmode when _setmode was not found.
if(NOT HAVE__SETMODE)
check_symbol_exists("setmode" "io.h;fcntl.h" HAVE_SETMODE)
endif(NOT HAVE__SETMODE)
check_symbol_exists("_fileno" "stdio.h" HAVE__FILENO)
check_symbol_exists("fopen_s" "stdio.h" HAVE_FOPEN_S)
check_symbol_exists("_O_BINARY" "fcntl.h" HAVE__O_BINARY)
endif(WIN32)
## Checks for large file support ##
check_lfs(WITH_LFS)
# @ONLY: only @VAR@ references are substituted in lfs.h.cmake.
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/lfs.h.cmake" "${CMAKE_CURRENT_BINARY_DIR}/lfs.h" @ONLY)
endif(BUILD_EXAMPLES)
## generate config.h ##
check_function_keywords("inline;__inline;__inline__;__declspec(dllexport);__declspec(dllimport)")
# INLINE becomes the first supported inline spelling, or empty when none is.
if(HAVE_INLINE)
set(INLINE "inline")
elseif(HAVE___INLINE)
set(INLINE "__inline")
elseif(HAVE___INLINE__)
set(INLINE "__inline__")
else(HAVE_INLINE)
set(INLINE "")
endif(HAVE_INLINE)
# config.h is configured without @ONLY, so both substitution syntaxes apply.
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/config.h.cmake" "${CMAKE_CURRENT_BINARY_DIR}/config.h")
## Checks for types ##
# Each section picks a C type (and, for the integer index types, a printf
# conversion string) that is later substituted into the generated headers.
# sauchar_t (8bit)
check_type_size("uint8_t" UINT8_T)
if(HAVE_UINT8_T)
set(SAUCHAR_TYPE "uint8_t")
else(HAVE_UINT8_T)
# No uint8_t: accept unsigned char only if it is exactly one byte wide.
check_type_size("unsigned char" SIZEOF_UNSIGNED_CHAR)
if("${SIZEOF_UNSIGNED_CHAR}" STREQUAL "1")
set(SAUCHAR_TYPE "unsigned char")
else("${SIZEOF_UNSIGNED_CHAR}" STREQUAL "1")
message(FATAL_ERROR "Cannot find unsigned 8-bit integer type")
endif("${SIZEOF_UNSIGNED_CHAR}" STREQUAL "1")
endif(HAVE_UINT8_T)
# saint_t (32bit)
check_type_size("int32_t" INT32_T)
if(HAVE_INT32_T)
set(SAINT32_TYPE "int32_t")
# Use the PRId32 macro when inttypes.h provides it, else a literal "d".
check_symbol_exists("PRId32" "inttypes.h" HAVE_PRID32)
if(HAVE_PRID32)
set(SAINT32_PRId "PRId32")
else(HAVE_PRID32)
set(SAINT32_PRId "\"d\"")
endif(HAVE_PRID32)
else(HAVE_INT32_T)
# No int32_t: take the first built-in type that is 4 bytes wide, trying
# int, long, short and __int32 in that order.
check_type_size("int" SIZEOF_INT)
check_type_size("long" SIZEOF_LONG)
check_type_size("short" SIZEOF_SHORT)
check_type_size("__int32" SIZEOF___INT32)
if("${SIZEOF_INT}" STREQUAL "4")
set(SAINT32_TYPE "int")
set(SAINT32_PRId "\"d\"")
elseif("${SIZEOF_LONG}" STREQUAL "4")
set(SAINT32_TYPE "long")
set(SAINT32_PRId "\"ld\"")
elseif("${SIZEOF_SHORT}" STREQUAL "4")
set(SAINT32_TYPE "short")
set(SAINT32_PRId "\"d\"")
elseif("${SIZEOF___INT32}" STREQUAL "4")
set(SAINT32_TYPE "__int32")
set(SAINT32_PRId "\"d\"")
else("${SIZEOF_INT}" STREQUAL "4")
message(FATAL_ERROR "Cannot find 32-bit integer type")
endif("${SIZEOF_INT}" STREQUAL "4")
endif(HAVE_INT32_T)
# saint64_t (64bit)
# Only probed when the 64-bit library variant was requested.
if(BUILD_DIVSUFSORT64)
check_type_size("int64_t" INT64_T)
if(HAVE_INT64_T)
set(SAINT64_TYPE "int64_t")
check_symbol_exists("PRId64" "inttypes.h" HAVE_PRID64)
if(HAVE_PRID64)
set(SAINT64_PRId "PRId64")
else(HAVE_PRID64)
set(SAINT64_PRId "\"lld\"")
endif(HAVE_PRID64)
else(HAVE_INT64_T)
# No int64_t: take the first built-in type that is 8 bytes wide, trying
# int, long, long long and __int64 in that order.
check_type_size("int" SIZEOF_INT)
check_type_size("long" SIZEOF_LONG)
check_type_size("long long" SIZEOF_LONG_LONG)
check_type_size("__int64" SIZEOF___INT64)
if("${SIZEOF_INT}" STREQUAL "8")
set(SAINT64_TYPE "int")
set(SAINT64_PRId "\"d\"")
elseif("${SIZEOF_LONG}" STREQUAL "8")
set(SAINT64_TYPE "long")
set(SAINT64_PRId "\"ld\"")
elseif("${SIZEOF_LONG_LONG}" STREQUAL "8")
set(SAINT64_TYPE "long long")
set(SAINT64_PRId "\"lld\"")
elseif("${SIZEOF___INT64}" STREQUAL "8")
set(SAINT64_TYPE "__int64")
set(SAINT64_PRId "\"I64d\"")
else("${SIZEOF_INT}" STREQUAL "8")
# Unlike the 32-bit case this uses SEND_ERROR rather than FATAL_ERROR, and
# switches the 64-bit variant off for the rest of this script.
message(SEND_ERROR "Cannot find 64-bit integer type")
set(BUILD_DIVSUFSORT64 OFF)
endif("${SIZEOF_INT}" STREQUAL "8")
endif(HAVE_INT64_T)
endif(BUILD_DIVSUFSORT64)
## generate divsufsort.h ##
# The import/export decorations stay empty unless a shared library is built
# and the corresponding __declspec keyword was detected.
set(DIVSUFSORT_IMPORT "")
set(DIVSUFSORT_EXPORT "")
if(BUILD_SHARED_LIBS)
# NOTE(review): the HAVE___DECLSPEC_* variables are presumably set by the
# check_function_keywords() call earlier in this script - confirm.
if(HAVE___DECLSPEC_DLLIMPORT_)
set(DIVSUFSORT_IMPORT "__declspec(dllimport)")
endif(HAVE___DECLSPEC_DLLIMPORT_)
if(HAVE___DECLSPEC_DLLEXPORT_)
set(DIVSUFSORT_EXPORT "__declspec(dllexport)")
endif(HAVE___DECLSPEC_DLLEXPORT_)
endif(BUILD_SHARED_LIBS)
# First pass over the template: 32-bit index type, no name suffix.
set(W64BIT "")
set(SAINDEX_TYPE "${SAINT32_TYPE}")
set(SAINDEX_PRId "${SAINT32_PRId}")
set(SAINT_PRId "${SAINT32_PRId}")
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/divsufsort.h.cmake"
"${CMAKE_CURRENT_BINARY_DIR}/divsufsort.h" @ONLY)
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/divsufsort.h" DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
# Second pass over the same template: 64-bit index type and a "64" suffix.
# SAINT_PRId is left at its 32-bit value from the first pass.
if(BUILD_DIVSUFSORT64)
set(W64BIT "64")
set(SAINDEX_TYPE "${SAINT64_TYPE}")
set(SAINDEX_PRId "${SAINT64_PRId}")
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/divsufsort.h.cmake"
"${CMAKE_CURRENT_BINARY_DIR}/divsufsort64.h" @ONLY)
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/divsufsort64.h" DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
endif(BUILD_DIVSUFSORT64)

View file

@ -0,0 +1,81 @@
/*
* config.h for libdivsufsort
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/* Template for the generated config.h: the cmakedefine lines below are turned
   into #define or commented-out #undef lines when the build is configured. */
#ifndef _CONFIG_H
#define _CONFIG_H 1
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/** Define to the version of this package. **/
#cmakedefine PROJECT_VERSION_FULL "${PROJECT_VERSION_FULL}"
/** Define to 1 if you have the header files. **/
#cmakedefine HAVE_INTTYPES_H 1
#cmakedefine HAVE_STDDEF_H 1
#cmakedefine HAVE_STDINT_H 1
#cmakedefine HAVE_STDLIB_H 1
#cmakedefine HAVE_STRING_H 1
#cmakedefine HAVE_STRINGS_H 1
#cmakedefine HAVE_MEMORY_H 1
#cmakedefine HAVE_SYS_TYPES_H 1
/** for WinIO **/
#cmakedefine HAVE_IO_H 1
#cmakedefine HAVE_FCNTL_H 1
#cmakedefine HAVE__SETMODE 1
#cmakedefine HAVE_SETMODE 1
#cmakedefine HAVE__FILENO 1
#cmakedefine HAVE_FOPEN_S 1
#cmakedefine HAVE__O_BINARY 1
/* When only the un-prefixed setmode exists, alias _setmode to it so callers
   can use the underscore-prefixed name unconditionally. */
#ifndef HAVE__SETMODE
# if HAVE_SETMODE
# define _setmode setmode
# define HAVE__SETMODE 1
# endif
/* Provide a zero _O_BINARY when the platform does not define one.
   NOTE(review): this fallback sits inside the #ifndef HAVE__SETMODE guard, so
   it is only evaluated when _setmode was not detected directly - confirm that
   this nesting is intended. */
# if HAVE__SETMODE && !HAVE__O_BINARY
# define _O_BINARY 0
# define HAVE__O_BINARY 1
# endif
#endif
/** for inline **/
/* INLINE may be predefined by the build; otherwise the detected keyword
   (possibly empty) is substituted at configure time. */
#ifndef INLINE
# define INLINE @INLINE@
#endif
/** for VC++ warning **/
/* C4127: "conditional expression is constant". */
#ifdef _MSC_VER
#pragma warning(disable: 4127)
#endif
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */
#endif /* _CONFIG_H */

View file

@ -0,0 +1,189 @@
/*
* divsufsort.h for libdivsufsort
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef _DIVSUFSORT_H
#define _DIVSUFSORT_H 1
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/* Expands to nothing here: the declarations below carry no import/export
   decoration. */
#define DIVSUFSORT_API
/*- Datatypes -*/
/* Each typedef is guarded by a marker macro so that it is skipped when the
   marker has already been defined elsewhere. */
#ifndef SAUCHAR_T
#define SAUCHAR_T
/* Element type of the input strings passed as T. */
typedef unsigned char sauchar_t;
#endif /* SAUCHAR_T */
#ifndef SAINT_T
#define SAINT_T
/* Integer type used for status/return codes. */
typedef int saint_t;
#endif /* SAINT_T */
#ifndef SAIDX_T
#define SAIDX_T
/* Integer type used for suffix-array entries and string lengths. */
typedef int saidx_t;
#endif /* SAIDX_T */
/* printf conversion for saidx_t (matches the int typedef above). */
#ifndef PRIdSAIDX_T
#define PRIdSAIDX_T "d"
#endif
/*- divsufsort context */
/**
 * State shared between divsufsort_init(), divsufsort_build_array() and
 * divsufsort_destroy().
 * NOTE(review): bucket_A / bucket_B presumably point at the bucket tables
 * sized by BUCKET_A_SIZE / BUCKET_B_SIZE in divsufsort_private.h, and are
 * presumably allocated by divsufsort_init() - confirm against divsufsort.c.
 */
typedef struct _divsufsort_ctx_t {
saidx_t *bucket_A;
saidx_t *bucket_B;
} divsufsort_ctx_t;
/*- Prototypes -*/
/**
 * Initialize suffix array context
 *
 * @param ctx suffix array context to initialize
 *
 * @return 0 for success, or non-zero in case of an error
 */
int divsufsort_init(divsufsort_ctx_t *ctx);
/**
 * Destroy suffix array context
 *
 * @param ctx suffix array context to destroy
 */
void divsufsort_destroy(divsufsort_ctx_t *ctx);
/**
 * Constructs the suffix array of a given string.
 * @param ctx suffix array context
 * @param T[0..n-1] The input string.
 * @param SA[0..n-1] The output array of suffixes.
 * @param n The length of the given string.
 * @return 0 if no error occurred, -1 or -2 otherwise.
 */
DIVSUFSORT_API
saint_t divsufsort_build_array(divsufsort_ctx_t *ctx, const sauchar_t *T, saidx_t *SA, saidx_t n);
#if 0
/**
* Constructs the burrows-wheeler transformed string of a given string.
* @param T[0..n-1] The input string.
* @param U[0..n-1] The output string. (can be T)
* @param A[0..n-1] The temporary array. (can be NULL)
* @param n The length of the given string.
* @return The primary index if no error occurred, -1 or -2 otherwise.
*/
DIVSUFSORT_API
saidx_t
divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n);
/**
* Returns the version of the divsufsort library.
* @return The version number string.
*/
DIVSUFSORT_API
const char *
divsufsort_version(void);
/**
* Constructs the burrows-wheeler transformed string of a given string and suffix array.
* @param T[0..n-1] The input string.
* @param U[0..n-1] The output string. (can be T)
* @param SA[0..n-1] The suffix array. (can be NULL)
* @param n The length of the given string.
* @param idx The output primary index.
* @return 0 if no error occurred, -1 or -2 otherwise.
*/
DIVSUFSORT_API
saint_t
bw_transform(const sauchar_t *T, sauchar_t *U,
saidx_t *SA /* can NULL */,
saidx_t n, saidx_t *idx);
/**
* Inverse BW-transforms a given BWTed string.
* @param T[0..n-1] The input string.
* @param U[0..n-1] The output string. (can be T)
* @param A[0..n-1] The temporary array. (can be NULL)
* @param n The length of the given string.
* @param idx The primary index.
* @return 0 if no error occurred, -1 or -2 otherwise.
*/
DIVSUFSORT_API
saint_t
inverse_bw_transform(const sauchar_t *T, sauchar_t *U,
saidx_t *A /* can NULL */,
saidx_t n, saidx_t idx);
/**
* Checks the correctness of a given suffix array.
* @param T[0..n-1] The input string.
* @param SA[0..n-1] The input suffix array.
* @param n The length of the given string.
* @param verbose The verbose mode.
* @return 0 if no error occurred.
*/
DIVSUFSORT_API
saint_t
sufcheck(const sauchar_t *T, const saidx_t *SA, saidx_t n, saint_t verbose);
/**
* Search for the pattern P in the string T.
* @param T[0..Tsize-1] The input string.
* @param Tsize The length of the given string.
* @param P[0..Psize-1] The input pattern string.
* @param Psize The length of the given pattern string.
* @param SA[0..SAsize-1] The input suffix array.
* @param SAsize The length of the given suffix array.
* @param left The output index.
* @return The count of matches if no error occurred, -1 otherwise.
*/
DIVSUFSORT_API
saidx_t
sa_search(const sauchar_t *T, saidx_t Tsize,
const sauchar_t *P, saidx_t Psize,
const saidx_t *SA, saidx_t SAsize,
saidx_t *left);
/**
* Search for the character c in the string T.
* @param T[0..Tsize-1] The input string.
* @param Tsize The length of the given string.
* @param SA[0..SAsize-1] The input suffix array.
* @param SAsize The length of the given suffix array.
* @param c The input character.
* @param left The output index.
* @return The count of matches if no error occurred, -1 otherwise.
*/
DIVSUFSORT_API
saidx_t
sa_simplesearch(const sauchar_t *T, saidx_t Tsize,
const saidx_t *SA, saidx_t SAsize,
saint_t c, saidx_t *left);
#endif
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */
#endif /* _DIVSUFSORT_H */

View file

@ -0,0 +1,180 @@
/*
* divsufsort@W64BIT@.h for libdivsufsort@W64BIT@
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef _DIVSUFSORT@W64BIT@_H
#define _DIVSUFSORT@W64BIT@_H 1
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
@INCFILE@
/* Linkage decoration for the public prototypes: the export form is used while
   building the DLL itself (DIVSUFSORT_BUILD_DLL defined), the import form
   otherwise. Both are filled in at configure time and may be empty. */
#ifndef DIVSUFSORT_API
# ifdef DIVSUFSORT_BUILD_DLL
# define DIVSUFSORT_API @DIVSUFSORT_EXPORT@
# else
# define DIVSUFSORT_API @DIVSUFSORT_IMPORT@
# endif
#endif
/*- Datatypes -*/
/* Each typedef is guarded by a marker macro so that including both the 32-bit
   and 64-bit generated headers does not redefine the shared types. */
#ifndef SAUCHAR_T
#define SAUCHAR_T
typedef @SAUCHAR_TYPE@ sauchar_t;
#endif /* SAUCHAR_T */
#ifndef SAINT_T
#define SAINT_T
typedef @SAINT32_TYPE@ saint_t;
#endif /* SAINT_T */
/* Index type; its name and width depend on which variant is generated. */
#ifndef SAIDX@W64BIT@_T
#define SAIDX@W64BIT@_T
typedef @SAINDEX_TYPE@ saidx@W64BIT@_t;
#endif /* SAIDX@W64BIT@_T */
/* printf conversion strings matching saint_t and the index type. */
#ifndef PRIdSAINT_T
#define PRIdSAINT_T @SAINT_PRId@
#endif /* PRIdSAINT_T */
#ifndef PRIdSAIDX@W64BIT@_T
#define PRIdSAIDX@W64BIT@_T @SAINDEX_PRId@
#endif /* PRIdSAIDX@W64BIT@_T */
/*- Prototypes -*/
/**
* Constructs the suffix array of a given string.
* @param T[0..n-1] The input string.
* @param SA[0..n-1] The output array of suffixes.
* @param n The length of the given string.
* @return 0 if no error occurred, -1 or -2 otherwise.
*/
DIVSUFSORT_API
saint_t
divsufsort@W64BIT@(const sauchar_t *T, saidx@W64BIT@_t *SA, saidx@W64BIT@_t n);
/**
* Constructs the burrows-wheeler transformed string of a given string.
* @param T[0..n-1] The input string.
* @param U[0..n-1] The output string. (can be T)
* @param A[0..n-1] The temporary array. (can be NULL)
* @param n The length of the given string.
* @return The primary index if no error occurred, -1 or -2 otherwise.
*/
DIVSUFSORT_API
saidx@W64BIT@_t
divbwt@W64BIT@(const sauchar_t *T, sauchar_t *U, saidx@W64BIT@_t *A, saidx@W64BIT@_t n);
/**
* Returns the version of the divsufsort library.
* @return The version number string.
*/
DIVSUFSORT_API
const char *
divsufsort@W64BIT@_version(void);
/**
* Constructs the burrows-wheeler transformed string of a given string and suffix array.
* @param T[0..n-1] The input string.
* @param U[0..n-1] The output string. (can be T)
* @param SA[0..n-1] The suffix array. (can be NULL)
* @param n The length of the given string.
* @param idx The output primary index.
* @return 0 if no error occurred, -1 or -2 otherwise.
*/
DIVSUFSORT_API
saint_t
bw_transform@W64BIT@(const sauchar_t *T, sauchar_t *U,
saidx@W64BIT@_t *SA /* can NULL */,
saidx@W64BIT@_t n, saidx@W64BIT@_t *idx);
/**
* Inverse BW-transforms a given BWTed string.
* @param T[0..n-1] The input string.
* @param U[0..n-1] The output string. (can be T)
* @param A[0..n-1] The temporary array. (can be NULL)
* @param n The length of the given string.
* @param idx The primary index.
* @return 0 if no error occurred, -1 or -2 otherwise.
*/
DIVSUFSORT_API
saint_t
inverse_bw_transform@W64BIT@(const sauchar_t *T, sauchar_t *U,
saidx@W64BIT@_t *A /* can NULL */,
saidx@W64BIT@_t n, saidx@W64BIT@_t idx);
/**
* Checks the correctness of a given suffix array.
* @param T[0..n-1] The input string.
* @param SA[0..n-1] The input suffix array.
* @param n The length of the given string.
* @param verbose The verbose mode.
* @return 0 if no error occurred.
*/
DIVSUFSORT_API
saint_t
sufcheck@W64BIT@(const sauchar_t *T, const saidx@W64BIT@_t *SA, saidx@W64BIT@_t n, saint_t verbose);
/**
* Search for the pattern P in the string T.
* @param T[0..Tsize-1] The input string.
* @param Tsize The length of the given string.
* @param P[0..Psize-1] The input pattern string.
* @param Psize The length of the given pattern string.
* @param SA[0..SAsize-1] The input suffix array.
* @param SAsize The length of the given suffix array.
* @param left The output index.
* @return The count of matches if no error occurred, -1 otherwise.
*/
DIVSUFSORT_API
saidx@W64BIT@_t
sa_search@W64BIT@(const sauchar_t *T, saidx@W64BIT@_t Tsize,
const sauchar_t *P, saidx@W64BIT@_t Psize,
const saidx@W64BIT@_t *SA, saidx@W64BIT@_t SAsize,
saidx@W64BIT@_t *left);
/**
* Search for the character c in the string T.
* @param T[0..Tsize-1] The input string.
* @param Tsize The length of the given string.
* @param SA[0..SAsize-1] The input suffix array.
* @param SAsize The length of the given suffix array.
* @param c The input character.
* @param left The output index.
* @return The count of matches if no error occurred, -1 otherwise.
*/
DIVSUFSORT_API
saidx@W64BIT@_t
sa_simplesearch@W64BIT@(const sauchar_t *T, saidx@W64BIT@_t Tsize,
const saidx@W64BIT@_t *SA, saidx@W64BIT@_t SAsize,
saint_t c, saidx@W64BIT@_t *left);
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */
#endif /* _DIVSUFSORT@W64BIT@_H */

View file

@ -0,0 +1,9 @@
/* Hand-written feature macros, included by divsufsort_private.h to decide
   which standard headers to pull in. */
#define HAVE_STRING_H 1
#define HAVE_STDLIB_H 1
#define HAVE_MEMORY_H 1
#define HAVE_STDINT_H 1
/* Keyword used wherever the sources spell INLINE. */
#define INLINE inline
/* C4244: conversion with possible loss of data. */
#ifdef _MSC_VER
#pragma warning( disable : 4244 )
#endif /* _MSC_VER */

View file

@ -0,0 +1,205 @@
/*
* divsufsort_private.h for libdivsufsort
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef _DIVSUFSORT_PRIVATE_H
#define _DIVSUFSORT_PRIVATE_H 1
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
#include "divsufsort_config.h"
#include <assert.h>
#include <stdio.h>
#if HAVE_STRING_H
# include <string.h>
#endif
#if HAVE_STDLIB_H
# include <stdlib.h>
#endif
#if HAVE_MEMORY_H
# include <memory.h>
#endif
#if HAVE_STDDEF_H
# include <stddef.h>
#endif
#if HAVE_STRINGS_H
# include <strings.h>
#endif
#if HAVE_INTTYPES_H
# include <inttypes.h>
#else
# if HAVE_STDINT_H
# include <stdint.h>
# endif
#endif
/* Select the index width. When BUILD_DIVSUFSORT64 is defined the 64-bit
   public header is included and the generic names used throughout the
   sources are remapped to their "64"-suffixed counterparts, so one set of
   source files can be compiled as either variant. Otherwise the regular
   header is included and no renaming takes place. */
#if defined(BUILD_DIVSUFSORT64)
# include "divsufsort64.h"
# ifndef SAIDX_T
# define SAIDX_T
# define saidx_t saidx64_t
# endif /* SAIDX_T */
# ifndef PRIdSAIDX_T
# define PRIdSAIDX_T PRIdSAIDX64_T
# endif /* PRIdSAIDX_T */
/* Function names remapped to the 64-bit symbols. */
# define divsufsort divsufsort64
# define divbwt divbwt64
# define divsufsort_version divsufsort64_version
# define bw_transform bw_transform64
# define inverse_bw_transform inverse_bw_transform64
# define sufcheck sufcheck64
# define sa_search sa_search64
# define sa_simplesearch sa_simplesearch64
# define sssort sssort64
# define trsort trsort64
#else
# include "divsufsort.h"
#endif
/*- Constants -*/
/* Fallback for environments whose headers did not define UINT8_MAX. */
#if !defined(UINT8_MAX)
# define UINT8_MAX (255)
#endif /* UINT8_MAX */
/* A user-supplied ALPHABET_SIZE below 1 is discarded and replaced by the
   default of UINT8_MAX + 1 (one symbol per possible byte value). */
#if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1)
# undef ALPHABET_SIZE
#endif
#if !defined(ALPHABET_SIZE)
# define ALPHABET_SIZE (UINT8_MAX + 1)
#endif
/* for divsufsort.c */
/* Element counts of the two bucket arrays: one slot per symbol, and one
   slot per ordered pair of symbols. */
#define BUCKET_A_SIZE (ALPHABET_SIZE)
#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
/* for sssort.c */
/* SS_INSERTIONSORT_THRESHOLD: user override is clamped to at least 1;
   default is 8. */
#if defined(SS_INSERTIONSORT_THRESHOLD)
# if SS_INSERTIONSORT_THRESHOLD < 1
# undef SS_INSERTIONSORT_THRESHOLD
# define SS_INSERTIONSORT_THRESHOLD (1)
# endif
#else
# define SS_INSERTIONSORT_THRESHOLD (8)
#endif
/* SS_BLOCKSIZE: user override is clamped to the range [0, 32767];
   default is 1024. The value 0 is treated as a distinct mode by the
   conditionals below and in sssort.c. */
#if defined(SS_BLOCKSIZE)
# if SS_BLOCKSIZE < 0
# undef SS_BLOCKSIZE
# define SS_BLOCKSIZE (0)
# elif 32768 <= SS_BLOCKSIZE
# undef SS_BLOCKSIZE
# define SS_BLOCKSIZE (32767)
# endif
#else
# define SS_BLOCKSIZE (1024)
#endif
/* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */
/* Stack depth chosen from the block size; with SS_BLOCKSIZE == 0 a larger
   fixed depth is used, larger still for the 64-bit build. */
#if SS_BLOCKSIZE == 0
# if defined(BUILD_DIVSUFSORT64)
# define SS_MISORT_STACKSIZE (96)
# else
# define SS_MISORT_STACKSIZE (64)
# endif
#elif SS_BLOCKSIZE <= 4096
# define SS_MISORT_STACKSIZE (16)
#else
# define SS_MISORT_STACKSIZE (24)
#endif
#if defined(BUILD_DIVSUFSORT64)
# define SS_SMERGE_STACKSIZE (64)
#else
# define SS_SMERGE_STACKSIZE (32)
#endif
/* for trsort.c */
#define TR_INSERTIONSORT_THRESHOLD (8)
#if defined(BUILD_DIVSUFSORT64)
# define TR_STACKSIZE (96)
#else
# define TR_STACKSIZE (64)
#endif
/*- Macros -*/
/* SWAP exchanges two lvalues through a temporary named `t`, which the
   calling scope must declare with a compatible type. Defined only if the
   includer has not already provided its own SWAP. */
#ifndef SWAP
# define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0)
#endif /* SWAP */
/* MIN/MAX evaluate their winning argument twice; do not pass expressions
   with side effects. */
#ifndef MIN
# define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
#endif /* MIN */
#ifndef MAX
# define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
#endif /* MAX */
/* Explicit-stack helpers. They expect the calling scope to provide an array
   `stack` of records with fields a, b, c, d (and e for the *5 variants), an
   integer `ssize` holding the current depth, and a STACK_SIZE constant that
   is only consulted by the assert. */
#define STACK_PUSH(_a, _b, _c, _d)\
do {\
assert(ssize < STACK_SIZE);\
stack[ssize].a = (_a), stack[ssize].b = (_b),\
stack[ssize].c = (_c), stack[ssize++].d = (_d);\
} while(0)
#define STACK_PUSH5(_a, _b, _c, _d, _e)\
do {\
assert(ssize < STACK_SIZE);\
stack[ssize].a = (_a), stack[ssize].b = (_b),\
stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
} while(0)
/* The POP macros contain a bare `return;`: popping from an empty stack
   leaves the ENCLOSING function, so they are only usable inside functions
   returning void. */
#define STACK_POP(_a, _b, _c, _d)\
do {\
assert(0 <= ssize);\
if(ssize == 0) { return; }\
(_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
(_c) = stack[ssize].c, (_d) = stack[ssize].d;\
} while(0)
#define STACK_POP5(_a, _b, _c, _d, _e)\
do {\
assert(0 <= ssize);\
if(ssize == 0) { return; }\
(_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
(_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
} while(0)
/* for divsufsort.c */
/* Bucket accessors; they expect `bucket_A` / `bucket_B` to be in scope.
   BUCKET_B and BUCKET_BSTAR address the same bucket_B array with the two
   symbol indices transposed. When ALPHABET_SIZE is 256 the index is built
   with a shift and OR instead of a multiplication. */
#define BUCKET_A(_c0) bucket_A[(_c0)]
#if ALPHABET_SIZE == 256
#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
#else
#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
#endif
/*- Private Prototypes -*/
/* sssort.c */
/* Substring sort over the range [first, last), using `buf` (bufsize
   entries) as scratch space.
   NOTE(review): parameter meanings beyond the names are not visible in this
   header - see sssort.c for the exact contract. */
void
sssort(const sauchar_t *Td, const saidx_t *PA,
saidx_t *first, saidx_t *last,
saidx_t *buf, saidx_t bufsize,
saidx_t depth, saidx_t n, saint_t lastsuffix);
/* trsort.c */
/* Tandem-repeat sort operating on an inverse suffix array ISA and a suffix
   array SA of n entries. NOTE(review): see trsort.c for the contract. */
void
trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth);
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */
#endif /* _DIVSUFSORT_PRIVATE_H */

View file

@ -0,0 +1,56 @@
/*
* lfs.h for libdivsufsort
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/* Large-file-support shim: maps LFS_* names onto the file offset type and
   the open/tell/seek functions to use. */
#ifndef _LFS_H
#define _LFS_H 1
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
#ifndef __STRICT_ANSI__
/* The @NAME@ tokens below are substitution placeholders, not valid C.
   NOTE(review): this looks like a build-system template that must be
   processed before use - confirm it is never included as-is. */
# define LFS_OFF_T @LFS_OFF_T@
# define LFS_FOPEN @LFS_FOPEN@
# define LFS_FTELL @LFS_FTELL@
# define LFS_FSEEK @LFS_FSEEK@
# define LFS_PRId @LFS_PRID@
#else
/* Strict ANSI mode: fall back to the plain C stdio functions and `long`. */
# define LFS_OFF_T long
# define LFS_FOPEN fopen
# define LFS_FTELL ftell
# define LFS_FSEEK fseek
# define LFS_PRId "ld"
#endif
/* printf conversion for LFS_OFF_T values, unless already provided. */
#ifndef PRIdOFF_T
# define PRIdOFF_T LFS_PRId
#endif
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */
#endif /* _LFS_H */

View file

@ -0,0 +1,31 @@
# Headers are looked up in the source-tree and the build-tree include
# directories that sit next to this directory.
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/../include"
"${CMAKE_CURRENT_BINARY_DIR}/../include")
# Source list shared by both library variants.
set(divsufsort_SRCS divsufsort.c sssort.c trsort.c utils.c)
## libdivsufsort ##
add_library(divsufsort ${divsufsort_SRCS})
# NOTE(review): CMAKE_INSTALL_RUNTIMEDIR is not set in this file; it is
# presumably defined by a parent CMakeLists - confirm.
install(TARGETS divsufsort
RUNTIME DESTINATION ${CMAKE_INSTALL_RUNTIMEDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
# DEFINE_SYMBOL names the macro defined while compiling the shared library.
set_target_properties(divsufsort PROPERTIES
VERSION "${LIBRARY_VERSION}"
SOVERSION "${LIBRARY_SOVERSION}"
DEFINE_SYMBOL DIVSUFSORT_BUILD_DLL
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/../examples")
## libdivsufsort64 ##
# Optional second target: same sources, compiled with -DBUILD_DIVSUFSORT64.
if(BUILD_DIVSUFSORT64)
add_library(divsufsort64 ${divsufsort_SRCS})
install(TARGETS divsufsort64
RUNTIME DESTINATION ${CMAKE_INSTALL_RUNTIMEDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
set_target_properties(divsufsort64 PROPERTIES
VERSION "${LIBRARY_VERSION}"
SOVERSION "${LIBRARY_SOVERSION}"
DEFINE_SYMBOL DIVSUFSORT_BUILD_DLL
COMPILE_FLAGS "-DBUILD_DIVSUFSORT64"
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/../examples")
endif(BUILD_DIVSUFSORT64)

View file

@ -0,0 +1,431 @@
/*
* divsufsort.c for libdivsufsort
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "divsufsort_private.h"
#ifdef _OPENMP
# include <omp.h>
#endif
/*- Private Functions -*/
/* Sorts suffixes of type B*.
 *
 * Resets both bucket arrays, counts the type A / B / B* suffixes of T into
 * them, sorts the type B* substrings with sssort(), ranks them, completes
 * the ordering with trsort() and finally moves the sorted B* suffixes to
 * their final positions inside SA.
 *
 * T         input text; indices 0 .. n-1 are read
 * SA        work/output array; indices 0 .. n-1 are used
 * bucket_A  array of BUCKET_A_SIZE counters (overwritten)
 * bucket_B  array of BUCKET_B_SIZE counters (overwritten)
 * n         length of T; T[n - 1] is read unconditionally, so n must be >= 1
 *
 * Returns m, the number of type B* suffixes found.
 */
static
saidx_t
sort_typeBstar(const sauchar_t *T, saidx_t *SA,
saidx_t *bucket_A, saidx_t *bucket_B,
saidx_t n) {
saidx_t *PAb, *ISAb, *buf;
#ifdef _OPENMP
saidx_t *curbuf;
saidx_t l;
#endif
saidx_t i, j, k, t, m, bufsize;
saint_t c0, c1;
#ifdef _OPENMP
saint_t d0, d1;
int tmp;
#endif
/* Initialize bucket arrays. */
for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; }
for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; }
/* Count the number of occurrences of the first one or two characters of each
type A, B and B* suffix. Moreover, store the beginning position of all
type B* suffixes into the array SA. */
/* The text is scanned from its last character towards the first; m starts
at n and is decremented for every B* position stored at the tail of SA. */
for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
/* type A suffix. */
do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1));
if(0 <= i) {
/* type B* suffix. */
++BUCKET_BSTAR(c0, c1);
SA[--m] = i;
/* type B suffix. */
for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
++BUCKET_B(c0, c1);
}
}
}
/* Convert m from "first used tail index" into the count of B* suffixes. */
m = n - m;
/*
note:
A type B* suffix is lexicographically smaller than a type B suffix that
begins with the same first two characters.
*/
/* Calculate the index of start/end point of each bucket. */
for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) {
t = i + BUCKET_A(c0);
BUCKET_A(c0) = i + j; /* start point */
i = t + BUCKET_B(c0, c0);
for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) {
j += BUCKET_BSTAR(c0, c1);
BUCKET_BSTAR(c0, c1) = j; /* end point */
i += BUCKET_B(c0, c1);
}
}
if(0 < m) {
/* Sort the type B* suffixes by their first two characters. */
/* PAb views the B* positions stored at the tail of SA; ISAb is the region
of SA starting at index m. */
PAb = SA + n - m; ISAb = SA + m;
for(i = m - 2; 0 <= i; --i) {
t = PAb[i], c0 = T[t], c1 = T[t + 1];
SA[--BUCKET_BSTAR(c0, c1)] = i;
}
/* The last B* entry (index m - 1) is placed after all the others. */
t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
SA[--BUCKET_BSTAR(c0, c1)] = m - 1;
/* Sort the type B* substrings using sssort. */
#ifdef _OPENMP
/* Parallel variant: the spare area of SA is divided evenly between the
threads, and a critical section hands out the next bucket that holds more
than one element. */
tmp = omp_get_max_threads();
buf = SA + m, bufsize = (n - (2 * m)) / tmp;
c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m;
#pragma omp parallel default(shared) private(curbuf, k, l, d0, d1, tmp)
{
tmp = omp_get_thread_num();
curbuf = buf + tmp * bufsize;
k = 0;
for(;;) {
#pragma omp critical(sssort_lock)
{
if(0 < (l = j)) {
d0 = c0, d1 = c1;
do {
k = BUCKET_BSTAR(d0, d1);
if(--d1 <= d0) {
d1 = ALPHABET_SIZE - 1;
if(--d0 < 0) { break; }
}
} while(((l - k) <= 1) && (0 < (l = k)));
c0 = d0, c1 = d1, j = k;
}
}
if(l == 0) { break; }
sssort(T, PAb, SA + k, SA + l,
curbuf, bufsize, 2, n, *(SA + k) == (m - 1));
}
}
#else
/* Serial variant: walk the (c0, c1) buckets from the highest pair downwards
and sort every bucket that contains more than one element. */
buf = SA + m, bufsize = n - (2 * m);
for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
i = BUCKET_BSTAR(c0, c1);
if(1 < (j - i)) {
sssort(T, PAb, SA + i, SA + j,
buf, bufsize, 2, n, *(SA + i) == (m - 1));
}
}
}
#endif
/* Compute ranks of type B* substrings. */
/* Entries of SA[0 .. m-1] that are negative are bitwise-complemented back
while their rank is written into ISAb. */
for(i = m - 1; 0 <= i; --i) {
if(0 <= SA[i]) {
j = i;
do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
SA[i + 1] = i - j;
if(i <= 0) { break; }
}
j = i;
do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);
ISAb[SA[i]] = j;
}
/* Construct the inverse suffix array of type B* suffixes using trsort. */
trsort(ISAb, SA, m, 1);
/* Set the sorted order of type B* suffixes. */
for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
if(0 <= i) {
t = i;
for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
/* The position is stored complemented unless it is 0 or more than one
character was skipped by the loop above. */
SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
}
}
/* Calculate the index of start/end point of each bucket. */
BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */
for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) {
i = BUCKET_A(c0 + 1) - 1;
for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
t = i - BUCKET_B(c0, c1);
BUCKET_B(c0, c1) = i; /* end point */
/* Move all type B* suffixes to the correct position. */
for(i = t, j = BUCKET_BSTAR(c0, c1);
j <= k;
--i, --k) { SA[i] = SA[k]; }
}
BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */
BUCKET_B(c0, c0) = i; /* end point */
}
}
return m;
}
/* Constructs the suffix array by using the sorted order of type B* suffixes.
 *
 * T         input text; T[n - 1] and T[n - 2] are read unconditionally, so
 *           n must be at least 2
 * SA        array of n entries holding the sorted B* suffixes on entry and
 *           rewritten in place
 * bucket_A  per-symbol bucket positions (updated while scanning)
 * bucket_B  per-symbol-pair bucket positions (updated while scanning)
 * n         length of T
 * m         number of type B* suffixes; the first pass is skipped when 0
 *
 * Throughout, an entry stored as its bitwise complement (a negative value)
 * is skipped by the scan that encounters it and restored with `~`.
 */
static
void
construct_SA(const sauchar_t *T, saidx_t *SA,
saidx_t *bucket_A, saidx_t *bucket_B,
saidx_t n, saidx_t m) {
saidx_t *i, *j, *k;
saidx_t s;
saint_t c0, c1, c2;
if(0 < m) {
/* Construct the sorted order of type B suffixes by using
the sorted order of type B* suffixes. */
for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
/* Scan the suffix array from right to left. */
for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
i <= j;
--j) {
if(0 < (s = *j)) {
assert(T[s] == c1);
assert(((s + 1) < n) && (T[s] <= T[s + 1]));
assert(T[s - 1] <= T[s]);
*j = ~s;
c0 = T[--s];
if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
/* c2 caches the symbol whose bucket pointer k currently tracks; the
pointer is saved back and reloaded only when the symbol changes. */
if(c0 != c2) {
if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
k = SA + BUCKET_B(c2 = c0, c1);
}
assert(k < j);
*k-- = s;
} else {
assert(((s == 0) && (T[s] == c1)) || (s < 0));
*j = ~s;
}
}
}
}
/* Construct the suffix array by using
the sorted order of type B suffixes. */
/* Seed the scan with the last suffix (n - 1), complemented when the
preceding character is smaller than the last one. */
k = SA + BUCKET_A(c2 = T[n - 1]);
*k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1);
/* Scan the suffix array from left to right. */
for(i = SA, j = SA + n; i < j; ++i) {
if(0 < (s = *i)) {
assert(T[s - 1] >= T[s]);
c0 = T[--s];
if((s == 0) || (T[s - 1] < c0)) { s = ~s; }
if(c0 != c2) {
BUCKET_A(c2) = k - SA;
k = SA + BUCKET_A(c2 = c0);
}
assert(i < k);
*k++ = s;
} else {
assert(s < 0);
*i = ~s;
}
}
}
#if 0
/* Constructs the burrows-wheeler transformed string directly
by using the sorted order of type B* suffixes.

This function sits inside an `#if 0` region in this file and is therefore
not compiled. It has the same two-pass structure as construct_SA, but
stores characters of T (complemented where an entry is to be skipped)
instead of suffix positions, and returns the offset within SA of the entry
recorded in `orig` (the entry whose value was 0 during the second scan). */
static
saidx_t
construct_BWT(const sauchar_t *T, saidx_t *SA,
saidx_t *bucket_A, saidx_t *bucket_B,
saidx_t n, saidx_t m) {
saidx_t *i, *j, *k, *orig;
saidx_t s;
saint_t c0, c1, c2;
if(0 < m) {
/* Construct the sorted order of type B suffixes by using
the sorted order of type B* suffixes. */
for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
/* Scan the suffix array from right to left. */
for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
i <= j;
--j) {
if(0 < (s = *j)) {
assert(T[s] == c1);
assert(((s + 1) < n) && (T[s] <= T[s + 1]));
assert(T[s - 1] <= T[s]);
c0 = T[--s];
/* Replace the position by the complemented preceding character. */
*j = ~((saidx_t)c0);
if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
if(c0 != c2) {
if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
k = SA + BUCKET_B(c2 = c0, c1);
}
assert(k < j);
*k-- = s;
} else if(s != 0) {
*j = ~s;
#ifndef NDEBUG
} else {
assert(T[s] == c1);
#endif
}
}
}
}
/* Construct the BWTed string by using
the sorted order of type B suffixes. */
k = SA + BUCKET_A(c2 = T[n - 1]);
*k++ = (T[n - 2] < c2) ? ~((saidx_t)T[n - 2]) : (n - 1);
/* Scan the suffix array from left to right. */
for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
if(0 < (s = *i)) {
assert(T[s - 1] >= T[s]);
c0 = T[--s];
*i = c0;
if((0 < s) && (T[s - 1] < c0)) { s = ~((saidx_t)T[s - 1]); }
if(c0 != c2) {
BUCKET_A(c2) = k - SA;
k = SA + BUCKET_A(c2 = c0);
}
assert(i < k);
*k++ = s;
} else if(s != 0) {
*i = ~s;
} else {
/* A stored value of 0 marks the entry whose offset is returned. */
orig = i;
}
}
return orig - SA;
}
#endif
/*---------------------------------------------------------------------------*/
/**
 * Initialize suffix array context
 *
 * Allocates the two bucket arrays owned by the context. If either
 * allocation fails, whatever was allocated is released again through
 * divsufsort_destroy() and both pointers are left NULL.
 *
 * @param ctx suffix array context to initialize (must not be NULL)
 *
 * @return 0 for success, or non-zero in case of an error
 */
int divsufsort_init(divsufsort_ctx_t *ctx) {
   /* Put bucket_B in a known state first so cleanup is safe on any path. */
   ctx->bucket_B = NULL;

   ctx->bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t));
   if (ctx->bucket_A == NULL) {
      divsufsort_destroy(ctx);
      return -1;
   }

   ctx->bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t));
   if (ctx->bucket_B == NULL) {
      divsufsort_destroy(ctx);
      return -1;
   }

   return 0;
}
/**
 * Destroy suffix array context
 *
 * Releases both bucket arrays and resets the pointers to NULL, so calling
 * this on an already-destroyed or partially initialized context is harmless.
 *
 * @param ctx suffix array context to destroy
 */
void divsufsort_destroy(divsufsort_ctx_t *ctx) {
   /* free(NULL) is a no-op, so no guards are needed around the calls. */
   free(ctx->bucket_B);
   ctx->bucket_B = NULL;

   free(ctx->bucket_A);
   ctx->bucket_A = NULL;
}
/*- Function -*/
/**
 * Build the suffix array of a text
 *
 * @param ctx suffix array context previously set up with divsufsort_init();
 *            only consulted when n is 3 or more
 * @param T   input text of n bytes
 * @param SA  output array receiving n suffix indices
 * @param n   length of T
 *
 * @return 0 for success, -1 if T or SA is NULL or n is negative, -2 if the
 *         context or either of its bucket arrays is missing
 */
saint_t
divsufsort_build_array(divsufsort_ctx_t *ctx, const sauchar_t *T, saidx_t *SA, saidx_t n) {
  saidx_t m;

  /* Check arguments. */
  if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; }

  /* Texts of up to two characters are ordered directly, without buckets. */
  if(n == 0) { return 0; }
  if(n == 1) { SA[0] = 0; return 0; }
  if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; }

  /* The bucket arrays are required from here on. A NULL context is reported
     with the same code as missing buckets instead of being dereferenced. */
  if((ctx == NULL) || (ctx->bucket_A == NULL) || (ctx->bucket_B == NULL)) {
    return -2;
  }

  /* Suffixsort. */
  m = sort_typeBstar(T, SA, ctx->bucket_A, ctx->bucket_B, n);
  construct_SA(T, SA, ctx->bucket_A, ctx->bucket_B, n, m);
  return 0;
}
#if 0
/* Burrows-Wheeler transform of T (n bytes) into U. This function sits inside
an `#if 0` region in this file and is therefore not compiled.
A is optional scratch space; when NULL a temporary array of n + 1 entries
is allocated and freed here. Returns the value `pidx` computed below
(construct_BWT's result plus one), -1 for invalid arguments, or -2 when an
allocation failed. For n <= 1 the input is copied and n is returned. */
saidx_t
divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n) {
saidx_t *B;
saidx_t *bucket_A, *bucket_B;
saidx_t m, pidx, i;
/* Check arguments. */
if((T == NULL) || (U == NULL) || (n < 0)) { return -1; }
else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; }
if((B = A) == NULL) { B = (saidx_t *)malloc((size_t)(n + 1) * sizeof(saidx_t)); }
bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t));
bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t));
/* Burrows-Wheeler Transform. */
if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) {
m = sort_typeBstar(T, B, bucket_A, bucket_B, n);
pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m);
/* Copy to output string. */
/* Entries before pidx are shifted one slot to the right to make room for
the leading T[n - 1]; the entry at pidx itself is not copied. */
U[0] = T[n - 1];
for(i = 0; i < pidx; ++i) { U[i + 1] = (sauchar_t)B[i]; }
for(i += 1; i < n; ++i) { U[i] = (sauchar_t)B[i]; }
pidx += 1;
} else {
pidx = -2;
}
free(bucket_B);
free(bucket_A);
/* B is only owned by this function when the caller passed no buffer. */
if(A == NULL) { free(B); }
return pidx;
}
/* Returns the PROJECT_VERSION_FULL string. This function sits inside an
`#if 0` region in this file and is therefore not compiled; the macro is not
defined anywhere in the visible sources. */
const char *
divsufsort_version(void) {
return PROJECT_VERSION_FULL;
}
#endif

View file

@ -0,0 +1,383 @@
/*
* utils.c for libdivsufsort
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "divsufsort_private.h"
/*- Private Function -*/
#if 0
/* Binary search for inverse bwt.
   Returns the index of the first element of the ascending array A (of
   `size` entries) that is not smaller than `value`, or `size` when every
   element is smaller. */
static
saidx_t
binarysearch_lower(const saidx_t *A, saidx_t size, saidx_t value) {
  saidx_t lo = 0;
  saidx_t half = size >> 1;

  while(0 < size) {
    if(A[lo + half] < value) {
      /* Continue in the upper part, just past the probed element. */
      lo += half + 1;
      half -= (size & 1) ^ 1;
    }
    size = half;
    half >>= 1;
  }
  return lo;
}
/*- Functions -*/
/* Burrows-Wheeler transform.
Writes the transform of T (n bytes) into U and stores the primary index in
*idx. When SA is NULL the work is delegated to divbwt(); otherwise SA is
read as an already built suffix array of T. T and U may be the same buffer,
in which case SA is also overwritten.
Returns 0 on success, -1 for invalid arguments, or divbwt()'s negative
result. This function sits inside an `#if 0` region in this file and is
therefore not compiled. */
saint_t
bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *SA,
saidx_t n, saidx_t *idx) {
saidx_t *A, i, j, p, t;
saint_t c;
/* Check arguments. */
if((T == NULL) || (U == NULL) || (n < 0) || (idx == NULL)) { return -1; }
if(n <= 1) {
if(n == 1) { U[0] = T[0]; }
*idx = n;
return 0;
}
if((A = SA) == NULL) {
i = divbwt(T, U, NULL, n);
if(0 <= i) { *idx = i; i = 0; }
return (saint_t)i;
}
/* BW transform. */
if(T == U) {
/* In-place variant: characters already overwritten in U are kept in the
consumed part of A so they can still be read back. */
t = n;
for(i = 0, j = 0; i < n; ++i) {
p = t - 1;
t = A[i];
if(0 <= p) {
c = T[j];
U[j] = (j <= p) ? T[p] : (sauchar_t)A[p];
A[j] = c;
j++;
} else {
*idx = i;
}
}
p = t - 1;
if(0 <= p) {
c = T[j];
U[j] = (j <= p) ? T[p] : (sauchar_t)A[p];
A[j] = c;
} else {
*idx = i;
}
} else {
U[0] = T[n - 1];
for(i = 0; A[i] != 0; ++i) { U[i + 1] = T[A[i] - 1]; }
*idx = i + 1;
for(++i; i < n; ++i) { U[i] = T[A[i] - 1]; }
}
/* NOTE(review): SA == NULL already returned above, so this branch appears
unreachable. */
if(SA == NULL) {
/* Deallocate memory. */
free(A);
}
return 0;
}
/* Inverse Burrows-Wheeler transform.
Reconstructs n bytes into U from the transformed text T and the primary
index idx. A is optional scratch space of n entries; when NULL a temporary
array is allocated and freed here.
Returns 0 on success, -1 for invalid arguments (including idx outside
1 .. n for non-empty input), or -2 when the allocation failed. For n <= 1
nothing is written to U. This function sits inside an `#if 0` region in
this file and is therefore not compiled. */
saint_t
inverse_bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *A,
saidx_t n, saidx_t idx) {
saidx_t C[ALPHABET_SIZE];
sauchar_t D[ALPHABET_SIZE];
saidx_t *B;
saidx_t i, p;
saint_t c, d;
/* Check arguments. */
if((T == NULL) || (U == NULL) || (n < 0) || (idx < 0) ||
(n < idx) || ((0 < n) && (idx == 0))) {
return -1;
}
if(n <= 1) { return 0; }
if((B = A) == NULL) {
/* Allocate n*sizeof(saidx_t) bytes of memory. */
if((B = (saidx_t *)malloc((size_t)n * sizeof(saidx_t))) == NULL) { return -2; }
}
/* Inverse BW transform. */
/* C becomes the start offset of each occurring symbol; D lists the
occurring symbols in ascending order and d counts them. */
for(c = 0; c < ALPHABET_SIZE; ++c) { C[c] = 0; }
for(i = 0; i < n; ++i) { ++C[T[i]]; }
for(c = 0, d = 0, i = 0; c < ALPHABET_SIZE; ++c) {
p = C[c];
if(0 < p) {
C[c] = i;
D[d++] = (sauchar_t)c;
i += p;
}
}
/* Positions at or after idx are stored shifted by one. */
for(i = 0; i < idx; ++i) { B[C[T[i]]++] = i; }
for( ; i < n; ++i) { B[C[T[i]]++] = i + 1; }
/* Compact C so that entry c holds the end offset of symbol D[c]. */
for(c = 0; c < d; ++c) { C[c] = C[D[c]]; }
for(i = 0, p = idx; i < n; ++i) {
U[i] = D[binarysearch_lower(C, d, p)];
p = B[p - 1];
}
if(A == NULL) {
/* Deallocate memory. */
free(B);
}
return 0;
}
/* Checks the suffix array SA of the string T.
Runs three checks in order: every entry lies in [0, n-1]; first characters
of consecutive suffixes are non-decreasing; and each suffix is found at the
position predicted from the suffix that follows it in the text.
When `verbose` is non-zero, progress and diagnostics go to stderr.
Returns 0 if SA passes, -1 for invalid arguments, -2 for an out-of-range
entry, -3 for wrongly ordered first characters, -4 for a misplaced suffix.
This function sits inside an `#if 0` region in this file and is therefore
not compiled. */
saint_t
sufcheck(const sauchar_t *T, const saidx_t *SA,
saidx_t n, saint_t verbose) {
saidx_t C[ALPHABET_SIZE];
saidx_t i, p, q, t;
saint_t c;
if(verbose) { fprintf(stderr, "sufcheck: "); }
/* Check arguments. */
if((T == NULL) || (SA == NULL) || (n < 0)) {
if(verbose) { fprintf(stderr, "Invalid arguments.\n"); }
return -1;
}
if(n == 0) {
if(verbose) { fprintf(stderr, "Done.\n"); }
return 0;
}
/* check range: [0..n-1] */
for(i = 0; i < n; ++i) {
if((SA[i] < 0) || (n <= SA[i])) {
if(verbose) {
fprintf(stderr, "Out of the range [0,%" PRIdSAIDX_T "].\n"
" SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n",
n - 1, i, SA[i]);
}
return -2;
}
}
/* check first characters. */
for(i = 1; i < n; ++i) {
if(T[SA[i - 1]] > T[SA[i]]) {
if(verbose) {
fprintf(stderr, "Suffixes in wrong order.\n"
" T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d"
" > T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d\n",
i - 1, SA[i - 1], T[SA[i - 1]], i, SA[i], T[SA[i]]);
}
return -3;
}
}
/* check suffixes. */
/* C[c] is turned into the first SA index of the bucket of symbol c. */
for(i = 0; i < ALPHABET_SIZE; ++i) { C[i] = 0; }
for(i = 0; i < n; ++i) { ++C[T[i]]; }
for(i = 0, p = 0; i < ALPHABET_SIZE; ++i) {
t = C[i];
C[i] = p;
p += t;
}
/* q remembers the first slot of the bucket of the last character; that
slot is expected to hold suffix n - 1 and is skipped by the counter. */
q = C[T[n - 1]];
C[T[n - 1]] += 1;
for(i = 0; i < n; ++i) {
p = SA[i];
if(0 < p) {
c = T[--p];
t = C[c];
} else {
c = T[p = n - 1];
t = q;
}
if((t < 0) || (p != SA[t])) {
if(verbose) {
fprintf(stderr, "Suffix in wrong position.\n"
" SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T " or\n"
" SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n",
t, (0 <= t) ? SA[t] : -1, i, SA[i]);
}
return -4;
}
if(t != q) {
++C[c];
/* A bucket counter that runs past its bucket is poisoned with -1 so any
later use is reported by the `t < 0` test above. */
if((n <= C[c]) || (T[SA[C[c]]] != c)) { C[c] = -1; }
}
}
if(1 <= verbose) { fprintf(stderr, "Done.\n"); }
return 0;
}
/* Compares the suffix of T starting at `suf` with the pattern P, skipping
   the first *match characters, which the caller asserts are already equal.
   On return *match holds the number of pattern characters matched.
   Returns the difference of the first mismatching characters, 0 when the
   whole pattern matched, or -1 when the text ended before the pattern. */
static
int
_compare(const sauchar_t *T, saidx_t Tsize,
         const sauchar_t *P, saidx_t Psize,
         saidx_t suf, saidx_t *match) {
  saidx_t tpos = suf + *match;
  saidx_t ppos = *match;
  saint_t diff = 0;

  while((tpos < Tsize) && (ppos < Psize)) {
    diff = T[tpos] - P[ppos];
    if(diff != 0) { break; }
    ++tpos;
    ++ppos;
  }

  *match = ppos;
  if(diff != 0) { return diff; }
  return (ppos != Psize) ? -1 : 0;
}
/* Search for the pattern P in the string T.
Binary-searches the suffix array SA (SAsize entries) for suffixes of T that
start with P. Returns the number of matching entries and, when idx is not
NULL, stores the index of the first one (or the insertion point when there
is no match) in *idx. Returns -1 for invalid arguments, 0 when T or SA is
empty, and SAsize (with *idx = 0) for an empty pattern.
This function sits inside an `#if 0` region in this file and is therefore
not compiled. */
saidx_t
sa_search(const sauchar_t *T, saidx_t Tsize,
const sauchar_t *P, saidx_t Psize,
const saidx_t *SA, saidx_t SAsize,
saidx_t *idx) {
saidx_t size, lsize, rsize, half;
saidx_t match, lmatch, rmatch;
saidx_t llmatch, lrmatch, rlmatch, rrmatch;
saidx_t i, j, k;
saint_t r;
if(idx != NULL) { *idx = -1; }
if((T == NULL) || (P == NULL) || (SA == NULL) ||
(Tsize < 0) || (Psize < 0) || (SAsize < 0)) { return -1; }
if((Tsize == 0) || (SAsize == 0)) { return 0; }
if(Psize == 0) { if(idx != NULL) { *idx = 0; } return SAsize; }
/* lmatch / rmatch carry the number of pattern characters known to match at
the left and right ends of the current range, so each comparison can skip
the shorter of the two. */
for(i = j = k = 0, lmatch = rmatch = 0, size = SAsize, half = size >> 1;
0 < size;
size = half, half >>= 1) {
match = MIN(lmatch, rmatch);
r = _compare(T, Tsize, P, Psize, SA[i + half], &match);
if(r < 0) {
i += half + 1;
half -= (size & 1) ^ 1;
lmatch = match;
} else if(r > 0) {
rmatch = match;
} else {
/* A match was hit: search separately for the first match in the left
half (j) and for the end of the matches in the right half (k). */
lsize = half, j = i, rsize = size - half - 1, k = i + half + 1;
/* left part */
for(llmatch = lmatch, lrmatch = match, half = lsize >> 1;
0 < lsize;
lsize = half, half >>= 1) {
lmatch = MIN(llmatch, lrmatch);
r = _compare(T, Tsize, P, Psize, SA[j + half], &lmatch);
if(r < 0) {
j += half + 1;
half -= (lsize & 1) ^ 1;
llmatch = lmatch;
} else {
lrmatch = lmatch;
}
}
/* right part */
for(rlmatch = match, rrmatch = rmatch, half = rsize >> 1;
0 < rsize;
rsize = half, half >>= 1) {
rmatch = MIN(rlmatch, rrmatch);
r = _compare(T, Tsize, P, Psize, SA[k + half], &rmatch);
if(r <= 0) {
k += half + 1;
half -= (rsize & 1) ^ 1;
rlmatch = rmatch;
} else {
rrmatch = rmatch;
}
}
break;
}
}
if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; }
return k - j;
}
/* Search for the character c in the string T.
Binary-searches the suffix array SA (SAsize entries) for suffixes of T
whose first character is c. Returns the number of such entries and, when
idx is not NULL, stores the index of the first one (or the insertion point
when there is none) in *idx. Returns -1 for invalid arguments and 0 when T
or SA is empty. An SA entry that is not below Tsize compares as smaller
than c. This function sits inside an `#if 0` region in this file and is
therefore not compiled. */
saidx_t
sa_simplesearch(const sauchar_t *T, saidx_t Tsize,
const saidx_t *SA, saidx_t SAsize,
saint_t c, saidx_t *idx) {
saidx_t size, lsize, rsize, half;
saidx_t i, j, k, p;
saint_t r;
if(idx != NULL) { *idx = -1; }
if((T == NULL) || (SA == NULL) || (Tsize < 0) || (SAsize < 0)) { return -1; }
if((Tsize == 0) || (SAsize == 0)) { return 0; }
for(i = j = k = 0, size = SAsize, half = size >> 1;
0 < size;
size = half, half >>= 1) {
p = SA[i + half];
r = (p < Tsize) ? T[p] - c : -1;
if(r < 0) {
i += half + 1;
half -= (size & 1) ^ 1;
} else if(r == 0) {
/* A match was hit: locate the first match to the left (j) and the end of
the matches to the right (k). */
lsize = half, j = i, rsize = size - half - 1, k = i + half + 1;
/* left part */
for(half = lsize >> 1;
0 < lsize;
lsize = half, half >>= 1) {
p = SA[j + half];
r = (p < Tsize) ? T[p] - c : -1;
if(r < 0) {
j += half + 1;
half -= (lsize & 1) ^ 1;
}
}
/* right part */
for(half = rsize >> 1;
0 < rsize;
rsize = half, half >>= 1) {
p = SA[k + half];
r = (p < Tsize) ? T[p] - c : -1;
if(r <= 0) {
k += half + 1;
half -= (rsize & 1) ^ 1;
}
}
break;
}
}
if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; }
return k - j;
}
#endif

View file

@ -0,0 +1,815 @@
/*
* sssort.c for libdivsufsort
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "divsufsort_private.h"
/*- Private Functions -*/
/* Lookup table of floor(log2(i)) for i in 1 .. 255; entry 0 is -1. */
static const saint_t lg_table[256]= {
-1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
};
#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
/* Integer base-2 logarithm of n, looked up in lg_table one byte at a time.
The variant compiled depends on SS_BLOCKSIZE: with 0 all bytes of n are
examined (32 or 64 bits depending on BUILD_DIVSUFSORT64); with a block size
below 256 n indexes the table directly; otherwise only the low 16 bits of n
are examined. Returns -1 for n == 0 (the table's entry 0). */
static INLINE
saint_t
ss_ilg(saidx_t n) {
#if SS_BLOCKSIZE == 0
# if defined(BUILD_DIVSUFSORT64)
return (n >> 32) ?
((n >> 48) ?
((n >> 56) ?
56 + lg_table[(n >> 56) & 0xff] :
48 + lg_table[(n >> 48) & 0xff]) :
((n >> 40) ?
40 + lg_table[(n >> 40) & 0xff] :
32 + lg_table[(n >> 32) & 0xff])) :
((n & 0xffff0000) ?
((n & 0xff000000) ?
24 + lg_table[(n >> 24) & 0xff] :
16 + lg_table[(n >> 16) & 0xff]) :
((n & 0x0000ff00) ?
8 + lg_table[(n >> 8) & 0xff] :
0 + lg_table[(n >> 0) & 0xff]));
# else
return (n & 0xffff0000) ?
((n & 0xff000000) ?
24 + lg_table[(n >> 24) & 0xff] :
16 + lg_table[(n >> 16) & 0xff]) :
((n & 0x0000ff00) ?
8 + lg_table[(n >> 8) & 0xff] :
0 + lg_table[(n >> 0) & 0xff]);
# endif
#elif SS_BLOCKSIZE < 256
/* NOTE(review): assumes 0 <= n < 256 here - presumably guaranteed by the
block size; confirm against callers. */
return lg_table[n];
#else
return (n & 0xff00) ?
8 + lg_table[(n >> 8) & 0xff] :
0 + lg_table[(n >> 0) & 0xff];
#endif
}
#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
#if SS_BLOCKSIZE != 0
/* Square-root lookup table used by ss_isqrt. Spot-checked entries (1, 2, 3,
64, 255) equal floor(16 * sqrt(i)); the table is presumably that function
for all i in 0 .. 255. */
static const saint_t sqq_table[256] = {
0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61,
64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89,
90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109,
110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
};
/* Integer square root of x, capped at SS_BLOCKSIZE.
   A first estimate is read from sqq_table using the position of the highest
   set bit of x; larger inputs are refined with one or two averaging steps,
   and the result is finally corrected downwards if its square exceeds x. */
static INLINE
saidx_t
ss_isqrt(saidx_t x) {
  saidx_t root, msb;

  if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }

  /* Position of the highest set bit of x (-1 when x is 0). */
  if(x & 0xffff0000) {
    if(x & 0xff000000) { msb = 24 + lg_table[(x >> 24) & 0xff]; }
    else               { msb = 16 + lg_table[(x >> 16) & 0xff]; }
  } else {
    if(x & 0x0000ff00) { msb = 8 + lg_table[(x >> 8) & 0xff]; }
    else               { msb = 0 + lg_table[(x >> 0) & 0xff]; }
  }

  /* Values below 256 index the table directly. */
  if(msb < 8) { return sqq_table[x] >> 4; }

  if(msb >= 16) {
    root = sqq_table[x >> ((msb - 6) - (msb & 1))] << ((msb >> 1) - 7);
    if(msb >= 24) { root = (root + 1 + x / root) >> 1; }
    root = (root + 1 + x / root) >> 1;
  } else {
    root = (sqq_table[x >> ((msb - 6) - (msb & 1))] >> (7 - (msb >> 1))) + 1;
  }

  return (x < (root * root)) ? root - 1 : root;
}
#endif /* SS_BLOCKSIZE != 0 */
/*---------------------------------------------------------------------------*/
/* Compares two suffixes.
   p1 and p2 point into an array of text positions; each substring starts at
   T + depth + *p and ends just before T + *(p + 1) + 2, i.e. its end is
   taken from the NEXT array entry. Returns a negative, zero or positive
   value; a substring that is a proper prefix of the other compares smaller. */
static INLINE
saint_t
ss_compare(const sauchar_t *T,
           const saidx_t *p1, const saidx_t *p2,
           saidx_t depth) {
  const sauchar_t *lhs = T + depth + *p1;
  const sauchar_t *rhs = T + depth + *p2;
  const sauchar_t *lhs_end = T + *(p1 + 1) + 2;
  const sauchar_t *rhs_end = T + *(p2 + 1) + 2;

  /* Advance over the common prefix. */
  while((lhs < lhs_end) && (rhs < rhs_end) && (*lhs == *rhs)) {
    ++lhs;
    ++rhs;
  }

  if(lhs < lhs_end) {
    return (rhs < rhs_end) ? *lhs - *rhs : 1;
  }
  return (rhs < rhs_end) ? -1 : 0;
}
/*---------------------------------------------------------------------------*/
#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
/* Insertionsort for small size groups.
Sorts the entries of [first, last) by ss_compare() on the substrings they
reference through PA, starting the comparison at `depth`. Elements are
inserted from the right end towards `first`. When an inserted element
compares equal to the entry it stops in front of, that entry is stored
bitwise-complemented (negative); such negative entries are stepped over
together with their predecessor while shifting. */
static
void
ss_insertionsort(const sauchar_t *T, const saidx_t *PA,
saidx_t *first, saidx_t *last, saidx_t depth) {
saidx_t *i, *j;
saidx_t t;
saint_t r;
for(i = last - 2; first <= i; --i) {
/* t is the element being inserted; j walks right through the sorted tail
while t compares greater. */
for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
do { *(j - 1) = *j; } while((++j < last) && (*j < 0));
if(last <= j) { break; }
}
/* r still holds the result of the last comparison made above. */
if(r == 0) { *j = ~*j; }
*(j - 1) = t;
}
}
#endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
/*---------------------------------------------------------------------------*/
#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
/*
 * Sift-down step for the heap in SA[0..size), keyed by Td[PA[SA[x]]].
 * The value at SA[i] is moved down until neither child has a larger key.
 */
static INLINE
void
ss_fixdown(const sauchar_t *Td, const saidx_t *PA,
saidx_t *SA, saidx_t i, saidx_t size) {
saidx_t j, k;
saidx_t v;
saint_t c, d, e;
/* v is the value being placed, c its key; j is the left child of i */
for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
/* k = left child, then j advances to the right child */
d = Td[PA[SA[k = j++]]];
/* NOTE(review): SA[j] (right child) is read without comparing j against size */
if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; }
/* larger child does not exceed v's key: v belongs at i */
if(d <= c) { break; }
}
SA[i] = v;
}
/* Simple top-down heapsort. */
/*
 * Sorts SA[0..size) by the key Td[PA[SA[x]]] using ss_fixdown().
 * The heap is built over an odd number of elements m; when size is even
 * the last element is handled separately before and after heap building.
 */
static
void
ss_heapsort(const sauchar_t *Td, const saidx_t *PA, saidx_t *SA, saidx_t size) {
saidx_t i, m;
saidx_t t;
m = size;
if((size % 2) == 0) {
/* even size: leave SA[m] outside the heap, ordering it against SA[m / 2] */
m--;
if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); }
}
/* build the heap over SA[0..m) */
for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); }
/* even size: exchange the heap root with the left-out element and restore the heap */
if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); }
/* repeatedly move the root to the end of the shrinking heap */
for(i = m - 1; 0 < i; --i) {
t = SA[0], SA[0] = SA[i];
ss_fixdown(Td, PA, SA, 0, i);
SA[i] = t;
}
}
/*---------------------------------------------------------------------------*/
/*
 * Returns the median of three elements.
 *
 * Elements are compared by the key Td[PA[*v]]; the returned pointer is
 * one of v1, v2, v3.
 */
static INLINE
saidx_t *
ss_median3(const sauchar_t *Td, const saidx_t *PA,
           saidx_t *v1, saidx_t *v2, saidx_t *v3) {
  saidx_t *lo = v1;
  saidx_t *hi = v2;

  /* Order the first two candidates so that key(lo) <= key(hi). */
  if(Td[PA[*lo]] > Td[PA[*hi]]) {
    lo = v2;
    hi = v1;
  }

  /* v3 is at least as large as hi: hi is the median. */
  if(Td[PA[*hi]] <= Td[PA[*v3]]) { return hi; }

  /* v3 is below hi: the median is the larger of lo and v3. */
  return (Td[PA[*lo]] > Td[PA[*v3]]) ? lo : v3;
}
/* Returns the median of five elements. */
/*
 * Elements are compared by the key Td[PA[*v]]. Only the local pointer
 * variables are exchanged; the pointed-to array entries are not modified.
 * The returned pointer is one of the five arguments.
 */
static INLINE
saidx_t *
ss_median5(const sauchar_t *Td, const saidx_t *PA,
saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) {
/* t is declared here although not named below; presumably the SWAP macro uses it (macro not visible here) */
saidx_t *t;
if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); }
if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); }
if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); }
if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); }
if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); }
if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; }
return v3;
}
/*
 * Returns the pivot element for the range [first, last).
 *
 * Up to 32 elements: median of first, middle and last element.
 * Up to 512 elements: median of five evenly spread elements.
 * Larger ranges: median of three medians-of-three taken from the
 * beginning, the middle and the end of the range.
 */
static INLINE
saidx_t *
ss_pivot(const sauchar_t *Td, const saidx_t *PA, saidx_t *first, saidx_t *last) {
  saidx_t span = last - first;
  saidx_t *mid = first + span / 2;
  saidx_t step;
  saidx_t *head, *tail;

  if(span <= 32) {
    return ss_median3(Td, PA, first, mid, last - 1);
  }

  if(span <= 512) {
    step = span >> 2;
    return ss_median5(Td, PA, first, first + step, mid, last - 1 - step, last - 1);
  }

  step = span >> 3;
  head = ss_median3(Td, PA, first, first + step, first + (step << 1));
  mid = ss_median3(Td, PA, mid - step, mid, mid + step);
  tail = ss_median3(Td, PA, last - 1 - (step << 1), last - 1 - step, last - 1);
  return ss_median3(Td, PA, head, mid, tail);
}
/*---------------------------------------------------------------------------*/
/* Binary partition for substrings. */
/*
 * Splits [first, last) in two by the predicate
 *   (PA[x] + depth) >= (PA[x + 1] + 1)
 * evaluated for each entry x. Entries satisfying the predicate end up in
 * front and are stored bit-complemented; the others follow unchanged.
 * Returns a pointer to the first entry of the second part.
 */
static INLINE
saidx_t *
ss_partition(const saidx_t *PA,
saidx_t *first, saidx_t *last, saidx_t depth) {
saidx_t *a, *b;
saidx_t t;
for(a = first - 1, b = last;;) {
/* advance a over entries that satisfy the predicate, complementing each */
for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
/* retreat b over entries that do not satisfy the predicate */
for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { }
if(b <= a) { break; }
/* exchange the misplaced pair; the entry moved to the front is complemented */
t = ~*b;
*b = *a;
*a = t;
}
/* front part is non-empty: undo the complement on its first entry */
if(first < a) { *first = ~*first; }
return a;
}
/* Multikey introsort for medium size groups. */
/*
 * Sorts the index range [first, last), comparing one byte at a time:
 * at a given depth the key of entry x is Td[PA[x]] with Td = T + depth.
 * Recursion is replaced by an explicit stack driven through the
 * STACK_PUSH / STACK_POP macros (defined outside this block; presumably
 * STACK_POP returns from the function when the stack is empty - confirm).
 * limit is a depth budget initialised with ss_ilg(); when it reaches zero
 * the current range is heapsorted on the current byte instead of being
 * partitioned further.
 */
static
void
ss_mintrosort(const sauchar_t *T, const saidx_t *PA,
saidx_t *first, saidx_t *last,
saidx_t depth) {
#define STACK_SIZE SS_MISORT_STACKSIZE
struct { saidx_t *a, *b, c; saint_t d; } stack[STACK_SIZE];
const sauchar_t *Td;
saidx_t *a, *b, *c, *d, *e, *f;
saidx_t s, t;
saint_t ssize;
saint_t limit;
saint_t v, x = 0;
for(ssize = 0, limit = ss_ilg(last - first);;) {
/* small range: finish it with insertion sort and fetch the next pending range */
if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
#if 1 < SS_INSERTIONSORT_THRESHOLD
if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
#endif
STACK_POP(first, last, depth, limit);
continue;
}
Td = T + depth;
/* budget exhausted: order the range by the byte at the current depth */
if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
/* limit < 0: the range is ordered on the current byte; process it run by run */
if(limit < 0) {
/* find the end (a) of the first run of equal bytes longer than one element */
for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
if((x = Td[PA[*a]]) != v) {
if(1 < (a - first)) { break; }
v = x;
first = a;
}
}
if(Td[PA[*first] - 1] < v) {
first = ss_partition(PA, first, a, depth);
}
/* continue with the smaller of [first, a) at depth + 1 and [a, last) at this depth; push the other */
if((a - first) <= (last - a)) {
if(1 < (a - first)) {
STACK_PUSH(a, last, depth, -1);
last = a, depth += 1, limit = ss_ilg(a - first);
} else {
first = a, limit = -1;
}
} else {
if(1 < (last - a)) {
STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
first = a, limit = -1;
} else {
last = a, depth += 1, limit = ss_ilg(a - first);
}
}
continue;
}
/* choose pivot */
a = ss_pivot(Td, PA, first, last);
v = Td[PA[*a]];
SWAP(*first, *a);
/* partition */
/* three-way partition around v; entries equal to v are first collected at both ends */
for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
if(((a = b) < last) && (x < v)) {
for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
if(x == v) { SWAP(*b, *a); ++a; }
}
}
for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
if((b < (d = c)) && (x > v)) {
for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
if(x == v) { SWAP(*c, *d); --d; }
}
}
for(; b < c;) {
SWAP(*b, *c);
for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
if(x == v) { SWAP(*b, *a); ++a; }
}
for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
if(x == v) { SWAP(*c, *d); --d; }
}
}
if(a <= d) {
/* move the equal entries from both ends into the middle */
c = b - 1;
if((s = a - first) > (t = b - a)) { s = t; }
for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
if((s = d - c) > (t = last - d - 1)) { s = t; }
for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
/* now [first, a) < v, [a, c) == v, [c, last) > v */
a = first + (b - a), c = last - (d - c);
b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
/* three sub-ranges remain: [first, a) and [c, last) at this depth, [b, c) at depth + 1.
   The smallest is processed next; the others are pushed, larger one first. */
if((a - first) <= (last - c)) {
if((last - c) <= (c - b)) {
STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
STACK_PUSH(c, last, depth, limit);
last = a;
} else if((a - first) <= (c - b)) {
STACK_PUSH(c, last, depth, limit);
STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
last = a;
} else {
STACK_PUSH(c, last, depth, limit);
STACK_PUSH(first, a, depth, limit);
first = b, last = c, depth += 1, limit = ss_ilg(c - b);
}
} else {
if((a - first) <= (c - b)) {
STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
STACK_PUSH(first, a, depth, limit);
first = c;
} else if((last - c) <= (c - b)) {
STACK_PUSH(first, a, depth, limit);
STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
first = c;
} else {
STACK_PUSH(first, a, depth, limit);
STACK_PUSH(c, last, depth, limit);
first = b, last = c, depth += 1, limit = ss_ilg(c - b);
}
}
} else {
/* every entry has the byte v at this depth: give the budget unit back and go one byte deeper */
limit += 1;
if(Td[PA[*first] - 1] < v) {
first = ss_partition(PA, first, last, depth);
limit = ss_ilg(last - first);
}
depth += 1;
}
}
#undef STACK_SIZE
}
#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
/*---------------------------------------------------------------------------*/
#if SS_BLOCKSIZE != 0
/*
 * Exchanges the n elements starting at a with the n elements starting
 * at b, element by element. Does nothing when n is not positive.
 */
static INLINE
void
ss_blockswap(saidx_t *a, saidx_t *b, saidx_t n) {
  saidx_t k;
  saidx_t tmp;

  for(k = 0; k < n; ++k) {
    tmp = a[k];
    a[k] = b[k];
    b[k] = tmp;
  }
}
/*
 * In-place exchange of the adjacent ranges [first, middle) and
 * [middle, last). When both have the same length this is a single
 * ss_blockswap(); otherwise elements are cycled through the temporary t
 * and the remaining lengths l and r are reduced until one of them is
 * exhausted or both are equal.
 */
static INLINE
void
ss_rotate(saidx_t *first, saidx_t *middle, saidx_t *last) {
saidx_t *a, *b, t;
saidx_t l, r;
/* l, r: remaining lengths of the left and right range */
l = middle - first, r = last - middle;
for(; (0 < l) && (0 < r);) {
if(l == r) { ss_blockswap(first, middle, l); break; }
if(l < r) {
/* left range is shorter: work backwards from the end */
a = last - 1, b = middle - 1;
t = *a;
do {
*a-- = *b, *b-- = *a;
if(b < first) {
*a = t;
last = a;
if((r -= l + 1) <= l) { break; }
a -= 1, b = middle - 1;
t = *a;
}
} while(1);
} else {
/* right range is shorter: work forwards from the start */
a = first, b = middle;
t = *a;
do {
*a++ = *b, *b++ = *a;
if(last <= b) {
*a = t;
first = a + 1;
if((l -= r + 1) <= r) { break; }
a += 1, b = middle;
t = *a;
}
} while(1);
}
}
}
/*---------------------------------------------------------------------------*/
/*
 * Merges the ranges [first, middle) and [middle, last) without a buffer.
 * Each round takes the last entry of the right range, binary-searches its
 * position in the left range with ss_compare(), and moves the tail of the
 * left range behind it with ss_rotate().
 * Negative entries are bit-complemented indices; they are decoded with ~
 * before being used as offsets into PA.
 */
static
void
ss_inplacemerge(const sauchar_t *T, const saidx_t *PA,
saidx_t *first, saidx_t *middle, saidx_t *last,
saidx_t depth) {
const saidx_t *p;
saidx_t *a, *b;
saidx_t len, half;
saint_t q, r;
saint_t x;
for(;;) {
/* p: PA slot of the last entry; x records whether that entry was complemented */
if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }
else { x = 0; p = PA + *(last - 1); }
/* binary search in [first, middle); r keeps the last non-negative comparison result */
for(a = first, len = middle - first, half = len >> 1, r = -1;
0 < len;
len = half, half >>= 1) {
b = a + half;
q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
if(q < 0) {
a = b + 1;
half -= (len & 1) ^ 1;
} else {
r = q;
}
}
if(a < middle) {
/* an equal entry was found: mark *a by complementing it */
if(r == 0) { *a = ~*a; }
ss_rotate(a, middle, last);
last -= middle - a;
middle = a;
if(first == middle) { break; }
}
--last;
/* the handled entry was complemented: also step over the negative entries preceding it */
if(x != 0) { while(*--last < 0) { } }
if(middle == last) { break; }
}
}
/*---------------------------------------------------------------------------*/
/* Merge-forward with internal buffer. */
/*
 * Merges [first, middle) and [middle, last) front to back.
 * The left range is first exchanged with buf via ss_blockswap(), so buf
 * must hold at least (middle - first) entries. Every write to the output
 * position a is paired with a write of the displaced value back into the
 * source slot, so the original buffer contents are preserved (t holds
 * the first displaced value until the end).
 * When the heads of both runs compare equal, the right-hand head is
 * marked by bit-complementing it.
 */
static
void
ss_mergeforward(const sauchar_t *T, const saidx_t *PA,
saidx_t *first, saidx_t *middle, saidx_t *last,
saidx_t *buf, saidx_t depth) {
saidx_t *a, *b, *c, *bufend;
saidx_t t;
saint_t r;
/* bufend points at the last buffered entry */
bufend = buf + (middle - first) - 1;
ss_blockswap(buf, first, middle - first);
/* a: output position, b: head of buffered left run, c: head of right run */
for(t = *(a = first), b = buf, c = middle;;) {
r = ss_compare(T, PA + *b, PA + *c, depth);
if(r < 0) {
/* left head is smaller: emit it together with the negative entries that follow */
do {
*a++ = *b;
if(bufend <= b) { *bufend = t; return; }
*b++ = *a;
} while(*b < 0);
} else if(r > 0) {
/* right head is smaller: emit it together with the negative entries that follow */
do {
*a++ = *c, *c++ = *a;
if(last <= c) {
/* right run exhausted: flush the rest of the buffer */
while(b < bufend) { *a++ = *b, *b++ = *a; }
*a = *b, *b = t;
return;
}
} while(*c < 0);
} else {
/* equal heads: mark the right one, then emit the left group followed by the right group */
*c = ~*c;
do {
*a++ = *b;
if(bufend <= b) { *bufend = t; return; }
*b++ = *a;
} while(*b < 0);
do {
*a++ = *c, *c++ = *a;
if(last <= c) {
while(b < bufend) { *a++ = *b, *b++ = *a; }
*a = *b, *b = t;
return;
}
} while(*c < 0);
}
}
}
/* Merge-backward with internal buffer. */
/*
 * Merges [first, middle) and [middle, last) back to front.
 * The right range is first exchanged with buf via ss_blockswap(), so buf
 * must hold at least (last - middle) entries. As in the forward merge,
 * every write to the output position is paired with a write of the
 * displaced value back into the source slot.
 * x is a bit set: bit 0 is set while the current buffer tail (p1) was
 * decoded from a complemented entry, bit 1 likewise for the current left
 * tail (p2).
 */
static
void
ss_mergebackward(const sauchar_t *T, const saidx_t *PA,
saidx_t *first, saidx_t *middle, saidx_t *last,
saidx_t *buf, saidx_t depth) {
const saidx_t *p1, *p2;
saidx_t *a, *b, *c, *bufend;
saidx_t t;
saint_t r;
saint_t x;
bufend = buf + (last - middle) - 1;
ss_blockswap(buf, middle, last - middle);
x = 0;
/* decode the tails of both runs, remembering which were complemented */
if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; }
else { p1 = PA + *bufend; }
if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
else { p2 = PA + *(middle - 1); }
/* a: output position, b: tail of buffered right run, c: tail of left run */
for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {
r = ss_compare(T, p1, p2, depth);
if(0 < r) {
/* buffered tail is larger: emit it (preceded by its negative entries if flagged) */
if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
*a-- = *b;
if(b <= buf) { *buf = t; break; }
*b-- = *a;
if(*b < 0) { p1 = PA + ~*b; x |= 1; }
else { p1 = PA + *b; }
} else if(r < 0) {
/* left tail is larger: emit it (preceded by its negative entries if flagged) */
if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
*a-- = *c, *c-- = *a;
if(c < first) {
/* left run exhausted: flush the rest of the buffer */
while(buf < b) { *a-- = *b, *b-- = *a; }
*a = *b, *b = t;
break;
}
if(*c < 0) { p2 = PA + ~*c; x |= 2; }
else { p2 = PA + *c; }
} else {
/* equal tails: emit the buffered one complemented, then the left one */
if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
*a-- = ~*b;
if(b <= buf) { *buf = t; break; }
*b-- = *a;
if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
*a-- = *c, *c-- = *a;
if(c < first) {
while(buf < b) { *a-- = *b, *b-- = *a; }
*a = *b, *b = t;
break;
}
if(*b < 0) { p1 = PA + ~*b; x |= 1; }
else { p1 = PA + *b; }
if(*c < 0) { p2 = PA + ~*c; x |= 2; }
else { p2 = PA + *c; }
}
}
}
/* D&C based merge. */
/*
 * Merges [first, middle) and [middle, last) using a buffer of bufsize
 * entries. If either side fits into the buffer it is merged directly with
 * ss_mergebackward() / ss_mergeforward(). Otherwise a binary search finds
 * a count m such that the last m entries of the left side and the first m
 * entries of the right side can be exchanged, which splits the problem
 * into two smaller merges; one is pushed on an explicit stack and the
 * other is processed next.
 * check / next are bit sets (1, 2, 4) telling MERGE_CHECK which boundary
 * entries must be re-examined and possibly complemented after a sub-merge.
 * STACK_PUSH / STACK_POP are defined outside this block.
 */
static
void
ss_swapmerge(const sauchar_t *T, const saidx_t *PA,
saidx_t *first, saidx_t *middle, saidx_t *last,
saidx_t *buf, saidx_t bufsize, saidx_t depth) {
#define STACK_SIZE SS_SMERGE_STACKSIZE
/* GETIDX decodes a possibly complemented entry; MERGE_CHECK applies the pending boundary checks in c */
#define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
#define MERGE_CHECK(a, b, c)\
do {\
if(((c) & 1) ||\
(((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
*(a) = ~*(a);\
}\
if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
*(b) = ~*(b);\
}\
} while(0)
struct { saidx_t *a, *b, *c; saint_t d; } stack[STACK_SIZE];
saidx_t *l, *r, *lm, *rm;
saidx_t m, len, half;
saint_t ssize;
saint_t check, next;
for(check = 0, ssize = 0;;) {
/* right side fits into the buffer */
if((last - middle) <= bufsize) {
if((first < middle) && (middle < last)) {
ss_mergebackward(T, PA, first, middle, last, buf, depth);
}
MERGE_CHECK(first, last, check);
STACK_POP(first, middle, last, check);
continue;
}
/* left side fits into the buffer */
if((middle - first) <= bufsize) {
if(first < middle) {
ss_mergeforward(T, PA, first, middle, last, buf, depth);
}
MERGE_CHECK(first, last, check);
STACK_POP(first, middle, last, check);
continue;
}
/* binary search for the number m of entries to exchange across middle */
for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
0 < len;
len = half, half >>= 1) {
if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
m += half + 1;
half -= (len & 1) ^ 1;
}
}
if(0 < m) {
lm = middle - m, rm = middle + m;
ss_blockswap(lm, middle, m);
l = r = middle, next = 0;
if(rm < last) {
if(*rm < 0) {
*rm = ~*rm;
if(first < lm) { for(; *--l < 0;) { } next |= 4; }
next |= 1;
} else if(first < lm) {
for(; *r < 0; ++r) { }
next |= 2;
}
}
/* continue with the smaller sub-merge, push the larger one */
if((l - first) <= (last - r)) {
STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
middle = lm, last = l, check = (check & 3) | (next & 4);
} else {
if((next & 2) && (r == middle)) { next ^= 6; }
STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
first = r, middle = rm, check = (next & 3) | (check & 4);
}
} else {
/* nothing to exchange: only the boundary at middle may need marking */
if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
*middle = ~*middle;
}
MERGE_CHECK(first, last, check);
STACK_POP(first, middle, last, check);
}
}
#undef STACK_SIZE
}
#endif /* SS_BLOCKSIZE != 0 */
/*---------------------------------------------------------------------------*/
/*- Function -*/
/* Substring sort */
/*
 * Sorts the index range [first, last) whose entries are offsets into PA.
 *
 * T          text
 * PA         array of positions into T that the sorted entries index
 * first,last range to sort
 * buf        work buffer of bufsize entries used for merging
 * depth      number of leading bytes to skip in every comparison
 * n          used only to build the end bound (n - 2) for the last suffix
 * lastsuffix non-zero: *first is excluded from the sort and inserted
 *            into the sorted range at the end
 *
 * With SS_BLOCKSIZE == 0 the whole range is sorted by ss_mintrosort().
 * Otherwise blocks of SS_BLOCKSIZE entries are sorted individually and
 * merged with ss_swapmerge(). If the supplied buffer is small, the tail
 * of the range itself (limit entries) is used as merge buffer and merged
 * in at the end with ss_inplacemerge().
 */
void
sssort(const sauchar_t *T, const saidx_t *PA,
saidx_t *first, saidx_t *last,
saidx_t *buf, saidx_t bufsize,
saidx_t depth, saidx_t n, saint_t lastsuffix) {
saidx_t *a;
#if SS_BLOCKSIZE != 0
saidx_t *b, *middle, *curbuf;
saidx_t j, k, curbufsize, limit;
#endif
saidx_t i;
if(lastsuffix != 0) { ++first; }
#if SS_BLOCKSIZE == 0
ss_mintrosort(T, PA, first, last, depth);
#else
/* buffer smaller than a block, than the range and than ss_isqrt(range): borrow the tail of the range */
if((bufsize < SS_BLOCKSIZE) &&
(bufsize < (last - first)) &&
(bufsize < (limit = ss_isqrt(last - first)))) {
if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
buf = middle = last - limit, bufsize = limit;
} else {
middle = last, limit = 0;
}
/* sort full blocks; i counts finished blocks and its low one-bits decide how many merges follow */
for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
#elif 1 < SS_BLOCKSIZE
ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
#endif
/* prefer the not yet sorted space behind this block as buffer when it is larger than buf */
curbufsize = last - (a + SS_BLOCKSIZE);
curbuf = a + SS_BLOCKSIZE;
if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
}
}
/* sort the remaining partial block */
#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
ss_mintrosort(T, PA, a, middle, depth);
#elif 1 < SS_BLOCKSIZE
ss_insertionsort(T, PA, a, middle, depth);
#endif
/* merge the still separate sorted runs, one per set bit of i */
for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
if(i & 1) {
ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
a -= k;
}
}
/* the borrowed tail was used as buffer: sort it and merge it in place */
if(limit != 0) {
#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
ss_mintrosort(T, PA, middle, last, depth);
#elif 1 < SS_BLOCKSIZE
ss_insertionsort(T, PA, middle, last, depth);
#endif
ss_inplacemerge(T, PA, first, middle, last, depth);
}
#endif
if(lastsuffix != 0) {
/* Insert last type B* suffix. */
/* PAi is a two-entry stand-in for PA so that ss_compare() uses n - 2 as the end bound */
saidx_t PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;
/* shift entries left while they are negative or sort before the held-back suffix */
for(a = first, i = *(first - 1);
(a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
++a) {
*(a - 1) = *a;
}
*(a - 1) = i;
}
}

View file

@ -0,0 +1,586 @@
/*
* trsort.c for libdivsufsort
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "divsufsort_private.h"
/*- Private Functions -*/
/* lg_table[i] is floor(log2(i)) for i in 1..255; lg_table[0] is -1. */
static const saint_t lg_table[256]= {
-1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
};
/*
 * Integer base-2 logarithm by table lookup.
 *
 * Finds the highest non-zero byte of n and returns its bit offset plus
 * the lg_table entry for that byte. For n == 0 the result is -1
 * (lg_table[0]). The upper 32 bits are only examined when
 * BUILD_DIVSUFSORT64 is defined.
 */
static INLINE
saint_t
tr_ilg(saidx_t n) {
#if defined(BUILD_DIVSUFSORT64)
  /* 64-bit indices: look at the upper four bytes first. */
  if(n >> 32) {
    if(n >> 48) {
      if(n >> 56) { return 56 + lg_table[(n >> 56) & 0xff]; }
      return 48 + lg_table[(n >> 48) & 0xff];
    }
    if(n >> 40) { return 40 + lg_table[(n >> 40) & 0xff]; }
    return 32 + lg_table[(n >> 32) & 0xff];
  }
#endif
  /* Lower four bytes, shared by both builds. */
  if(n & 0xffff0000) {
    if(n & 0xff000000) { return 24 + lg_table[(n >> 24) & 0xff]; }
    return 16 + lg_table[(n >> 16) & 0xff];
  }
  if(n & 0x0000ff00) { return 8 + lg_table[(n >> 8) & 0xff]; }
  return 0 + lg_table[(n >> 0) & 0xff];
}
/*---------------------------------------------------------------------------*/
/* Simple insertionsort for small size groups. */
/*
 * Sorts [first, last) by the key ISAd[x], inserting elements left to
 * right. An entry whose key equals the key of the element inserted right
 * after it is stored bit-complemented (negative), see the r == 0 case.
 */
static
void
tr_insertionsort(const saidx_t *ISAd, saidx_t *first, saidx_t *last) {
saidx_t *a, *b;
saidx_t t, r;
for(a = first + 1; a < last; ++a) {
/* t is the value being inserted; loop while its key is smaller than the key of *b */
for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
/* shift *b right, together with any negative (complemented) entries before it */
do { *(b + 1) = *b; } while((first <= --b) && (*b < 0));
if(b < first) { break; }
}
/* equal keys: mark *b by complementing it */
if(r == 0) { *b = ~*b; }
*(b + 1) = t;
}
}
/*---------------------------------------------------------------------------*/
/*
 * Sift-down step for the heap in SA[0..size), keyed by ISAd[SA[x]].
 * The value at SA[i] is moved down until neither child has a larger key.
 */
static INLINE
void
tr_fixdown(const saidx_t *ISAd, saidx_t *SA, saidx_t i, saidx_t size) {
saidx_t j, k;
saidx_t v;
saidx_t c, d, e;
/* v is the value being placed, c its key; j is the left child of i */
for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
/* k = left child, then j advances to the right child */
d = ISAd[SA[k = j++]];
/* NOTE(review): SA[j] (right child) is read without comparing j against size */
if(d < (e = ISAd[SA[j]])) { k = j; d = e; }
/* larger child does not exceed v's key: v belongs at i */
if(d <= c) { break; }
}
SA[i] = v;
}
/* Simple top-down heapsort. */
/*
 * Sorts SA[0..size) by the key ISAd[SA[x]] using tr_fixdown().
 * The heap is built over an odd number of elements m; when size is even
 * the last element is handled separately before and after heap building.
 */
static
void
tr_heapsort(const saidx_t *ISAd, saidx_t *SA, saidx_t size) {
saidx_t i, m;
saidx_t t;
m = size;
if((size % 2) == 0) {
/* even size: leave SA[m] outside the heap, ordering it against SA[m / 2] */
m--;
if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); }
}
/* build the heap over SA[0..m) */
for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); }
/* even size: exchange the heap root with the left-out element and restore the heap */
if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }
/* repeatedly move the root to the end of the shrinking heap */
for(i = m - 1; 0 < i; --i) {
t = SA[0], SA[0] = SA[i];
tr_fixdown(ISAd, SA, 0, i);
SA[i] = t;
}
}
/*---------------------------------------------------------------------------*/
/*
 * Returns the median of three elements.
 *
 * Elements are compared by the key ISAd[*v]; the returned pointer is one
 * of v1, v2, v3.
 */
static INLINE
saidx_t *
tr_median3(const saidx_t *ISAd, saidx_t *v1, saidx_t *v2, saidx_t *v3) {
  saidx_t *lo = v1;
  saidx_t *hi = v2;

  /* Order the first two candidates so that key(lo) <= key(hi). */
  if(ISAd[*lo] > ISAd[*hi]) {
    lo = v2;
    hi = v1;
  }

  /* v3 is at least as large as hi: hi is the median. */
  if(ISAd[*hi] <= ISAd[*v3]) { return hi; }

  /* v3 is below hi: the median is the larger of lo and v3. */
  return (ISAd[*lo] > ISAd[*v3]) ? lo : v3;
}
/* Returns the median of five elements. */
/*
 * Elements are compared by the key ISAd[*v]. Only the local pointer
 * variables are exchanged; the pointed-to array entries are not modified.
 * The returned pointer is one of the five arguments.
 */
static INLINE
saidx_t *
tr_median5(const saidx_t *ISAd,
saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) {
/* t is declared here although not named below; presumably the SWAP macro uses it (macro not visible here) */
saidx_t *t;
if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }
if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }
if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); }
if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
if(ISAd[*v3] > ISAd[*v4]) { return v4; }
return v3;
}
/*
 * Returns the pivot element for the range [first, last).
 *
 * Up to 32 elements: median of first, middle and last element.
 * Up to 512 elements: median of five evenly spread elements.
 * Larger ranges: median of three medians-of-three taken from the
 * beginning, the middle and the end of the range.
 */
static INLINE
saidx_t *
tr_pivot(const saidx_t *ISAd, saidx_t *first, saidx_t *last) {
  saidx_t span = last - first;
  saidx_t *mid = first + span / 2;
  saidx_t step;
  saidx_t *head, *tail;

  if(span <= 32) {
    return tr_median3(ISAd, first, mid, last - 1);
  }

  if(span <= 512) {
    step = span >> 2;
    return tr_median5(ISAd, first, first + step, mid, last - 1 - step, last - 1);
  }

  step = span >> 3;
  head = tr_median3(ISAd, first, first + step, first + (step << 1));
  mid = tr_median3(ISAd, mid - step, mid, mid + step);
  tail = tr_median3(ISAd, last - 1 - (step << 1), last - 1 - step, last - 1);
  return tr_median3(ISAd, head, mid, tail);
}
/*---------------------------------------------------------------------------*/
/* Work budget consulted by tr_introsort() through trbudget_check(). */
typedef struct _trbudget_t trbudget_t;
struct _trbudget_t {
/* number of times the budget may still be topped up by incval */
saidx_t chance;
/* work units left before the next top-up is needed */
saidx_t remain;
/* amount added to remain on each top-up */
saidx_t incval;
/* total size of the requests that trbudget_check() refused */
saidx_t count;
};
/*
 * Initializes a sort budget.
 *
 * budget  budget to initialize
 * chance  number of times the budget may be topped up
 * incval  initial amount of work and size of every top-up
 *
 * All four fields are set, so the budget can be passed to
 * trbudget_check() right away.
 */
static INLINE
void
trbudget_init(trbudget_t *budget, saidx_t chance, saidx_t incval) {
  budget->chance = chance;
  budget->remain = budget->incval = incval;
  /* trbudget_check() accumulates into count with +=, so it must start
     from a defined value even if the caller does not reset it. */
  budget->count = 0;
}
/*
 * Charges size work units against the budget.
 *
 * Returns 1 when the work is allowed: either enough budget remains, or a
 * top-up by incval is still available (which consumes one chance).
 * Returns 0 when neither holds; size is then added to budget->count.
 */
static INLINE
saint_t
trbudget_check(trbudget_t *budget, saidx_t size) {
  if(size > budget->remain) {
    if(budget->chance == 0) {
      /* Out of budget and out of top-ups: record the refused work. */
      budget->count += size;
      return 0;
    }
    /* Top up, then charge the request. */
    budget->remain += budget->incval - size;
    budget->chance -= 1;
  } else {
    budget->remain -= size;
  }
  return 1;
}
/*---------------------------------------------------------------------------*/
/*
 * Three-way partition of [first, last) around the key value v, with keys
 * read as ISAd[x]. Scanning starts at middle; the caller passes
 * middle = first + 1 when *first is the pivot and middle = first
 * otherwise.
 * On return *pa and *pb bound the entries whose key equals v: entries
 * before *pa have smaller keys and entries from *pb on have larger keys.
 * If no rearrangement was needed, *pa = first and *pb = last.
 */
static INLINE
void
tr_partition(const saidx_t *ISAd,
saidx_t *first, saidx_t *middle, saidx_t *last,
saidx_t **pa, saidx_t **pb, saidx_t v) {
saidx_t *a, *b, *c, *d, *e, *f;
saidx_t t, s;
saidx_t x = 0;
/* skip the leading run of keys equal to v */
for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
/* scan forward over keys <= v, collecting equal ones at the front (a) */
if(((a = b) < last) && (x < v)) {
for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
if(x == v) { SWAP(*b, *a); ++a; }
}
}
/* skip the trailing run of keys equal to v */
for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
/* scan backward over keys >= v, collecting equal ones at the back (d) */
if((b < (d = c)) && (x > v)) {
for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
if(x == v) { SWAP(*c, *d); --d; }
}
}
/* exchange misplaced pairs until the scans meet */
for(; b < c;) {
SWAP(*b, *c);
for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
if(x == v) { SWAP(*b, *a); ++a; }
}
for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
if(x == v) { SWAP(*c, *d); --d; }
}
}
if(a <= d) {
/* move the equal entries collected at both ends into the middle */
c = b - 1;
if((s = a - first) > (t = b - a)) { s = t; }
for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
if((s = d - c) > (t = last - d - 1)) { s = t; }
for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
first += (b - a), last -= (d - c);
}
*pa = first, *pb = last;
}
/*
 * Fills the middle partition [a, b) from the already ordered partitions
 * [first, a) and [b, last), and assigns every placed suffix its final
 * rank (its position in SA).
 * A suffix s = *c - depth is placed only if s is non-negative and its
 * current rank ISA[s] equals v = b - SA - 1.
 */
static
void
tr_copy(saidx_t *ISA, const saidx_t *SA,
saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last,
saidx_t depth) {
/* sort suffixes of middle partition
by using sorted order of suffixes of left and right partition. */
saidx_t *c, *d, *e;
saidx_t s, v;
v = b - SA - 1;
/* left-to-right scan: d is the last filled slot and grows as entries are appended,
   so newly placed entries are scanned as well */
for(c = first, d = a - 1; c <= d; ++c) {
if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
*++d = s;
ISA[s] = d - SA;
}
}
/* right-to-left scan: fill downwards from b until the slots filled above are reached */
for(c = last - 1, e = d + 1, d = b; e < d; --c) {
if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
*--d = s;
ISA[s] = d - SA;
}
}
}
/*
 * Variant of tr_copy() for the case where the outer partitions are not
 * completely sorted. Placed suffixes do not each get their own position
 * as rank: consecutive suffixes whose source rank ISA[s + depth] is the
 * same share one rank (newrank), taken from the position at which that
 * source rank was first seen in the scan.
 */
static
void
tr_partialcopy(saidx_t *ISA, const saidx_t *SA,
saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last,
saidx_t depth) {
saidx_t *c, *d, *e;
saidx_t s, v;
saidx_t rank, lastrank, newrank = -1;
v = b - SA - 1;
lastrank = -1;
/* left-to-right scan: append matching suffixes after a - 1 */
for(c = first, d = a - 1; c <= d; ++c) {
if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
*++d = s;
rank = ISA[s + depth];
if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
ISA[s] = newrank;
}
}
/* walk back over [first, d] and give each run of equal ranks the position of its last entry */
lastrank = -1;
for(e = d; first <= e; --e) {
rank = ISA[*e];
if(lastrank != rank) { lastrank = rank; newrank = e - SA; }
if(newrank != rank) { ISA[*e] = newrank; }
}
/* right-to-left scan: fill downwards from b until the slots filled above are reached */
lastrank = -1;
for(c = last - 1, e = d + 1, d = b; e < d; --c) {
if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
*--d = s;
rank = ISA[s + depth];
if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
ISA[s] = newrank;
}
}
}
/*
 * Introsort of the group [first, last) of SA by the key ISAd[x], with
 * rank updates written to ISA.
 *
 * Recursion is replaced by an explicit stack driven through the
 * STACK_PUSH5 / STACK_POP5 macros (defined outside this block; presumably
 * STACK_POP5 returns from the function when the stack is empty - confirm).
 * incr = ISAd - ISA is the key offset; "ISAd + incr" selects the keys for
 * the next level and "ISAd - incr" those of the previous level.
 *
 * limit is a depth budget while it is non-negative. Negative values
 * select special processing steps:
 *   -1  tandem repeat partition
 *   -2  tandem repeat copy (tr_copy / tr_partialcopy)
 *   -3  the range has just been sorted on the current key; assign ranks
 *       and descend into runs of equal keys
 *
 * budget limits the total work; when trbudget_check() refuses a range it
 * is left unsorted and stack[trlink].d is set to -1 so that the pending
 * tandem repeat copy uses tr_partialcopy() instead of tr_copy().
 */
static
void
tr_introsort(saidx_t *ISA, const saidx_t *ISAd,
saidx_t *SA, saidx_t *first, saidx_t *last,
trbudget_t *budget) {
#define STACK_SIZE TR_STACKSIZE
struct { const saidx_t *a; saidx_t *b, *c; saint_t d, e; }stack[STACK_SIZE];
saidx_t *a, *b, *c;
saidx_t t;
saidx_t v, x = 0;
saidx_t incr = ISAd - ISA;
saint_t limit, next;
saint_t ssize, trlink = -1;
for(ssize = 0, limit = tr_ilg(last - first);;) {
if(limit < 0) {
if(limit == -1) {
/* tandem repeat partition */
tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
/* update ranks */
if(a < last) {
for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
}
if(b < last) {
for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
}
/* push */
/* two entries: the middle range, then a -2 record that triggers the copy step later */
if(1 < (b - a)) {
STACK_PUSH5(NULL, a, b, 0, 0);
STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
trlink = ssize - 2;
}
/* continue with the smaller outer range, push the larger one */
if((a - first) <= (last - b)) {
if(1 < (a - first)) {
STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
last = a, limit = tr_ilg(a - first);
} else if(1 < (last - b)) {
first = b, limit = tr_ilg(last - b);
} else {
STACK_POP5(ISAd, first, last, limit, trlink);
}
} else {
if(1 < (last - b)) {
STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
first = b, limit = tr_ilg(last - b);
} else if(1 < (a - first)) {
last = a, limit = tr_ilg(a - first);
} else {
STACK_POP5(ISAd, first, last, limit, trlink);
}
}
} else if(limit == -2) {
/* tandem repeat copy */
/* fetch the middle range pushed together with the -2 record; d == 0 means nothing was skipped */
a = stack[--ssize].b, b = stack[ssize].c;
if(stack[ssize].d == 0) {
tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
} else {
if(0 <= trlink) { stack[trlink].d = -1; }
tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
}
STACK_POP5(ISAd, first, last, limit, trlink);
} else {
/* sorted partition */
/* leading non-negative entries are singletons: their rank is their position */
if(0 <= *first) {
a = first;
do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
first = a;
}
if(first < last) {
/* a run of negative entries plus the entry ending it share one key: decode them */
a = first; do { *a = ~*a; } while(*++a < 0);
next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
/* push */
if(trbudget_check(budget, a - first)) {
if((a - first) <= (last - a)) {
STACK_PUSH5(ISAd, a, last, -3, trlink);
ISAd += incr, last = a, limit = next;
} else {
if(1 < (last - a)) {
STACK_PUSH5(ISAd + incr, first, a, next, trlink);
first = a, limit = -3;
} else {
ISAd += incr, last = a, limit = next;
}
}
} else {
/* budget refused: leave the run unsorted and flag the pending copy */
if(0 <= trlink) { stack[trlink].d = -1; }
if(1 < (last - a)) {
first = a, limit = -3;
} else {
STACK_POP5(ISAd, first, last, limit, trlink);
}
}
} else {
STACK_POP5(ISAd, first, last, limit, trlink);
}
}
continue;
}
/* small range: insertion sort, then handle it as a sorted partition */
if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
tr_insertionsort(ISAd, first, last);
limit = -3;
continue;
}
/* depth budget exhausted: heapsort, then complement all but the last entry of each run of equal keys */
if(limit-- == 0) {
tr_heapsort(ISAd, first, last - first);
for(a = last - 1; first < a; a = b) {
for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
}
limit = -3;
continue;
}
/* choose pivot */
a = tr_pivot(ISAd, first, last);
SWAP(*first, *a);
v = ISAd[*first];
/* partition */
tr_partition(ISAd, first, first + 1, last, &a, &b, v);
if((last - first) != (b - a)) {
next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
/* update ranks */
for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
/* push */
/* three ranges remain: [first, a) and [b, last) with the current keys, [a, b) with the next-level keys.
   The smallest is processed next; ranges of fewer than two entries are dropped. */
if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
if((a - first) <= (last - b)) {
if((last - b) <= (b - a)) {
if(1 < (a - first)) {
STACK_PUSH5(ISAd + incr, a, b, next, trlink);
STACK_PUSH5(ISAd, b, last, limit, trlink);
last = a;
} else if(1 < (last - b)) {
STACK_PUSH5(ISAd + incr, a, b, next, trlink);
first = b;
} else {
ISAd += incr, first = a, last = b, limit = next;
}
} else if((a - first) <= (b - a)) {
if(1 < (a - first)) {
STACK_PUSH5(ISAd, b, last, limit, trlink);
STACK_PUSH5(ISAd + incr, a, b, next, trlink);
last = a;
} else {
STACK_PUSH5(ISAd, b, last, limit, trlink);
ISAd += incr, first = a, last = b, limit = next;
}
} else {
STACK_PUSH5(ISAd, b, last, limit, trlink);
STACK_PUSH5(ISAd, first, a, limit, trlink);
ISAd += incr, first = a, last = b, limit = next;
}
} else {
if((a - first) <= (b - a)) {
if(1 < (last - b)) {
STACK_PUSH5(ISAd + incr, a, b, next, trlink);
STACK_PUSH5(ISAd, first, a, limit, trlink);
first = b;
} else if(1 < (a - first)) {
STACK_PUSH5(ISAd + incr, a, b, next, trlink);
last = a;
} else {
ISAd += incr, first = a, last = b, limit = next;
}
} else if((last - b) <= (b - a)) {
if(1 < (last - b)) {
STACK_PUSH5(ISAd, first, a, limit, trlink);
STACK_PUSH5(ISAd + incr, a, b, next, trlink);
first = b;
} else {
STACK_PUSH5(ISAd, first, a, limit, trlink);
ISAd += incr, first = a, last = b, limit = next;
}
} else {
STACK_PUSH5(ISAd, first, a, limit, trlink);
STACK_PUSH5(ISAd, b, last, limit, trlink);
ISAd += incr, first = a, last = b, limit = next;
}
}
} else {
/* middle range is trivial or the budget refused it: only the outer ranges are processed */
if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
if((a - first) <= (last - b)) {
if(1 < (a - first)) {
STACK_PUSH5(ISAd, b, last, limit, trlink);
last = a;
} else if(1 < (last - b)) {
first = b;
} else {
STACK_POP5(ISAd, first, last, limit, trlink);
}
} else {
if(1 < (last - b)) {
STACK_PUSH5(ISAd, first, a, limit, trlink);
first = b;
} else if(1 < (a - first)) {
last = a;
} else {
STACK_POP5(ISAd, first, last, limit, trlink);
}
}
}
} else {
/* all keys are equal: move on to the next-level keys if the budget allows */
if(trbudget_check(budget, last - first)) {
limit = tr_ilg(last - first), ISAd += incr;
} else {
if(0 <= trlink) { stack[trlink].d = -1; }
STACK_POP5(ISAd, first, last, limit, trlink);
}
}
}
#undef STACK_SIZE
}
/*---------------------------------------------------------------------------*/
/*- Function -*/
/* Tandem repeat sort */
/*
 * Repeatedly refines the groups of SA[0..n) until none is left unsorted.
 *
 * ISA    rank array; ISA[t] + 1 is used as the end position of the
 *        group that starts with suffix t
 * SA     array being sorted; a negative entry -k marks a run of k
 *        already sorted positions that is skipped
 * n      number of entries in SA
 * depth  initial key offset; the offset doubles after every pass
 */
void
trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth) {
saidx_t *ISAd;
saidx_t *first, *last;
trbudget_t budget;
saidx_t t, skip, unsorted;
trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
/* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */
/* one pass per key offset; stop once SA[0] marks all n positions as sorted */
for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) {
first = SA;
skip = 0;
unsorted = 0;
do {
/* negative entry: jump over the sorted run and extend the pending run length (skip is negative) */
if((t = *first) < 0) { first -= t; skip += t; }
else {
/* write the accumulated run length at the start of the pending sorted run */
if(skip != 0) { *(first + skip) = skip; skip = 0; }
last = SA + ISA[t] + 1;
if(1 < (last - first)) {
budget.count = 0;
tr_introsort(ISA, ISAd, SA, first, last, &budget);
/* count != 0: part of the group was refused by the budget and is still unsorted */
if(budget.count != 0) { unsorted += budget.count; }
else { skip = first - last; }
} else if((last - first) == 1) {
skip = -1;
}
first = last;
}
} while(first < (SA + n));
if(skip != 0) { *(first + skip) = skip; }
if(unsorted == 0) { break; }
}
}

View file

@ -0,0 +1,9 @@
## generate libdivsufsort.pc ##
# Default library: W64BIT is empty. @ONLY restricts substitution in the
# template to @VAR@ references, leaving ${...} text untouched.
set(W64BIT "")
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/libdivsufsort.pc.cmake" "${CMAKE_CURRENT_BINARY_DIR}/libdivsufsort.pc" @ONLY)
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libdivsufsort.pc" DESTINATION ${CMAKE_INSTALL_PKGCONFIGDIR})
# 64-bit variant: configure the same template a second time with
# W64BIT set to "64" and install it as libdivsufsort64.pc.
if(BUILD_DIVSUFSORT64)
set(W64BIT "64")
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/libdivsufsort.pc.cmake" "${CMAKE_CURRENT_BINARY_DIR}/libdivsufsort64.pc" @ONLY)
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libdivsufsort64.pc" DESTINATION ${CMAKE_INSTALL_PKGCONFIGDIR})
endif(BUILD_DIVSUFSORT64)

View file

@ -0,0 +1,11 @@
# pkg-config template; the @VAR@ placeholders are expected to be filled in
# by the build system, while ${...} references are resolved by pkg-config.
# @W64BIT@ is a suffix appended to both the package name and the library name.
# NOTE(review): libdir/includedir take CMAKE_INSTALL_LIBDIR/INCLUDEDIR as is;
# confirm that these are absolute paths in this build.
prefix=@CMAKE_INSTALL_PREFIX@
exec_prefix=${prefix}
libdir=@CMAKE_INSTALL_LIBDIR@
includedir=@CMAKE_INSTALL_INCLUDEDIR@
Name: @PROJECT_NAME@@W64BIT@
Description: @PROJECT_DESCRIPTION@
Version: @PROJECT_VERSION_FULL@
URL: @PROJECT_URL@
Libs: -L${libdir} -ldivsufsort@W64BIT@
Cflags: -I${includedir}

1109
loader/tools/lzsa/src/lzsa.c Executable file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,361 @@
/*
* matchfinder.c - LZ match finder implementation
*
* The following copying information applies to this specific source code file:
*
* Written in 2019 by Emmanuel Marty <marty.emmanuel@gmail.com>
* Portions written in 2014-2015 by Eric Biggers <ebiggers3@gmail.com>
*
* To the extent possible under law, the author(s) have dedicated all copyright
* and related and neighboring rights to this software to the public domain
* worldwide via the Creative Commons Zero 1.0 Universal Public Domain
* Dedication (the "CC0").
*
* This software is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the CC0 for more details.
*
* You should have received a copy of the CC0 along with this software; if not
* see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdlib.h>
#include <string.h>
#include "matchfinder.h"
#include "format.h"
#include "lib.h"
/**
 * Derive a TAG_BITS-wide tag from an index
 *
 * Multiplicative hash: the index is multiplied by a 64-bit odd constant (0x9E3779B97F4A7C15)
 * and only the top TAG_BITS bits of the 64-bit product are kept.
 *
 * @param nIndex index value
 *
 * @return tag, in the range 0 .. (1 << TAG_BITS) - 1
 */
static inline int lzsa_get_index_tag(unsigned int nIndex) {
   const unsigned long long nProduct = (unsigned long long)nIndex * 11400714819323198485ULL;
   const unsigned long long nTopBits = nProduct >> (64ULL - TAG_BITS);

   return (int)nTopBits;
}
/**
* Parse input data, build suffix array and overlaid data structures to speed up match finding
*
* On success, pCompressor->intervals holds the lcp-interval table and pCompressor->pos_data holds, for each
* input position, a reference to the deepest interval containing it. lzsa_find_matches_at() consumes both.
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress)
*
* @return 0 for success, non-zero for failure (100 when the suffix array could not be built)
*/
int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize) {
unsigned int *intervals = pCompressor->intervals;
/* Build suffix array from input data; the result is written into the intervals buffer */
if (divsufsort_build_array(&pCompressor->divsufsort_context, pInWindow, (saidx_t*)intervals, nInWindowSize) != 0) {
return 100;
}
/* Phi and PLCP share the pos_data storage: inside the loop below, Phi[i] is always read before PLCP[i] overwrites it */
int *PLCP = (int*)pCompressor->pos_data; /* Use temporarily */
int *Phi = PLCP;
int nCurLen = 0;
int i, r;
/* Compute the permuted LCP first (Kärkkäinen method) */
/* Phi[p] = position of the suffix that comes right before suffix p in suffix array order, or -1 for the first suffix */
Phi[intervals[0]] = -1;
for (i = 1; i < nInWindowSize; i++)
Phi[intervals[i]] = intervals[i - 1];
for (i = 0; i < nInWindowSize; i++) {
if (Phi[i] == -1) {
PLCP[i] = 0;
continue;
}
/* Cap the comparison so that neither of the two suffixes is read past the end of the window */
int nMaxLen = (i > Phi[i]) ? (nInWindowSize - i) : (nInWindowSize - Phi[i]);
while (nCurLen < nMaxLen && pInWindow[i + nCurLen] == pInWindow[Phi[i] + nCurLen]) nCurLen++;
PLCP[i] = nCurLen;
/* The next position starts its comparison from the current common length minus one byte */
if (nCurLen > 0)
nCurLen--;
}
/* Rotate permuted LCP into the LCP. This has better cache locality than the direct Kasai LCP method. This also
* saves us from having to build the inverse suffix array index, as the LCP is calculated without it using this method,
* and the interval builder below doesn't need it either. */
intervals[0] &= POS_MASK;
int nMinMatchSize = pCompressor->min_match_size;
if (pCompressor->format_version >= 2) {
/* Format version 2 and up: clamp the length to LCP_MAX and pack a hash tag of the position into the low TAG_BITS bits below it */
for (i = 1; i < nInWindowSize; i++) {
int nIndex = (int)(intervals[i] & POS_MASK);
int nLen = PLCP[nIndex];
/* Common prefixes shorter than the minimum match size are treated as no match at all */
if (nLen < nMinMatchSize)
nLen = 0;
if (nLen > LCP_MAX)
nLen = LCP_MAX;
int nTaggedLen = 0;
if (nLen)
nTaggedLen = (nLen << TAG_BITS) | (lzsa_get_index_tag((unsigned int)nIndex) & ((1 << TAG_BITS) - 1));
intervals[i] = ((unsigned int)nIndex) | (((unsigned int)nTaggedLen) << LCP_SHIFT);
}
}
else {
/* Earlier format versions: no tag, the length is clamped to LCP_AND_TAG_MAX and stored directly above LCP_SHIFT */
for (i = 1; i < nInWindowSize; i++) {
int nIndex = (int)(intervals[i] & POS_MASK);
int nLen = PLCP[nIndex];
if (nLen < nMinMatchSize)
nLen = 0;
if (nLen > LCP_AND_TAG_MAX)
nLen = LCP_AND_TAG_MAX;
intervals[i] = ((unsigned int)nIndex) | (((unsigned int)nLen) << LCP_SHIFT);
}
}
/**
* Build intervals for finding matches
*
* Methodology and code fragment taken from wimlib (CC0 license):
* https://wimlib.net/git/?p=wimlib;a=blob_plain;f=src/lcpit_matchfinder.c;h=a2d6a1e0cd95200d1f3a5464d8359d5736b14cbe;hb=HEAD
*/
/* SA_and_LCP is the same buffer as intervals: entry r is read as input while interval slots are being written */
unsigned int * const SA_and_LCP = intervals;
unsigned int *pos_data = pCompressor->pos_data;
unsigned int next_interval_idx;
/* open_intervals is used as a stack of (lcp | interval index) entries; top points at the deepest open interval */
unsigned int *top = pCompressor->open_intervals;
unsigned int prev_pos = SA_and_LCP[0] & POS_MASK;
/* Entry 0 is the root interval (lcp 0, index 0) */
*top = 0;
intervals[0] = 0;
next_interval_idx = 1;
for (r = 1; r < nInWindowSize; r++) {
const unsigned int next_pos = SA_and_LCP[r] & POS_MASK;
const unsigned int next_lcp = SA_and_LCP[r] & LCP_MASK;
const unsigned int top_lcp = *top & LCP_MASK;
if (next_lcp == top_lcp) {
/* Continuing the deepest open interval */
pos_data[prev_pos] = *top;
}
else if (next_lcp > top_lcp) {
/* Opening a new interval */
*++top = next_lcp | next_interval_idx++;
pos_data[prev_pos] = *top;
}
else {
/* Closing the deepest open interval */
pos_data[prev_pos] = *top;
for (;;) {
/* Pop the closed interval; each branch below stores a reference to its parent in intervals[] */
const unsigned int closed_interval_idx = *top-- & POS_MASK;
const unsigned int superinterval_lcp = *top & LCP_MASK;
if (next_lcp == superinterval_lcp) {
/* Continuing the superinterval */
intervals[closed_interval_idx] = *top;
break;
}
else if (next_lcp > superinterval_lcp) {
/* Creating a new interval that is a
* superinterval of the one being
* closed, but still a subinterval of
* its superinterval */
*++top = next_lcp | next_interval_idx++;
intervals[closed_interval_idx] = *top;
break;
}
else {
/* Also closing the superinterval */
intervals[closed_interval_idx] = *top;
}
}
}
prev_pos = next_pos;
}
/* Close any still-open intervals. */
pos_data[prev_pos] = *top;
for (; top > pCompressor->open_intervals; top--)
intervals[*top & POS_MASK] = *(top - 1);
/* Success */
return 0;
}
/**
* Find matches at the specified offset in the input window
*
* Besides reporting matches, the call rewrites the interval and pos_data links for nOffset, so that the
* current position becomes a candidate for later positions. Only matches whose offset is at most MAX_OFFSET are stored.
*
* @param pCompressor compression context
* @param nOffset offset to find matches at, in the input window
* @param pMatches pointer to returned matches
* @param nMaxMatches maximum number of matches to return (0 for none)
* @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress); when
*                      format_version >= 2 and this value is below 65536, extra candidate matches are also
*                      reported, most of them with bit 15 (0x8000) set in their length
*
* @return number of matches
*/
int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches, const int nInWindowSize) {
unsigned int *intervals = pCompressor->intervals;
unsigned int *pos_data = pCompressor->pos_data;
unsigned int ref;
unsigned int super_ref;
unsigned int match_pos;
lzsa_match *matchptr;
/* Offset of the last extra candidate stored, used to avoid storing the same offset twice in a row */
int nPrevOffset = 0;
/**
* Find matches using intervals
*
* Taken from wimlib (CC0 license):
* https://wimlib.net/git/?p=wimlib;a=blob_plain;f=src/lcpit_matchfinder.c;h=a2d6a1e0cd95200d1f3a5464d8359d5736b14cbe;hb=HEAD
*/
/* Get the deepest lcp-interval containing the current suffix. */
ref = pos_data[nOffset];
pos_data[nOffset] = 0;
/* Ascend until we reach a visited interval, the root, or a child of the
* root. Link unvisited intervals to the current suffix as we go. */
while ((super_ref = intervals[ref & POS_MASK]) & LCP_MASK) {
intervals[ref & POS_MASK] = nOffset | VISITED_FLAG;
ref = super_ref;
}
if (super_ref == 0) {
/* In this case, the current interval may be any of:
* (1) the root;
* (2) an unvisited child of the root */
if (ref != 0) /* Not the root? */
intervals[ref & POS_MASK] = nOffset | VISITED_FLAG;
return 0;
}
/* Ascend indirectly via pos_data[] links. */
match_pos = super_ref & EXCL_VISITED_MASK;
matchptr = pMatches;
/* Extra candidate: the position linked from the first visited interval, stored with an unflagged length */
if (pCompressor->format_version >= 2 && nInWindowSize < 65536) {
if ((matchptr - pMatches) < nMaxMatches) {
int nMatchOffset = (int)(nOffset - match_pos);
if (nMatchOffset <= MAX_OFFSET) {
matchptr->length = (unsigned short)(ref >> (LCP_SHIFT + TAG_BITS));
matchptr->offset = (unsigned short)nMatchOffset;
matchptr++;
nPrevOffset = nMatchOffset;
}
}
}
for (;;) {
/* pos_data[match_pos] compares greater than ref: follow the link once and offer that position as a flagged
* extra candidate (the length field occupies the high bits, so this is presumably a deeper interval) */
if ((super_ref = pos_data[match_pos]) > ref) {
match_pos = intervals[super_ref & POS_MASK] & EXCL_VISITED_MASK;
if (pCompressor->format_version >= 2 && nInWindowSize < 65536) {
if ((matchptr - pMatches) < nMaxMatches) {
int nMatchOffset = (int)(nOffset - match_pos);
if (nMatchOffset <= MAX_OFFSET && nMatchOffset != nPrevOffset) {
matchptr->length = ((unsigned short)(ref >> (LCP_SHIFT + TAG_BITS))) | 0x8000;
matchptr->offset = (unsigned short)nMatchOffset;
matchptr++;
nPrevOffset = nMatchOffset;
}
}
}
}
/* Keep following links until the linked reference no longer compares greater than ref */
while ((super_ref = pos_data[match_pos]) > ref)
match_pos = intervals[super_ref & POS_MASK] & EXCL_VISITED_MASK;
/* Lazy update: link this interval to the current position and point the matched position at the interval */
intervals[ref & POS_MASK] = nOffset | VISITED_FLAG;
pos_data[match_pos] = ref;
/* Regular match for this interval; this store does not update nPrevOffset */
if ((matchptr - pMatches) < nMaxMatches) {
int nMatchOffset = (int)(nOffset - match_pos);
if (nMatchOffset <= MAX_OFFSET && nMatchOffset != nPrevOffset) {
if (pCompressor->format_version >= 2) {
/* Drop the tag bits stored below the length */
matchptr->length = (unsigned short)(ref >> (LCP_SHIFT + TAG_BITS));
}
else {
matchptr->length = (unsigned short)(ref >> LCP_SHIFT);
}
matchptr->offset = (unsigned short)nMatchOffset;
matchptr++;
}
}
/* No parent reference left: done */
if (super_ref == 0)
break;
/* Move up to the parent interval and continue from the position it is linked to */
ref = super_ref;
match_pos = intervals[ref & POS_MASK] & EXCL_VISITED_MASK;
if (pCompressor->format_version >= 2 && nInWindowSize < 65536) {
if ((matchptr - pMatches) < nMaxMatches) {
int nMatchOffset = (int)(nOffset - match_pos);
if (nMatchOffset <= MAX_OFFSET && nMatchOffset != nPrevOffset) {
matchptr->length = ((unsigned short)(ref >> (LCP_SHIFT + TAG_BITS))) | 0x8000;
matchptr->offset = (unsigned short)nMatchOffset;
/* The entry is written first and only kept (pointer advanced) when its unflagged length exceeds 2 */
if ((matchptr->length & 0x7fff) > 2) {
matchptr++;
nPrevOffset = nMatchOffset;
}
}
}
}
}
return (int)(matchptr - pMatches);
}
/**
 * Skip previously compressed bytes
 *
 * Every position in [nStartOffset, nEndOffset) is still run through lzsa_find_matches_at(), because that call
 * also performs the lazy update of the intervals; the matches themselves are discarded (nMaxMatches is 0).
 *
 * @param pCompressor compression context
 * @param nStartOffset current offset in input window (typically 0)
 * @param nEndOffset offset to skip to in input window (typically the number of previously compressed bytes)
 */
void lzsa_skip_matches(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
   lzsa_match discarded;
   int nPos = nStartOffset;

   while (nPos < nEndOffset) {
      lzsa_find_matches_at(pCompressor, nPos, &discarded, 0, 0);
      nPos++;
   }
}
/**
 * Find all matches for the data to be compressed
 *
 * Fills pCompressor->match with exactly nMatchesPerOffset entries per position, in position order. Slots that
 * lzsa_find_matches_at() did not fill are zeroed (length 0, offset 0).
 *
 * @param pCompressor compression context
 * @param nMatchesPerOffset maximum number of matches to store for each offset
 * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
 * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
 */
void lzsa_find_all_matches(lzsa_compressor *pCompressor, const int nMatchesPerOffset, const int nStartOffset, const int nEndOffset) {
   const int nSpan = nEndOffset - nStartOffset;
   lzsa_match *pSlots = pCompressor->match;
   int nPos, nSlot;

   for (nPos = nStartOffset; nPos < nEndOffset; nPos++, pSlots += nMatchesPerOffset) {
      const int nFound = lzsa_find_matches_at(pCompressor, nPos, pSlots, nMatchesPerOffset, nSpan);

      /* Terminate the list by clearing every unused slot for this position */
      for (nSlot = nFound; nSlot < nMatchesPerOffset; nSlot++) {
         pSlots[nSlot].length = 0;
         pSlots[nSlot].offset = 0;
      }
   }
}

View file

@ -0,0 +1,91 @@
/*
* matchfinder.h - LZ match finder definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _MATCHFINDER_H
#define _MATCHFINDER_H
#ifdef __cplusplus
extern "C" {
#endif
/* Forward declarations */
typedef struct _lzsa_match lzsa_match;
typedef struct _lzsa_compressor lzsa_compressor;
/**
* Parse input data, build suffix array and overlaid data structures to speed up match finding
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress)
*
* @return 0 for success, non-zero for failure
*/
int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize);
/**
* Find matches at the specified offset in the input window
*
* @param pCompressor compression context
* @param nOffset offset to find matches at, in the input window
* @param pMatches pointer to returned matches
* @param nMaxMatches maximum number of matches to return (0 for none)
* @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress)
*
* @return number of matches
*/
int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches, const int nInWindowSize);
/**
* Skip previously compressed bytes
*
* @param pCompressor compression context
* @param nStartOffset current offset in input window (typically 0)
* @param nEndOffset offset to skip to in input window (typically the number of previously compressed bytes)
*/
void lzsa_skip_matches(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset);
/**
* Find all matches for the data to be compressed
*
* @param pCompressor compression context
* @param nMatchesPerOffset maximum number of matches to store for each offset
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
*/
void lzsa_find_all_matches(lzsa_compressor *pCompressor, const int nMatchesPerOffset, const int nStartOffset, const int nEndOffset);
#ifdef __cplusplus
}
#endif
#endif /* _MATCHFINDER_H */

View file

@ -0,0 +1,710 @@
/*
* shrink_block_v1.c - LZSA1 block compressor implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdlib.h>
#include <string.h>
#include "lib.h"
#include "shrink_block_v1.h"
#include "format.h"
/**
 * Get the number of extra bits required to represent a literals length
 *
 * Lengths below LITERALS_RUN_LEN_V1 need no extra bits; longer runs need one, two or three extra bytes
 * depending on whether the length is below 256, below 512, or larger.
 *
 * @param nLength literals length
 *
 * @return number of extra bits required (0, 8, 16 or 24)
 */
static inline int lzsa_get_literals_varlen_size_v1(const int nLength) {
   if (nLength < LITERALS_RUN_LEN_V1)
      return 0;
   if (nLength < 256)
      return 8;
   return (nLength < 512) ? 16 : 24;
}
/**
 * Write extra literals length bytes to output (compressed) buffer. The caller must first check that there is enough
 * room to write the bytes.
 *
 * Nothing is written for lengths below LITERALS_RUN_LEN_V1. Otherwise one of three encodings is used:
 * a single byte (length - LITERALS_RUN_LEN_V1), the marker 250 followed by (length - 256), or the marker 249
 * followed by the length as two bytes, low byte first.
 *
 * @param pOutData pointer to output buffer
 * @param nOutOffset current write index into output buffer
 * @param nLength literals length
 *
 * @return updated write index into output buffer
 */
static inline int lzsa_write_literals_varlen_v1(unsigned char *pOutData, int nOutOffset, int nLength) {
   if (nLength < LITERALS_RUN_LEN_V1)
      return nOutOffset;

   if (nLength < 256) {
      pOutData[nOutOffset] = nLength - LITERALS_RUN_LEN_V1;
      return nOutOffset + 1;
   }

   if (nLength < 512) {
      pOutData[nOutOffset] = 250;
      pOutData[nOutOffset + 1] = nLength - 256;
      return nOutOffset + 2;
   }

   pOutData[nOutOffset] = 249;
   pOutData[nOutOffset + 1] = nLength & 0xff;
   pOutData[nOutOffset + 2] = (nLength >> 8) & 0xff;
   return nOutOffset + 3;
}
/**
 * Get the number of extra bits required to represent an encoded match length
 *
 * Encoded lengths below MATCH_RUN_LEN_V1 need no extra bits; otherwise the size is chosen from the actual
 * match length (encoded length + MIN_MATCH_SIZE_V1): below 256, below 512, or larger.
 *
 * @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V1)
 *
 * @return number of extra bits required (0, 8, 16 or 24)
 */
static inline int lzsa_get_match_varlen_size_v1(const int nLength) {
   if (nLength < MATCH_RUN_LEN_V1)
      return 0;

   const int nActualLen = nLength + MIN_MATCH_SIZE_V1;
   if (nActualLen < 256)
      return 8;
   return (nActualLen < 512) ? 16 : 24;
}
/**
 * Write extra encoded match length bytes to output (compressed) buffer. The caller must first check that there is enough
 * room to write the bytes.
 *
 * Nothing is written for encoded lengths below MATCH_RUN_LEN_V1. Otherwise one of three encodings is used:
 * a single byte (encoded length - MATCH_RUN_LEN_V1), the marker 239 followed by (actual length - 256), or the
 * marker 238 followed by the actual length as two bytes, low byte first.
 *
 * @param pOutData pointer to output buffer
 * @param nOutOffset current write index into output buffer
 * @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V1)
 *
 * @return updated write index into output buffer
 */
static inline int lzsa_write_match_varlen_v1(unsigned char *pOutData, int nOutOffset, int nLength) {
   if (nLength < MATCH_RUN_LEN_V1)
      return nOutOffset;

   const int nActualLen = nLength + MIN_MATCH_SIZE_V1;

   if (nActualLen < 256) {
      pOutData[nOutOffset] = nLength - MATCH_RUN_LEN_V1;
      return nOutOffset + 1;
   }

   if (nActualLen < 512) {
      pOutData[nOutOffset] = 239;
      pOutData[nOutOffset + 1] = nActualLen - 256;
      return nOutOffset + 2;
   }

   pOutData[nOutOffset] = 238;
   pOutData[nOutOffset + 1] = nActualLen & 0xff;
   pOutData[nOutOffset + 2] = (nActualLen >> 8) & 0xff;
   return nOutOffset + 3;
}
/**
 * Get offset encoding cost in bits
 *
 * Offsets up to and including 256 cost one byte; anything larger costs two bytes.
 *
 * @param nMatchOffset offset to get cost of
 *
 * @return cost in bits (8 or 16)
 */
static inline int lzsa_get_offset_cost_v1(const unsigned int nMatchOffset) {
   if (nMatchOffset > 256)
      return 16;
   return 8;
}
/**
* Attempt to pick optimal matches using a forward arrivals parser, so as to produce the smallest possible output that decompresses to the same input
*
* Each position keeps up to NARRIVALS_PER_POSITION_V1 arrival slots. Arrivals are propagated forward, either by
* coding one more literal or by coding one of the candidate matches in pCompressor->match, and the chain that
* reaches nEndOffset through slot 0 is then walked backwards to fill pBestMatch. Nothing is done when the range is
* larger than BLOCK_SIZE.
*
* @param pCompressor compression context
* @param pBestMatch receives the chosen command for each visited position, indexed by input window offset (length 0 and offset 0 for a literal)
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
* @param nReduce non-zero to break cost ties in favor of the lower score; zero adds 2 * BLOCK_SIZE to the incumbent's score in that comparison
*/
static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, lzsa_match *pBestMatch, const int nStartOffset, const int nEndOffset, const int nReduce) {
/* Bias the base pointer so that arrival[] can be indexed with absolute window offsets */
lzsa_arrival *arrival = pCompressor->arrival - (nStartOffset << ARRIVALS_PER_POSITION_SHIFT);
const int nMinMatchSize = pCompressor->min_match_size;
const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0;
/* Switching between literals and matches is only penalized when ratio is not favored */
const int nModeSwitchPenalty = nFavorRatio ? 0 : MODESWITCH_PENALTY;
const int nDisableScore = nReduce ? 0 : (2 * BLOCK_SIZE);
int i, j, n;
if ((nEndOffset - nStartOffset) > BLOCK_SIZE) return;
/* Clear every slot, including the ones for the end position; from_slot == 0 marks an unused slot */
memset(arrival + (nStartOffset << ARRIVALS_PER_POSITION_SHIFT), 0, sizeof(lzsa_arrival) * ((nEndOffset - nStartOffset + 1) << ARRIVALS_PER_POSITION_SHIFT));
/* Seed the start position: -1 makes the slot count as used while stopping the backward walk at the end */
arrival[nStartOffset << ARRIVALS_PER_POSITION_SHIFT].from_slot = -1;
for (i = nStartOffset; i != nEndOffset; i++) {
lzsa_arrival* cur_arrival = &arrival[i << ARRIVALS_PER_POSITION_SHIFT];
int m;
/* Step 1: extend every arrival at this position by one literal */
for (j = 0; j < NARRIVALS_PER_POSITION_V1 && cur_arrival[j].from_slot; j++) {
int nPrevCost = cur_arrival[j].cost;
int nCodingChoiceCost = nPrevCost + 8 /* literal */;
int nScore = cur_arrival[j].score + 1;
int nNumLiterals = cur_arrival[j].num_literals + 1;
/* Reaching one of these run lengths adds another extra length byte */
if (nNumLiterals == LITERALS_RUN_LEN_V1 || nNumLiterals == 256 || nNumLiterals == 512) {
nCodingChoiceCost += 8;
}
/* First literal after a match */
if (nNumLiterals == 1)
nCodingChoiceCost += nModeSwitchPenalty;
lzsa_arrival *pDestSlots = &arrival[(i + 1) << ARRIVALS_PER_POSITION_SHIFT];
for (n = 0; n < NARRIVALS_PER_POSITION_V1 /* we only need the literals + short match cost + long match cost cases */; n++) {
lzsa_arrival *pDestArrival = &pDestSlots[n];
if (pDestArrival->from_slot == 0 ||
nCodingChoiceCost < pDestArrival->cost ||
(nCodingChoiceCost == pDestArrival->cost && nScore < (pDestArrival->score + nDisableScore))) {
/* Insert ahead of the first slot this arrival beats: shift the remaining slots down by one, dropping the last */
memmove(&arrival[((i + 1) << ARRIVALS_PER_POSITION_SHIFT) + n + 1],
&arrival[((i + 1) << ARRIVALS_PER_POSITION_SHIFT) + n],
sizeof(lzsa_arrival) * (NARRIVALS_PER_POSITION_V1 - n - 1));
pDestArrival->cost = nCodingChoiceCost;
pDestArrival->from_pos = i;
/* Source slot is stored plus one, so that 0 keeps meaning "unused" */
pDestArrival->from_slot = j + 1;
pDestArrival->match_len = 0;
pDestArrival->num_literals = nNumLiterals;
pDestArrival->score = nScore;
pDestArrival->rep_offset = cur_arrival[j].rep_offset;
break;
}
}
}
/* Step 2: try every candidate match stored for this position */
const lzsa_match *match = pCompressor->match + ((i - nStartOffset) << MATCHES_PER_INDEX_SHIFT_V1);
/* j is the number of used arrival slots counted by the literal loop above */
int nNumArrivalsForThisPos = j;
for (m = 0; m < NMATCHES_PER_INDEX_V1 && match[m].length; m++) {
int nMatchLen = match[m].length;
int nMatchOffsetCost = lzsa_get_offset_cost_v1(match[m].offset);
int nStartingMatchLen, k;
/* Truncate matches that would run past the end of the block */
if ((i + nMatchLen) > nEndOffset)
nMatchLen = nEndOffset - i;
/* Long matches are only tried at full length; shorter ones are tried at every length from the minimum up */
if (nMatchLen >= LEAVE_ALONE_MATCH_SIZE)
nStartingMatchLen = nMatchLen;
else
nStartingMatchLen = nMinMatchSize;
for (k = nStartingMatchLen; k <= nMatchLen; k++) {
int nMatchLenCost = lzsa_get_match_varlen_size_v1(k - MIN_MATCH_SIZE_V1);
lzsa_arrival *pDestSlots = &arrival[(i + k) << ARRIVALS_PER_POSITION_SHIFT];
for (j = 0; j < nNumArrivalsForThisPos; j++) {
int nPrevCost = cur_arrival[j].cost;
int nCodingChoiceCost = nPrevCost + 8 /* token */ /* the actual cost of the literals themselves accumulates up the chain */ + nMatchOffsetCost + nMatchLenCost;
int exists = 0;
/* Match coded directly after another match */
if (!cur_arrival[j].num_literals)
nCodingChoiceCost += nModeSwitchPenalty;
/* Skip this choice if the destination already holds an arrival that costs no more and whose offset has the same encoding cost */
for (n = 0;
n < NARRIVALS_PER_POSITION_V1 && pDestSlots[n].from_slot && pDestSlots[n].cost <= nCodingChoiceCost;
n++) {
if (lzsa_get_offset_cost_v1(pDestSlots[n].rep_offset) == nMatchOffsetCost) {
exists = 1;
break;
}
}
if (!exists) {
int nScore = cur_arrival[j].score + 5;
for (n = 0; n < NARRIVALS_PER_POSITION_V1 /* we only need the literals + short match cost + long match cost cases */; n++) {
lzsa_arrival *pDestArrival = &pDestSlots[n];
if (pDestArrival->from_slot == 0 ||
nCodingChoiceCost < pDestArrival->cost ||
(nCodingChoiceCost == pDestArrival->cost && nScore < (pDestArrival->score + nDisableScore))) {
memmove(&pDestSlots[n + 1],
&pDestSlots[n],
sizeof(lzsa_arrival) * (NARRIVALS_PER_POSITION_V1 - n - 1));
pDestArrival->cost = nCodingChoiceCost;
pDestArrival->from_pos = i;
pDestArrival->from_slot = j + 1;
pDestArrival->match_len = k;
pDestArrival->num_literals = 0;
pDestArrival->score = nScore;
pDestArrival->rep_offset = match[m].offset;
/* Forces the loop over source arrivals to end once one insertion succeeded for this length */
j = NARRIVALS_PER_POSITION_V1;
break;
}
}
}
}
}
}
}
/* i equals nEndOffset here: walk back from slot 0 of the end position, recording the command chosen at each step */
lzsa_arrival *end_arrival = &arrival[(i << ARRIVALS_PER_POSITION_SHIFT) + 0];
while (end_arrival->from_slot > 0 && end_arrival->from_pos >= 0) {
if (end_arrival->from_pos >= nEndOffset) return;
pBestMatch[end_arrival->from_pos].length = end_arrival->match_len;
if (end_arrival->match_len)
pBestMatch[end_arrival->from_pos].offset = end_arrival->rep_offset;
else
pBestMatch[end_arrival->from_pos].offset = 0;
end_arrival = &arrival[(end_arrival->from_pos << ARRIVALS_PER_POSITION_SHIFT) + (end_arrival->from_slot - 1)];
}
}
/**
 * Attempt to minimize the number of commands issued in the compressed data block, in order to speed up decompression without
 * impacting the compression ratio
 *
 * Three rewrites are applied while scanning the block:
 *  - merge: a literal directly followed by a match is folded into a match one byte longer, when the input allows it
 *    and the match length encoding grows by at most 8 bits;
 *  - reduce: a short match (9 bytes or less) is turned back into literals when that does not cost more bits;
 *  - join: two back-to-back matches are combined into one when the first match's source data continues into the
 *    second match and the combined length does not cost more bits.
 *
 * @param pCompressor compression context
 * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
 * @param pBestMatch optimal matches to emit, indexed by input window offset; modified in place
 * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
 * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
 *
 * @return non-zero if a merge or a reduce was performed, 0 otherwise (a join alone does not set the result)
 */
static int lzsa_optimize_command_count_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, lzsa_match *pBestMatch, const int nStartOffset, const int nEndOffset) {
   int i;
   int nNumLiterals = 0;
   int nDidReduce = 0;

   for (i = nStartOffset; i < nEndOffset; ) {
      lzsa_match *pMatch = pBestMatch + i;

      /* Merge: this position is a literal, the next one starts a match, and the byte at this position also
       * matches at the same offset, so the match can start one byte earlier */
      if (pMatch->length == 0 &&
         (i + 1) < nEndOffset &&
         pBestMatch[i + 1].length >= MIN_MATCH_SIZE_V1 &&
         pBestMatch[i + 1].length < MAX_VARLEN &&
         pBestMatch[i + 1].offset &&
         i >= pBestMatch[i + 1].offset &&
         (i + pBestMatch[i + 1].length + 1) <= nEndOffset &&
         !memcmp(pInWindow + i - (pBestMatch[i + 1].offset), pInWindow + i, pBestMatch[i + 1].length + 1)) {
         int nCurLenSize = lzsa_get_match_varlen_size_v1(pBestMatch[i + 1].length - MIN_MATCH_SIZE_V1);
         int nReducedLenSize = lzsa_get_match_varlen_size_v1(pBestMatch[i + 1].length + 1 - MIN_MATCH_SIZE_V1);

         if ((nReducedLenSize - nCurLenSize) <= 8) {
            /* Merge */
            pBestMatch[i].length = pBestMatch[i + 1].length + 1;
            pBestMatch[i].offset = pBestMatch[i + 1].offset;
            pBestMatch[i + 1].length = 0;
            pBestMatch[i + 1].offset = 0;
            nDidReduce = 1;
            /* Re-examine the same position, which now holds the merged match */
            continue;
         }
      }

      if (pMatch->length >= MIN_MATCH_SIZE_V1) {
         if (pMatch->length <= 9 /* Don't waste time considering large matches, they will always win over literals */ &&
            (i + pMatch->length) < nEndOffset /* Don't consider the last token in the block, we can only reduce a match inbetween other tokens */) {
            int nNextIndex = i + pMatch->length;
            int nNextLiterals = 0;

            while (nNextIndex < nEndOffset && pBestMatch[nNextIndex].length < MIN_MATCH_SIZE_V1) {
               nNextLiterals++;
               nNextIndex++;
            }

            /* This command is a match, is followed by 'nNextLiterals' literals and then by another match, or the end of the input. Calculate this command's current cost (excluding 'nNumLiterals' bytes) */
            if ((8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + ((pMatch->offset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size_v1(pMatch->length - MIN_MATCH_SIZE_V1) +
               8 /* token */ + lzsa_get_literals_varlen_size_v1(nNextLiterals)) >=
               (8 /* token */ + (pMatch->length << 3) + lzsa_get_literals_varlen_size_v1(nNumLiterals + pMatch->length + nNextLiterals))) {
               /* Reduce */
               int nMatchLen = pMatch->length;
               int j;

               for (j = 0; j < nMatchLen; j++) {
                  pBestMatch[i + j].length = 0;
               }
               nDidReduce = 1;
               /* Re-examine the same position, which is now a literal */
               continue;
            }
         }

         /* Join: the entry at (i + length) may only be inspected when it lies inside the block, hence the strict
          * comparison. A match that ends exactly at nEndOffset can never be joined anyway: as long as
          * MIN_MATCH_SIZE_V1 is positive, the combined end would exceed nEndOffset and fail the bound check
          * further down, so the strict test only removes a read one element past the range. */
         if ((i + pMatch->length) < nEndOffset && pMatch->offset > 0 && pMatch->length >= MIN_MATCH_SIZE_V1 &&
            pBestMatch[i + pMatch->length].offset > 0 &&
            pBestMatch[i + pMatch->length].length >= MIN_MATCH_SIZE_V1 &&
            (pMatch->length + pBestMatch[i + pMatch->length].length) >= LEAVE_ALONE_MATCH_SIZE &&
            (pMatch->length + pBestMatch[i + pMatch->length].length) <= MAX_VARLEN &&
            (i + pMatch->length) > pMatch->offset &&
            (i + pMatch->length) > pBestMatch[i + pMatch->length].offset &&
            (i + pMatch->length + pBestMatch[i + pMatch->length].length) <= nEndOffset &&
            !memcmp(pInWindow + i - pMatch->offset + pMatch->length,
               pInWindow + i + pMatch->length - pBestMatch[i + pMatch->length].offset,
               pBestMatch[i + pMatch->length].length)) {
            int nCurPartialSize = lzsa_get_match_varlen_size_v1(pMatch->length - MIN_MATCH_SIZE_V1);
            nCurPartialSize += 8 /* token */ + lzsa_get_literals_varlen_size_v1(0) + ((pBestMatch[i + pMatch->length].offset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size_v1(pBestMatch[i + pMatch->length].length - MIN_MATCH_SIZE_V1);

            int nReducedPartialSize = lzsa_get_match_varlen_size_v1(pMatch->length + pBestMatch[i + pMatch->length].length - MIN_MATCH_SIZE_V1);

            if (nCurPartialSize >= nReducedPartialSize) {
               int nMatchLen = pMatch->length;

               /* Join */
               pMatch->length += pBestMatch[i + nMatchLen].length;
               pBestMatch[i + nMatchLen].offset = 0;
               pBestMatch[i + nMatchLen].length = -1;
               /* Re-examine the same position so the joined match can absorb a further one */
               continue;
            }
         }

         i += pMatch->length;
         nNumLiterals = 0;
      }
      else {
         nNumLiterals++;
         i++;
      }
   }

   return nDidReduce;
}
/**
 * Get compressed data block size
 *
 * Walks the chosen commands and adds up, in bits, the cost of every token, literal run, match offset and
 * variable-length extension, plus the final literals-only command. When LZSA_FLAG_RAW_BLOCK is set, 4 more
 * bytes (32 bits) are accounted for.
 *
 * @param pCompressor compression context
 * @param pBestMatch optimal matches to emit
 * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
 * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
 *
 * @return size, in bits, of the compressed data that will be written to output buffer
 */
static int lzsa_get_compressed_size_v1(lzsa_compressor *pCompressor, lzsa_match *pBestMatch, const int nStartOffset, const int nEndOffset) {
   int nPendingLiterals = 0;
   int nTotalBits = 0;
   int nPos = nStartOffset;

   while (nPos < nEndOffset) {
      const lzsa_match *pCur = &pBestMatch[nPos];

      if (pCur->length < MIN_MATCH_SIZE_V1) {
         /* Literal: it is paid for as part of the next command */
         nPendingLiterals++;
         nPos++;
         continue;
      }

      const int nOffset = pCur->offset;
      const int nLen = pCur->length;
      const int nOffsetBits = (nOffset <= 256) ? 8 : 16;

      nTotalBits += 8 /* token */ + lzsa_get_literals_varlen_size_v1(nPendingLiterals) + (nPendingLiterals << 3) + nOffsetBits /* match offset */ + lzsa_get_match_varlen_size_v1(nLen - MIN_MATCH_SIZE_V1);
      nPendingLiterals = 0;
      nPos += nLen;
   }

   /* Final command, carrying whatever literals are left over */
   nTotalBits += 8 /* token */ + lzsa_get_literals_varlen_size_v1(nPendingLiterals) + (nPendingLiterals << 3);

   if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK)
      nTotalBits += 8 * 4;

   return nTotalBits;
}
/**
* Emit block of compressed data
*
* Walks pBestMatch from nStartOffset to nEndOffset, gathering literals until a match is found and then writing one
* command (token, literals, offset, match length). A final literals-only command is always written, followed by a
* 4-byte end-of-data marker when LZSA_FLAG_RAW_BLOCK is set. Also updates pCompressor->stats, num_commands and,
* for raw blocks, safe_dist.
*
* @param pCompressor compression context
* @param pBestMatch optimal matches to emit
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
* @param pOutData pointer to output buffer
* @param nMaxOutDataSize maximum size of output buffer, in bytes
*
* @return size of compressed data in output buffer, or -1 if the data is uncompressible (it does not fit in nMaxOutDataSize bytes, or a match offset is outside MIN_OFFSET..MAX_OFFSET)
*/
static int lzsa_write_block_v1(lzsa_compressor *pCompressor, lzsa_match *pBestMatch, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) {
int i;
int nNumLiterals = 0;
/* Window offset of the first literal in the run currently being gathered */
int nInFirstLiteralOffset = 0;
int nOutOffset = 0;
for (i = nStartOffset; i < nEndOffset; ) {
const lzsa_match *pMatch = pBestMatch + i;
if (pMatch->length >= MIN_MATCH_SIZE_V1) {
int nMatchOffset = pMatch->offset;
int nMatchLen = pMatch->length;
int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1;
/* Token fields saturate at their run-length constants; the remainder goes into the extra length bytes */
int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
int nTokenMatchLen = (nEncodedMatchLen >= MATCH_RUN_LEN_V1) ? MATCH_RUN_LEN_V1 : nEncodedMatchLen;
int nTokenLongOffset = (nMatchOffset <= 256) ? 0x00 : 0x80;
/* Size of the whole command, in bits; checked against the remaining output space before anything is written */
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3) + (nTokenLongOffset ? 16 : 8) /* match offset */ + lzsa_get_match_varlen_size_v1(nEncodedMatchLen);
if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
return -1;
if (nMatchOffset < MIN_OFFSET || nMatchOffset > MAX_OFFSET)
return -1;
/* Token: long-offset flag in bit 7, literals length starting at bit 4, match length in the low bits */
pOutData[nOutOffset++] = nTokenLongOffset | (nTokenLiteralsLen << 4) | nTokenMatchLen;
nOutOffset = lzsa_write_literals_varlen_v1(pOutData, nOutOffset, nNumLiterals);
/* Literal statistics; a min_literals of -1 is treated as "not set yet" */
if (nNumLiterals < pCompressor->stats.min_literals || pCompressor->stats.min_literals == -1)
pCompressor->stats.min_literals = nNumLiterals;
if (nNumLiterals > pCompressor->stats.max_literals)
pCompressor->stats.max_literals = nNumLiterals;
pCompressor->stats.total_literals += nNumLiterals;
pCompressor->stats.literals_divisor++;
if (nNumLiterals != 0) {
memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
nOutOffset += nNumLiterals;
nNumLiterals = 0;
}
/* The offset is stored negated: low byte first, then the high byte when the long-offset flag is set */
pOutData[nOutOffset++] = (-nMatchOffset) & 0xff;
if (nTokenLongOffset) {
pOutData[nOutOffset++] = (-nMatchOffset) >> 8;
}
nOutOffset = lzsa_write_match_varlen_v1(pOutData, nOutOffset, nEncodedMatchLen);
/* Offset and match length statistics; -1 is again treated as "not set yet" for the minimums */
if (nMatchOffset < pCompressor->stats.min_offset || pCompressor->stats.min_offset == -1)
pCompressor->stats.min_offset = nMatchOffset;
if (nMatchOffset > pCompressor->stats.max_offset)
pCompressor->stats.max_offset = nMatchOffset;
pCompressor->stats.total_offsets += nMatchOffset;
if (nMatchLen < pCompressor->stats.min_match_len || pCompressor->stats.min_match_len == -1)
pCompressor->stats.min_match_len = nMatchLen;
if (nMatchLen > pCompressor->stats.max_match_len)
pCompressor->stats.max_match_len = nMatchLen;
pCompressor->stats.total_match_lens += nMatchLen;
pCompressor->stats.match_divisor++;
/* Matches at offset 1 and offset 2 are additionally tracked in the rle1 and rle2 statistics */
if (nMatchOffset == 1) {
if (nMatchLen < pCompressor->stats.min_rle1_len || pCompressor->stats.min_rle1_len == -1)
pCompressor->stats.min_rle1_len = nMatchLen;
if (nMatchLen > pCompressor->stats.max_rle1_len)
pCompressor->stats.max_rle1_len = nMatchLen;
pCompressor->stats.total_rle1_lens += nMatchLen;
pCompressor->stats.rle1_divisor++;
}
else if (nMatchOffset == 2) {
if (nMatchLen < pCompressor->stats.min_rle2_len || pCompressor->stats.min_rle2_len == -1)
pCompressor->stats.min_rle2_len = nMatchLen;
if (nMatchLen > pCompressor->stats.max_rle2_len)
pCompressor->stats.max_rle2_len = nMatchLen;
pCompressor->stats.total_rle2_lens += nMatchLen;
pCompressor->stats.rle2_divisor++;
}
i += nMatchLen;
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
/* Track the largest amount by which the input bytes consumed so far exceed the output bytes written so far */
int nCurSafeDist = (i - nStartOffset) - nOutOffset;
if (nCurSafeDist >= 0 && pCompressor->safe_dist < nCurSafeDist)
pCompressor->safe_dist = nCurSafeDist;
}
pCompressor->num_commands++;
}
else {
/* Literal: remember where the run starts; it is copied out with the next command */
if (nNumLiterals == 0)
nInFirstLiteralOffset = i;
nNumLiterals++;
i++;
}
}
/* Final command: flushes the remaining literals (possibly none) and carries no match */
{
int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3);
if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
return -1;
/* Raw blocks set the match length field of the last token to 0x0f; other blocks leave it at 0 */
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK)
pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x0f;
else
pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x00;
nOutOffset = lzsa_write_literals_varlen_v1(pOutData, nOutOffset, nNumLiterals);
if (nNumLiterals < pCompressor->stats.min_literals || pCompressor->stats.min_literals == -1)
pCompressor->stats.min_literals = nNumLiterals;
if (nNumLiterals > pCompressor->stats.max_literals)
pCompressor->stats.max_literals = nNumLiterals;
pCompressor->stats.total_literals += nNumLiterals;
pCompressor->stats.literals_divisor++;
if (nNumLiterals != 0) {
memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
nOutOffset += nNumLiterals;
nNumLiterals = 0;
}
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
int nCurSafeDist = (i - nStartOffset) - nOutOffset;
if (nCurSafeDist >= 0 && pCompressor->safe_dist < nCurSafeDist)
pCompressor->safe_dist = nCurSafeDist;
}
pCompressor->num_commands++;
}
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
/* Emit EOD marker for raw block */
if ((nOutOffset + 4) > nMaxOutDataSize)
return -1;
/* A zero offset byte, then 238 (the marker lzsa_write_match_varlen_v1 uses for a two-byte length) followed by a length of zero */
pOutData[nOutOffset++] = 0;
pOutData[nOutOffset++] = 238;
pOutData[nOutOffset++] = 0;
pOutData[nOutOffset++] = 0;
}
return nOutOffset;
}
/**
 * Emit raw block of uncompressible data
 *
 * The block is a single literals-only command (token, optional extended
 * literals length, then the literal bytes) followed by the 4-byte EOD marker.
 *
 * @param pCompressor compression context
 * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
 * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
 * @param nEndOffset offset to stop copying at (typically the size of the total input window in bytes)
 * @param pOutData pointer to output buffer
 * @param nMaxOutDataSize maximum size of output buffer, in bytes
 *
 * @return size of the data written to the output buffer, or -1 if it does not fit in nMaxOutDataSize bytes
 */
static int lzsa_write_raw_uncompressed_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) {
   const int nNumLiterals = nEndOffset - nStartOffset;
   const int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
   int nOutOffset = 0;

   /* Everything in this sum is counted in bits and converted to bytes with >> 3 below,
    * so the 4-byte EOD marker has to be added as 4 << 3 bits. Adding a bare 4 would be
    * discarded by the shift and let the marker overrun the output buffer. */
   const int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3) + (4 << 3) /* EOD marker */;

   if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
      return -1;

   /* This block holds exactly one command */
   pCompressor->num_commands = 0;

   pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x0f;
   nOutOffset = lzsa_write_literals_varlen_v1(pOutData, nOutOffset, nNumLiterals);

   if (nNumLiterals != 0) {
      memcpy(pOutData + nOutOffset, pInWindow + nStartOffset, nNumLiterals);
      nOutOffset += nNumLiterals;
   }

   pCompressor->num_commands++;

   /* Emit EOD marker for raw block */
   pOutData[nOutOffset++] = 0;
   pOutData[nOutOffset++] = 238;
   pOutData[nOutOffset++] = 0;
   pOutData[nOutOffset++] = 0;

   return nOutOffset;
}
/**
 * Choose the best parse for a block, trim the command count where that is free, and emit the block as compressed LZSA1 data
 *
 * Two parses are evaluated: one that ignores the command count when costs tie, and (for
 * inputs smaller than 65536 bytes) one that breaks ties in favor of fewer commands. The
 * second one is used when it is not larger than the first.
 *
 * @param pCompressor compression context
 * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
 * @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
 * @param nInDataSize number of input bytes to compress
 * @param pOutData pointer to output buffer
 * @param nMaxOutDataSize maximum size of output buffer, in bytes
 *
 * @return size of compressed data in output buffer, or -1 if the data is uncompressible
 */
int lzsa_optimize_and_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
   const int nMaxReducePasses = 20;
   const int nEndOffset = nPreviousBlockSize + nInDataSize;
   /* The match arrays are indexed by window position, so rebase them by the previous block's size */
   lzsa_match *pBaseParse = pCompressor->best_match - nPreviousBlockSize;
   lzsa_match *pChosenParse = pBaseParse;
   int nBaseSize, nOutSize, nPass;

   /* First parse: optimal, ties are not broken in favor of fewer tokens */
   memset(pCompressor->best_match, 0, BLOCK_SIZE * sizeof(lzsa_match));
   lzsa_optimize_forward_v1(pCompressor, pBaseParse, nPreviousBlockSize, nEndOffset, 0 /* reduce */);

   /* Keep reducing the command count until a pass changes nothing, up to the pass limit */
   for (nPass = 0; nPass < nMaxReducePasses; nPass++) {
      if (!lzsa_optimize_command_count_v1(pCompressor, pInWindow, pBaseParse, nPreviousBlockSize, nEndOffset))
         break;
   }

   nBaseSize = lzsa_get_compressed_size_v1(pCompressor, pBaseParse, nPreviousBlockSize, nEndOffset);

   if (nBaseSize > 0 && nInDataSize < 65536) {
      lzsa_match *pReducedParse = pCompressor->improved_match - nPreviousBlockSize;
      int nReducedSize;

      /* Second parse: optimal, ties are broken in favor of fewer tokens */
      memset(pCompressor->improved_match, 0, BLOCK_SIZE * sizeof(lzsa_match));
      lzsa_optimize_forward_v1(pCompressor, pReducedParse, nPreviousBlockSize, nEndOffset, 1 /* reduce */);

      for (nPass = 0; nPass < nMaxReducePasses; nPass++) {
         if (!lzsa_optimize_command_count_v1(pCompressor, pInWindow, pReducedParse, nPreviousBlockSize, nEndOffset))
            break;
      }

      nReducedSize = lzsa_get_compressed_size_v1(pCompressor, pReducedParse, nPreviousBlockSize, nEndOffset);

      /* Use the parse with fewer tokens as long as it did not make the output larger */
      if (nReducedSize > 0 && nReducedSize <= nBaseSize)
         pChosenParse = pReducedParse;
   }

   nOutSize = lzsa_write_block_v1(pCompressor, pChosenParse, pInWindow, nPreviousBlockSize, nEndOffset, pOutData, nMaxOutDataSize);

   /* A raw block has no framing to mark it as stored, so fall back to a single literals-only command */
   if (nOutSize < 0 && (pCompressor->flags & LZSA_FLAG_RAW_BLOCK))
      nOutSize = lzsa_write_raw_uncompressed_block_v1(pCompressor, pInWindow, nPreviousBlockSize, nEndOffset, pOutData, nMaxOutDataSize);

   return nOutSize;
}

View file

@ -0,0 +1,53 @@
/*
* shrink_block_v1.h - LZSA1 block compressor definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _SHRINK_BLOCK_V1_H
#define _SHRINK_BLOCK_V1_H
/* Forward declarations */
/* Opaque here: only a pointer to the context is needed by the prototype below */
typedef struct _lzsa_compressor lzsa_compressor;
/**
 * Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA1 data
 *
 * When the compressed block does not fit and LZSA_FLAG_RAW_BLOCK is set in the context's flags,
 * the implementation falls back to writing the input as a single literals-only raw block.
 *
 * @param pCompressor compression context
 * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
 * @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
 * @param nInDataSize number of input bytes to compress
 * @param pOutData pointer to output buffer
 * @param nMaxOutDataSize maximum size of output buffer, in bytes
 *
 * @return size of compressed data in output buffer, or -1 if the data is uncompressible
 */
int lzsa_optimize_and_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
#endif /* _SHRINK_BLOCK_V1_H */

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,53 @@
/*
* shrink_block_v2.h - LZSA2 block compressor definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _SHRINK_BLOCK_V2_H
#define _SHRINK_BLOCK_V2_H
/* Forward declarations */
/* Opaque here: only a pointer to the context is needed by the prototype below */
typedef struct _lzsa_compressor lzsa_compressor;
/**
 * Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA2 data
 *
 * @param pCompressor compression context
 * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
 * @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
 * @param nInDataSize number of input bytes to compress
 * @param pOutData pointer to output buffer
 * @param nMaxOutDataSize maximum size of output buffer, in bytes
 *
 * @return size of compressed data in output buffer, or -1 if the data is uncompressible
 */
int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
#endif /* _SHRINK_BLOCK_V2_H */

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,264 @@
/*
* shrink_context.c - compression context implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdlib.h>
#include <string.h>
#include "shrink_context.h"
#include "shrink_block_v1.h"
#include "shrink_block_v2.h"
#include "format.h"
#include "matchfinder.h"
#include "lib.h"
/**
 * Set up a compression context and allocate its work buffers
 *
 * On any failure, everything allocated so far is released through
 * lzsa_compressor_destroy() before returning.
 *
 * @param pCompressor compression context to initialize
 * @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
 * @param nMinMatchSize requested minimum match size; clamped to the range allowed for the format version
 * @param nFormatVersion version of format to use (1-2)
 * @param nFlags compression flags
 *
 * @return 0 for success, non-zero for failure
 */
int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize, const int nFormatVersion, const int nFlags) {
   const int nIsV1 = (nFormatVersion == 1);
   const int nMinMatchFloor = nIsV1 ? MIN_MATCH_SIZE_V1 : MIN_MATCH_SIZE_V2;
   const int nMinMatchCeiling = nIsV1 ? 5 : 3;
   const int nSortInitResult = divsufsort_init(&pCompressor->divsufsort_context);
   int nClampedMinMatch = nMinMatchSize;
   int nMatchesPerIndex;

   /* Start with every buffer unset so that the failure path can destroy a partially built context */
   pCompressor->intervals = NULL;
   pCompressor->pos_data = NULL;
   pCompressor->open_intervals = NULL;
   pCompressor->match = NULL;
   pCompressor->best_match = NULL;
   pCompressor->improved_match = NULL;
   pCompressor->arrival = NULL;
   pCompressor->rep_slot_handled_mask = NULL;
   pCompressor->rep_len_handled_mask = NULL;
   pCompressor->first_offset_for_byte = NULL;
   pCompressor->next_offset_for_pos = NULL;

   if (nClampedMinMatch < nMinMatchFloor)
      nClampedMinMatch = nMinMatchFloor;
   else if (nClampedMinMatch > nMinMatchCeiling)
      nClampedMinMatch = nMinMatchCeiling;
   pCompressor->min_match_size = nClampedMinMatch;

   pCompressor->format_version = nFormatVersion;
   pCompressor->flags = nFlags;
   pCompressor->safe_dist = 0;
   pCompressor->num_commands = 0;
   pCompressor->end_position = -1;

   /* The minimum statistics use -1 to mean that no sample has been recorded yet */
   memset(&pCompressor->stats, 0, sizeof(pCompressor->stats));
   pCompressor->stats.min_literals = -1;
   pCompressor->stats.min_match_len = -1;
   pCompressor->stats.min_offset = -1;
   pCompressor->stats.min_rle1_len = -1;
   pCompressor->stats.min_rle2_len = -1;

   if (nSortInitResult)
      goto init_failed;

   /* Buffers needed by both format versions */
   pCompressor->intervals = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int));
   if (!pCompressor->intervals)
      goto init_failed;

   pCompressor->pos_data = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int));
   if (!pCompressor->pos_data)
      goto init_failed;

   pCompressor->open_intervals = (unsigned int *)malloc((LCP_AND_TAG_MAX + 1) * sizeof(unsigned int));
   if (!pCompressor->open_intervals)
      goto init_failed;

   pCompressor->arrival = (lzsa_arrival *)malloc(((BLOCK_SIZE + 1) << ARRIVALS_PER_POSITION_SHIFT) * sizeof(lzsa_arrival));
   if (!pCompressor->arrival)
      goto init_failed;

   pCompressor->best_match = (lzsa_match *)malloc(BLOCK_SIZE * sizeof(lzsa_match));
   if (!pCompressor->best_match)
      goto init_failed;

   pCompressor->improved_match = (lzsa_match *)malloc(BLOCK_SIZE * sizeof(lzsa_match));
   if (!pCompressor->improved_match)
      goto init_failed;

   nMatchesPerIndex = (pCompressor->format_version == 2) ? NMATCHES_PER_INDEX_V2 : NMATCHES_PER_INDEX_V1;
   pCompressor->match = (lzsa_match *)malloc(BLOCK_SIZE * nMatchesPerIndex * sizeof(lzsa_match));
   if (!pCompressor->match)
      goto init_failed;

   /* Only format version 2 needs the remaining buffers */
   if (pCompressor->format_version != 2)
      return 0;

   pCompressor->rep_slot_handled_mask = (char *)malloc(NARRIVALS_PER_POSITION_V2_BIG * ((LCP_MAX + 1) / 8) * sizeof(char));
   if (!pCompressor->rep_slot_handled_mask)
      goto init_failed;

   pCompressor->rep_len_handled_mask = (char *)malloc(((LCP_MAX + 1) / 8) * sizeof(char));
   if (!pCompressor->rep_len_handled_mask)
      goto init_failed;

   pCompressor->first_offset_for_byte = (int *)malloc(65536 * sizeof(int));
   if (!pCompressor->first_offset_for_byte)
      goto init_failed;

   pCompressor->next_offset_for_pos = (int *)malloc(BLOCK_SIZE * sizeof(int));
   if (!pCompressor->next_offset_for_pos)
      goto init_failed;

   return 0;

init_failed:
   lzsa_compressor_destroy(pCompressor);
   return 100;
}
/**
 * Release every resource owned by a compression context
 *
 * Each buffer pointer is reset to NULL after being freed, so calling this on a
 * partially initialized or already destroyed context does not free anything twice.
 *
 * @param pCompressor compression context to clean up
 */
void lzsa_compressor_destroy(lzsa_compressor *pCompressor) {
   divsufsort_destroy(&pCompressor->divsufsort_context);

   /* free(NULL) is a no-op, so buffers that were never allocated need no guard */
   free(pCompressor->next_offset_for_pos);
   pCompressor->next_offset_for_pos = NULL;

   free(pCompressor->first_offset_for_byte);
   pCompressor->first_offset_for_byte = NULL;

   free(pCompressor->rep_len_handled_mask);
   pCompressor->rep_len_handled_mask = NULL;

   free(pCompressor->rep_slot_handled_mask);
   pCompressor->rep_slot_handled_mask = NULL;

   free(pCompressor->match);
   pCompressor->match = NULL;

   free(pCompressor->improved_match);
   pCompressor->improved_match = NULL;

   free(pCompressor->arrival);
   pCompressor->arrival = NULL;

   free(pCompressor->best_match);
   pCompressor->best_match = NULL;

   free(pCompressor->open_intervals);
   pCompressor->open_intervals = NULL;

   free(pCompressor->pos_data);
   pCompressor->pos_data = NULL;

   free(pCompressor->intervals);
   pCompressor->intervals = NULL;
}
/**
 * Compress one block of data
 *
 * When LZSA_FLAG_RAW_BACKWARD is set, the bytes to compress are reversed in place for the
 * duration of the call and restored before returning, and the compressed output is reversed too.
 *
 * @param pCompressor compression context
 * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
 * @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
 * @param nInDataSize number of input bytes to compress
 * @param pOutData pointer to output buffer
 * @param nMaxOutDataSize maximum size of output buffer, in bytes
 *
 * @return size of compressed data in output buffer, or -1 if the data is uncompressible
 */
int lzsa_compressor_shrink_block(lzsa_compressor *pCompressor, unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
   const int nWindowSize = nPreviousBlockSize + nInDataSize;
   unsigned char *pBlockData = pInWindow + nPreviousBlockSize;
   int nCompressedSize = -1;

   if (pCompressor->flags & LZSA_FLAG_RAW_BACKWARD)
      lzsa_reverse_buffer(pBlockData, nInDataSize);

   if (lzsa_build_suffix_array(pCompressor, pInWindow, nWindowSize) == 0) {
      /* Step over the previous block's positions; matches are only collected for the new bytes */
      if (nPreviousBlockSize)
         lzsa_skip_matches(pCompressor, 0, nPreviousBlockSize);

      lzsa_find_all_matches(pCompressor, (pCompressor->format_version == 2) ? NMATCHES_PER_INDEX_V2 : NMATCHES_PER_INDEX_V1, nPreviousBlockSize, nWindowSize);

      switch (pCompressor->format_version) {
      case 1:
         nCompressedSize = lzsa_optimize_and_write_block_v1(pCompressor, pInWindow, nPreviousBlockSize, nInDataSize, pOutData, nMaxOutDataSize);
         break;
      case 2:
         nCompressedSize = lzsa_optimize_and_write_block_v2(pCompressor, pInWindow, nPreviousBlockSize, nInDataSize, pOutData, nMaxOutDataSize);
         break;
      default:
         /* Unknown format version */
         nCompressedSize = -1;
         break;
      }

      if (nCompressedSize != -1 && (pCompressor->flags & LZSA_FLAG_RAW_BACKWARD))
         lzsa_reverse_buffer(pOutData, nCompressedSize);
   }

   /* Put the caller's input back in its original order */
   if (pCompressor->flags & LZSA_FLAG_RAW_BACKWARD)
      lzsa_reverse_buffer(pBlockData, nInDataSize);

   return nCompressedSize;
}
/**
 * Read the command counter kept in the compression context
 *
 * @param pCompressor compression context
 *
 * @return number of commands
 */
int lzsa_compressor_get_command_count(lzsa_compressor *pCompressor) {
   const int nCommandCount = pCompressor->num_commands;

   return nCommandCount;
}

View file

@ -0,0 +1,184 @@
/*
* shrink_context.h - compression context definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _SHRINK_CONTEXT_H
#define _SHRINK_CONTEXT_H
#include "divsufsort.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Bit layout constants for a 32-bit word holding an LCP field and a position:
 * bits 0..16 position (POS_MASK), bits 17..30 LCP field (LCP_MASK), bit 31 visited flag.
 * NOTE(review): the code that packs and unpacks these words is not in this header - confirm against the match finder.
 */
#define LCP_BITS 14
#define TAG_BITS 4
#define LCP_MAX ((1U<<(LCP_BITS - TAG_BITS)) - 1) /* (1 << 10) - 1 = 1023 */
#define LCP_AND_TAG_MAX (1U<<(LCP_BITS - 1)) /* 1 << 13 = 8192; open_intervals holds LCP_AND_TAG_MAX + 1 entries */
#define LCP_SHIFT (31-LCP_BITS) /* 17 */
#define LCP_MASK (((1U<<LCP_BITS) - 1) << LCP_SHIFT)
#define POS_MASK ((1U<<LCP_SHIFT) - 1)
#define VISITED_FLAG 0x80000000
#define EXCL_VISITED_MASK 0x7fffffff /* every bit except VISITED_FLAG */
/* Arrival slot counts per position; the arrival array is sized with ARRIVALS_PER_POSITION_SHIFT (1 << 5 = 32 slots) */
#define NARRIVALS_PER_POSITION_V1 8
#define NARRIVALS_PER_POSITION_V2_SMALL 9
#define NARRIVALS_PER_POSITION_V2_BIG 32
#define ARRIVALS_PER_POSITION_SHIFT 5
/* Match candidates per position for each format version; each count equals 1 << its shift */
#define NMATCHES_PER_INDEX_V1 8
#define MATCHES_PER_INDEX_SHIFT_V1 3
#define NMATCHES_PER_INDEX_V2 64
#define MATCHES_PER_INDEX_SHIFT_V2 6
/* Optimizer tuning constants. NOTE(review): their use is not visible from this header */
#define LEAVE_ALONE_MATCH_SIZE 300
#define LEAVE_ALONE_MATCH_SIZE_SMALL 1000
#define MODESWITCH_PENALTY 3
/** One match: a length and an offset, each stored as an unsigned short */
typedef struct _lzsa_match {
unsigned short length;
unsigned short offset;
} lzsa_match;
/**
 * Forward arrival slot
 *
 * The compression context allocates (BLOCK_SIZE + 1) << ARRIVALS_PER_POSITION_SHIFT of these.
 * NOTE(review): the meaning of the individual fields is defined by the forward optimizers,
 * which are not part of this header - confirm there before relying on the field names.
 */
typedef struct {
int cost;
unsigned short rep_offset;
short from_slot;
int from_pos;
unsigned short rep_len;
unsigned short match_len;
int rep_pos;
int num_literals;
int score;
} lzsa_arrival;
/**
 * Compression statistics
 *
 * The min_* fields that lzsa_compressor_init() sets to -1 use that value to mean
 * "no sample recorded yet". Each total_* field has a matching *_divisor field
 * counting the samples that were added to it.
 */
typedef struct _lzsa_stats {
int min_literals; /* shortest literal run seen, or -1 if none yet */
int max_literals; /* longest literal run seen */
int total_literals; /* sum of all literal run lengths */
int min_offset; /* smallest match offset seen, or -1 if none yet */
int max_offset; /* largest match offset seen */
int num_rep_offsets; /* NOTE(review): not updated by the code visible here - presumably an LZSA2 repeat-offset count */
int total_offsets; /* sum of all match offsets */
int min_match_len; /* shortest match seen, or -1 if none yet */
int max_match_len; /* longest match seen */
int total_match_lens; /* sum of all match lengths */
int min_rle1_len; /* shortest match with offset 1 seen, or -1 if none yet */
int max_rle1_len; /* longest match with offset 1 seen */
int total_rle1_lens; /* sum of the lengths of matches with offset 1 */
int min_rle2_len; /* shortest match with offset 2 seen, or -1 if none yet */
int max_rle2_len; /* longest match with offset 2 seen */
int total_rle2_lens; /* sum of the lengths of matches with offset 2 */
int literals_divisor; /* number of literal runs counted */
int match_divisor; /* number of matches counted */
int rle1_divisor; /* number of matches with offset 1 counted */
int rle2_divisor; /* number of matches with offset 2 counted */
} lzsa_stats;
/**
 * Compression context
 *
 * All pointer members are heap buffers allocated by lzsa_compressor_init() and released by
 * lzsa_compressor_destroy(). Sizes below are the element counts used at allocation time.
 */
typedef struct _lzsa_compressor {
divsufsort_ctx_t divsufsort_context; /* suffix sorter state, set up with divsufsort_init() */
unsigned int *intervals; /* nMaxWindowSize entries */
unsigned int *pos_data; /* nMaxWindowSize entries */
unsigned int *open_intervals; /* LCP_AND_TAG_MAX + 1 entries */
lzsa_match *match; /* BLOCK_SIZE * NMATCHES_PER_INDEX_V1 or _V2 entries, depending on format_version */
lzsa_match *best_match; /* BLOCK_SIZE entries */
lzsa_match *improved_match; /* BLOCK_SIZE entries */
lzsa_arrival *arrival; /* (BLOCK_SIZE + 1) << ARRIVALS_PER_POSITION_SHIFT entries */
char *rep_slot_handled_mask; /* format version 2 only; NULL otherwise */
char *rep_len_handled_mask; /* format version 2 only; NULL otherwise */
int *first_offset_for_byte; /* format version 2 only, 65536 entries; NULL otherwise */
int *next_offset_for_pos; /* format version 2 only, BLOCK_SIZE entries; NULL otherwise */
int min_match_size; /* requested minimum match size, clamped to the range allowed for the format version */
int format_version; /* format version passed to lzsa_compressor_init() */
int flags; /* compression flags passed to lzsa_compressor_init() */
int safe_dist; /* starts at 0; in raw block mode the block writer raises it to the largest (input consumed - output written) seen */
int num_commands; /* command counter, returned by lzsa_compressor_get_command_count() */
int end_position; /* initialized to -1 */
lzsa_stats stats; /* compression statistics */
} lzsa_compressor;
/**
 * Initialize compression context
 *
 * @param pCompressor compression context to initialize
 * @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
 * @param nMinMatchSize minimum match size (clamped to the range allowed for the format version)
 * @param nFormatVersion version of format to use (1-2)
 * @param nFlags compression flags
 *
 * @return 0 for success, non-zero for failure
 */
int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize, const int nFormatVersion, const int nFlags);
/**
 * Clean up compression context and free up any associated resources
 *
 * @param pCompressor compression context to clean up
 */
void lzsa_compressor_destroy(lzsa_compressor *pCompressor);
/**
 * Compress one block of data
 *
 * When LZSA_FLAG_RAW_BACKWARD is set, the bytes to compress are reversed in place during
 * the call and restored before it returns, which is why pInWindow is not const.
 *
 * @param pCompressor compression context
 * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
 * @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
 * @param nInDataSize number of input bytes to compress
 * @param pOutData pointer to output buffer
 * @param nMaxOutDataSize maximum size of output buffer, in bytes
 *
 * @return size of compressed data in output buffer, or -1 if the data is uncompressible
 */
int lzsa_compressor_shrink_block(lzsa_compressor *pCompressor, unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
/**
 * Get the number of compression commands issued in compressed data blocks
 *
 * @param pCompressor compression context
 *
 * @return number of commands
 */
int lzsa_compressor_get_command_count(lzsa_compressor *pCompressor);
#ifdef __cplusplus
}
#endif
#endif /* _SHRINK_CONTEXT_H */

View file

@ -0,0 +1,185 @@
/*
* shrink_inmem.c - in-memory compression implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdlib.h>
#include <string.h>
#include "shrink_inmem.h"
#include "shrink_context.h"
#include "frame.h"
#include "format.h"
#include "lib.h"
/**
 * Compute an upper bound for the compressed size of an input
 *
 * The bound is the stream header, one frame per block, the input bytes
 * themselves, and the footer frame.
 *
 * @param nInputSize input(source) size in bytes
 *
 * @return maximum compressed size
 */
size_t lzsa_get_max_compressed_size_inmem(size_t nInputSize) {
   /* Number of blocks, rounded up; the shift by 16 divides by 65536 */
   const size_t nNumBlocks = (nInputSize + (BLOCK_SIZE - 1)) >> 16;
   size_t nMaxSize = lzsa_get_header_size();

   nMaxSize += nNumBlocks * lzsa_get_frame_size();
   nMaxSize += nInputSize;
   nMaxSize += lzsa_get_frame_size(); /* footer */

   return nMaxSize;
}
/**
 * Compress memory
 *
 * The input is processed in blocks of at most BLOCK_SIZE bytes, each compressed with the
 * previous block as its window. In framed mode every block gets a frame and blocks that
 * do not compress are stored as-is; in raw block mode (LZSA_FLAG_RAW_BLOCK) no header,
 * frames or footer are written and the input must fit in a single compressible block.
 *
 * @param pInputData pointer to input(source) data to compress
 * @param pOutBuffer buffer for compressed data
 * @param nInputSize input(source) size in bytes
 * @param nMaxOutBufferSize maximum capacity of compression buffer
 * @param nFlags compression flags (LZSA_FLAG_xxx)
 * @param nMinMatchSize minimum match size
 * @param nFormatVersion version of format to use (1-2)
 *
 * @return actual compressed size, or -1 (as a size_t) for error
 */
size_t lzsa_compress_inmem(unsigned char *pInputData, unsigned char *pOutBuffer, size_t nInputSize, size_t nMaxOutBufferSize,
                           const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion) {
   lzsa_compressor compressor;
   size_t nOriginalSize = 0;
   size_t nCompressedSize = 0;
   int nResult;
   int nError = 0;

   nResult = lzsa_compressor_init(&compressor, BLOCK_SIZE * 2, nMinMatchSize, nFormatVersion, nFlags);
   if (nResult != 0) {
      return -1;
   }

   if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
      /* Framed streams start with a header; raw blocks have none */
      int nHeaderSize = lzsa_encode_header(pOutBuffer, (int)nMaxOutBufferSize, nFormatVersion);
      if (nHeaderSize < 0)
         nError = LZSA_ERROR_COMPRESSION;
      else {
         nCompressedSize += nHeaderSize;
      }
   }

   int nPreviousBlockSize = 0;
   int nNumBlocks = 0;

   while (nOriginalSize < nInputSize && !nError) {
      /* Clamp to the block size while still in size_t. Narrowing the remaining byte count to
       * int first can yield zero or a negative value for inputs larger than INT_MAX, and the
       * loop would then never advance. The loop condition guarantees nInDataSize > 0 here. */
      const size_t nRemainingSize = nInputSize - nOriginalSize;
      const int nInDataSize = (nRemainingSize > (size_t)BLOCK_SIZE) ? BLOCK_SIZE : (int)nRemainingSize;

      /* A raw block stream can only hold a single block */
      if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0 && nNumBlocks) {
         nError = LZSA_ERROR_RAW_TOOLARGE;
         break;
      }

      int nOutDataSize;
      int nOutDataEnd;
      int nFrameSize = lzsa_get_frame_size();
      /* Bytes that are already used or must stay free: output so far, this block's frame and the footer */
      size_t nReservedSize = nCompressedSize + (size_t)nFrameSize + (size_t)nFrameSize /* footer */;
      size_t nOutAvailable;

      if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0) {
         nFrameSize = 0;
         nReservedSize = nCompressedSize;
      }

      /* Compute the room left in size_t, guarding against underflow, and clamp before narrowing to int */
      nOutAvailable = (nMaxOutBufferSize > nReservedSize) ? (nMaxOutBufferSize - nReservedSize) : 0;
      if (nOutAvailable > (size_t)BLOCK_SIZE)
         nOutAvailable = BLOCK_SIZE;
      nOutDataEnd = (int)nOutAvailable;

      /* The compressed data is written after the spot where this block's frame will go */
      nOutDataSize = lzsa_compressor_shrink_block(&compressor, pInputData + nOriginalSize - nPreviousBlockSize, nPreviousBlockSize, nInDataSize, pOutBuffer + nFrameSize + nCompressedSize, nOutDataEnd);
      if (nOutDataSize >= 0) {
         /* Write compressed block */

         if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
            int nBlockheaderSize = lzsa_encode_compressed_block_frame(pOutBuffer + nCompressedSize, (int)(nMaxOutBufferSize - nCompressedSize), nOutDataSize);
            if (nBlockheaderSize < 0)
               nError = LZSA_ERROR_COMPRESSION;
            else {
               nCompressedSize += nBlockheaderSize;
            }
         }

         if (!nError) {
            nOriginalSize += nInDataSize;
            nCompressedSize += nOutDataSize;
         }
      }
      else {
         /* Write uncompressible, literal block */

         if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0) {
            /* Without frames there is no way to mark a block as stored */
            nError = LZSA_ERROR_RAW_UNCOMPRESSED;
            break;
         }

         int nBlockheaderSize = lzsa_encode_uncompressed_block_frame(pOutBuffer + nCompressedSize, (int)(nMaxOutBufferSize - nCompressedSize), nInDataSize);
         if (nBlockheaderSize < 0)
            nError = LZSA_ERROR_COMPRESSION;
         else {
            if ((size_t)nInDataSize > (nMaxOutBufferSize - (nCompressedSize + nBlockheaderSize)))
               nError = LZSA_ERROR_DST;
            else {
               memcpy(pOutBuffer + nBlockheaderSize + nCompressedSize, pInputData + nOriginalSize, nInDataSize);

               nOriginalSize += nInDataSize;
               nCompressedSize += nBlockheaderSize + nInDataSize;
            }
         }
      }

      nPreviousBlockSize = nInDataSize;
      nNumBlocks++;
   }

   if (!nError) {
      int nFooterSize;

      if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0) {
         nFooterSize = 0;
      }
      else {
         nFooterSize = lzsa_encode_footer_frame(pOutBuffer + nCompressedSize, (int)(nMaxOutBufferSize - nCompressedSize));
         if (nFooterSize < 0)
            nError = LZSA_ERROR_COMPRESSION;
      }

      nCompressedSize += nFooterSize;
   }

   lzsa_compressor_destroy(&compressor);

   if (nError) {
      return -1;
   }
   else {
      return nCompressedSize;
   }
}

View file

@ -0,0 +1,71 @@
/*
* shrink_inmem.h - in-memory compression definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _SHRINK_INMEM_H
#define _SHRINK_INMEM_H
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
 * Get maximum compressed size of input (source) data
 *
 * @param nInputSize input (source) size in bytes
 *
 * @return maximum compressed size
 */
size_t lzsa_get_max_compressed_size_inmem(size_t nInputSize);
/**
 * Compress memory
 *
 * @param pInputData pointer to input (source) data to compress
 * @param pOutBuffer buffer for compressed data
 * @param nInputSize input (source) size in bytes
 * @param nMaxOutBufferSize maximum capacity of compression buffer
 * @param nFlags compression flags (LZSA_FLAG_xxx)
 * @param nMinMatchSize minimum match size
 * @param nFormatVersion version of format to use (1-2)
 *
 * @return actual compressed size, or -1 for error
 *
 * NOTE(review): the return type is size_t, so a -1 error value reaches the
 * caller as (size_t)-1; callers presumably have to compare against
 * (size_t)-1 rather than test for a negative result - confirm against the
 * implementation.
 */
size_t lzsa_compress_inmem(unsigned char *pInputData, unsigned char *pOutBuffer, size_t nInputSize, size_t nMaxOutBufferSize,
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion);
#ifdef __cplusplus
}
#endif
#endif /* _SHRINK_INMEM_H */

View file

@ -0,0 +1,346 @@
/*
* shrink_streaming.c - streaming compression implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdlib.h>
#include <string.h>
#include "shrink_streaming.h"
#include "format.h"
#include "frame.h"
#include "lib.h"
#ifdef _WIN32
#include <windows.h>
#else
#include <stdio.h>
#endif
/**
 * Remove a file from disk; the result of the removal is not checked
 *
 * @param pszInFilename path of the file to remove
 */
static void lzsa_delete_file(const char *pszInFilename) {
#ifndef _WIN32
   remove(pszInFilename);
#else
   DeleteFileA(pszInFilename);
#endif
}
/*-------------- File API -------------- */
/**
 * Compress file
 *
 * The first two bytes of the input file are read as a 16-bit load address
 * (low byte first) and handed to lzsa_compress_stream(); compression then
 * continues from the stream position that follows them. When any step fails,
 * both files are closed and the output file is deleted.
 *
 * @param pszInFilename name of input(source) file to compress
 * @param pszOutFilename name of output(compressed) file to generate
 * @param pszDictionaryFilename name of dictionary file, or NULL for none
 * @param nFlags compression flags (LZSA_FLAG_xxx)
 * @param nMinMatchSize minimum match size
 * @param nFormatVersion version of format to use (1-2)
 * @param progress progress function, called after compressing each block, or NULL for none
 * @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
 * @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
 * @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
 * @param pSafeDist pointer to return safe distance for raw blocks, updated when this function is successful
 * @param pStats pointer to compression stats that are filled if this function is successful, or NULL
 *
 * @return LZSA_OK for success, or an error value from lzsa_status_t
 *         (LZSA_ERROR_SRC when the input cannot be opened or is shorter than
 *         the two load address bytes, LZSA_ERROR_DST when the output cannot be opened)
 */
lzsa_status_t lzsa_compress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
      void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount, int *pSafeDist, lzsa_stats *pStats) {
   lzsa_stream_t inStream, outStream;
   void *pDictionaryData = NULL;
   int nDictionaryDataSize = 0;
   unsigned char pInData[2] = { 0 };
   lzsa_status_t nStatus;

   if (lzsa_filestream_open(&inStream, pszInFilename, "rb") < 0) {
      return LZSA_ERROR_SRC;
   }

   if (lzsa_filestream_open(&outStream, pszOutFilename, "wb") < 0) {
      inStream.close(&inStream);
      return LZSA_ERROR_DST;
   }

   nStatus = lzsa_dictionary_load(pszDictionaryFilename, &pDictionaryData, &nDictionaryDataSize);
   if (nStatus) {
      outStream.close(&outStream);
      inStream.close(&inStream);
      lzsa_delete_file(pszOutFilename);
      return nStatus;
   }

   if (inStream.read(&inStream, pInData, 2) == 2) {
      /* Two-byte prefix: load address, low byte first */
      const int nLoadAddress = pInData[0] | (pInData[1] << 8);

      nStatus = lzsa_compress_stream(&inStream, &outStream, pDictionaryData, nDictionaryDataSize, nFlags, nMinMatchSize, nFormatVersion, progress, pOriginalSize, pCompressedSize, pCommandCount, pSafeDist, pStats, nLoadAddress);
   }
   else {
      /* Input too short to hold the load address. Fall through to the shared
       * cleanup below instead of returning directly, so the streams and the
       * dictionary are released and the empty output file is removed. */
      nStatus = LZSA_ERROR_SRC;
   }

   lzsa_dictionary_free(&pDictionaryData);
   outStream.close(&outStream);
   inStream.close(&inStream);

   if (nStatus) {
      lzsa_delete_file(pszOutFilename);
   }

   return nStatus;
}
/*-------------- Streaming API -------------- */
/**
 * Write one already-encoded header field to the output stream
 *
 * @param pOutStream output(compressed) stream to write to
 * @param pFrameData buffer holding the encoded field
 * @param nFieldSize size of the encoded field in bytes, or a negative value if encoding it failed
 * @param pCompressedSize running output size, increased by nFieldSize once the field has been written
 *
 * @return 0 for success, LZSA_ERROR_COMPRESSION if encoding failed, LZSA_ERROR_DST if the write came up short
 */
static int lzsa_write_header_field(lzsa_stream_t *pOutStream, unsigned char *pFrameData, const int nFieldSize, long long *pCompressedSize) {
   if (nFieldSize < 0)
      return LZSA_ERROR_COMPRESSION;

   if (pOutStream->write(pOutStream, pFrameData, (size_t)nFieldSize) != (size_t)nFieldSize)
      return LZSA_ERROR_DST;

   *pCompressedSize += (long long)nFieldSize;
   return 0;
}

/**
 * Compress stream
 *
 * Input is consumed in chunks of up to BLOCK_SIZE bytes. Every block that
 * compresses is preceded in the output by three address fields, written in
 * this order: the in-place load address (lzsa_encode_addr_le), the end
 * address and the destination address (both lzsa_encode_addr_be). A block
 * that does not compress is written as an uncompressed block frame followed
 * by the input bytes. No footer frame is written.
 *
 * With LZSA_FLAG_RAW_BLOCK set, only one block is accepted: a second block
 * fails with LZSA_ERROR_RAW_TOOLARGE and a block that does not compress
 * fails with LZSA_ERROR_RAW_UNCOMPRESSED.
 *
 * @param pInStream input(source) stream to compress
 * @param pOutStream output(compressed) stream to write to
 * @param pDictionaryData dictionary contents, or NULL for none
 * @param nDictionaryDataSize size of dictionary contents, or 0
 * @param nFlags compression flags (LZSA_FLAG_xxx)
 * @param nMinMatchSize minimum match size
 * @param nFormatVersion version of format to use (1-2)
 * @param progress progress function, called after compressing each block, or NULL for none
 * @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
 * @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
 * @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
 * @param pSafeDist pointer to return safe distance for raw blocks, updated when this function is successful
 * @param pStats pointer to compression stats, or NULL; copied from the compressor whether or not an error occurred
 * @param nLoadAddress address the three header fields are derived from; written unchanged as the destination address
 *
 * @return LZSA_OK for success, or an error value from lzsa_status_t
 */
lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize,
                                   const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
                                   void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount, int *pSafeDist, lzsa_stats *pStats, int nLoadAddress) {
   unsigned char *pInData, *pOutData;
   lzsa_compressor compressor;
   long long nOriginalSize = 0LL, nCompressedSize = 0LL;
   int nResult;
   unsigned char cFrameData[16];
   int nError = 0;
   const int nRawPadding = (nFlags & LZSA_FLAG_RAW_BLOCK) ? 8 : 0;
   const int nSafetyMargin = (nFlags & LZSA_FLAG_RAW_BLOCK) ? 3 : 0;

   /* Input window: previous block (or dictionary) in the first half, current block in the second */
   pInData = (unsigned char*)malloc(BLOCK_SIZE * 2);
   if (!pInData) {
      return LZSA_ERROR_MEMORY;
   }
   memset(pInData, 0, BLOCK_SIZE * 2);

   pOutData = (unsigned char*)malloc(BLOCK_SIZE);
   if (!pOutData) {
      free(pInData);
      pInData = NULL;

      return LZSA_ERROR_MEMORY;
   }
   memset(pOutData, 0, BLOCK_SIZE);

   nResult = lzsa_compressor_init(&compressor, BLOCK_SIZE * 2, nMinMatchSize, nFormatVersion, nFlags);
   if (nResult != 0) {
      free(pOutData);
      pOutData = NULL;

      free(pInData);
      pInData = NULL;

      return LZSA_ERROR_MEMORY;
   }

   int nPreviousBlockSize = 0;
   int nNumBlocks = 0;

   /* NOTE(review): if read() returns 0 while eof() stays false (e.g. a read
    * error on a file stream), this loop does not terminate - consider
    * reporting a source error in that case. */
   while (!pInStream->eof(pInStream) && !nError) {
      int nInDataSize;

      if (nPreviousBlockSize) {
         /* Slide the previous block so that it ends right before the new data */
         memcpy(pInData + BLOCK_SIZE - nPreviousBlockSize, pInData + BLOCK_SIZE, nPreviousBlockSize);
      }
      else if (nDictionaryDataSize && pDictionaryData) {
         /* First block: the dictionary takes the place of the previous block */
         nPreviousBlockSize = nDictionaryDataSize;
         memcpy(pInData + BLOCK_SIZE - nPreviousBlockSize, pDictionaryData, nPreviousBlockSize);
      }

      nInDataSize = (int)pInStream->read(pInStream, pInData + BLOCK_SIZE, BLOCK_SIZE);
      if (nInDataSize > 0) {
         if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0 && nNumBlocks) {
            nError = LZSA_ERROR_RAW_TOOLARGE;
            break;
         }
         nDictionaryDataSize = 0;

         int nOutDataSize;

         nOutDataSize = lzsa_compressor_shrink_block(&compressor, pInData + BLOCK_SIZE - nPreviousBlockSize, nPreviousBlockSize, nInDataSize, pOutData, ((nInDataSize + nRawPadding) >= BLOCK_SIZE) ? BLOCK_SIZE : (nInDataSize + nRawPadding));
         if (nOutDataSize >= 0) {
            int nHeaderSize;
            const int nRealLoadAddress = nInDataSize - nOutDataSize + nLoadAddress;

            /* New inplace LoadAddress */
            nHeaderSize = lzsa_encode_addr_le(cFrameData, 16, nRealLoadAddress + nSafetyMargin - 4); //-4 as 4 header bytes are added
            nError = lzsa_write_header_field(pOutStream, cFrameData, nHeaderSize, &nCompressedSize);

            /* EndAddress */
            if (!nError) {
               nHeaderSize = lzsa_encode_addr_be(cFrameData, 16, nRealLoadAddress + compressor.end_position);
               nError = lzsa_write_header_field(pOutStream, cFrameData, nHeaderSize, &nCompressedSize);
            }

            /* DestAddress */
            if (!nError) {
               nHeaderSize = lzsa_encode_addr_be(cFrameData, 16, nLoadAddress);
               nError = lzsa_write_header_field(pOutStream, cFrameData, nHeaderSize, &nCompressedSize);
            }

            /* Compressed block data follows the three address fields */
            if (!nError) {
               if (pOutStream->write(pOutStream, pOutData, (size_t)nOutDataSize) != (size_t)nOutDataSize) {
                  nError = LZSA_ERROR_DST;
               }
               else {
                  nOriginalSize += (long long)nInDataSize;
                  nCompressedSize += (long long)nOutDataSize;
               }
            }
         }
         else {
            /* Write uncompressible, literal block */

            if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0) {
               nError = LZSA_ERROR_RAW_UNCOMPRESSED;
               break;
            }

            int nBlockheaderSize = lzsa_encode_uncompressed_block_frame(cFrameData, 16, nInDataSize);
            if (nBlockheaderSize < 0)
               nError = LZSA_ERROR_COMPRESSION;
            else {
               if (pOutStream->write(pOutStream, cFrameData, nBlockheaderSize) != (size_t)nBlockheaderSize) {
                  nError = LZSA_ERROR_DST;
               }
               else {
                  if (pOutStream->write(pOutStream, pInData + BLOCK_SIZE, (size_t)nInDataSize) != (size_t)nInDataSize) {
                     nError = LZSA_ERROR_DST;
                  }
                  else {
                     nOriginalSize += (long long)nInDataSize;
                     nCompressedSize += (long long)nBlockheaderSize + (long long)nInDataSize;
                  }
               }
            }
         }

         nPreviousBlockSize = nInDataSize;
         nNumBlocks++;
      }

      if (!nError && !pInStream->eof(pInStream)) {
         if (progress)
            progress(nOriginalSize, nCompressedSize);
      }
   }

   /* No footer frame is emitted: the stream ends with the last block */

   if (progress)
      progress(nOriginalSize, nCompressedSize);

   /* Collect results before the compressor is torn down */
   int nCommandCount = lzsa_compressor_get_command_count(&compressor);
   int nSafeDist = compressor.safe_dist;
   if (pStats)
      *pStats = compressor.stats;
   lzsa_compressor_destroy(&compressor);

   free(pOutData);
   pOutData = NULL;

   free(pInData);
   pInData = NULL;

   if (nError) {
      return nError;
   }
   else {
      if (pOriginalSize)
         *pOriginalSize = nOriginalSize;
      if (pCompressedSize)
         *pCompressedSize = nCompressedSize;
      if (pCommandCount)
         *pCommandCount = nCommandCount;
      if (pSafeDist)
         *pSafeDist = nSafeDist;
      return LZSA_OK;
   }
}

View file

@ -0,0 +1,99 @@
/*
* shrink_streaming.h - streaming compression definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _SHRINK_STREAMING_H
#define _SHRINK_STREAMING_H
#include "stream.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Forward declaration */
typedef enum _lzsa_status_t lzsa_status_t;
typedef struct _lzsa_stats lzsa_stats;
/*-------------- File API -------------- */
/**
 * Compress file
 *
 * The first two bytes of the input file are read as a 16-bit load address
 * (low byte first) before compression starts; an input shorter than two
 * bytes is reported as LZSA_ERROR_SRC.
 *
 * @param pszInFilename name of input(source) file to compress
 * @param pszOutFilename name of output(compressed) file to generate
 * @param pszDictionaryFilename name of dictionary file, or NULL for none
 * @param nFlags compression flags (LZSA_FLAG_xxx)
 * @param nMinMatchSize minimum match size
 * @param nFormatVersion version of format to use (1-2)
 * @param progress progress function, called after compressing each block, or NULL for none
 * @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
 * @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
 * @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
 * @param pSafeDist pointer to return safe distance for raw blocks, updated when this function is successful
 * @param pStats pointer to compression stats that are filled if this function is successful, or NULL
 *
 * @return LZSA_OK for success, or an error value from lzsa_status_t
 */
lzsa_status_t lzsa_compress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename,
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount, int *pSafeDist, lzsa_stats *pStats);
/*-------------- Streaming API -------------- */
/**
 * Compress stream
 *
 * @param pInStream input(source) stream to compress
 * @param pOutStream output(compressed) stream to write to
 * @param pDictionaryData dictionary contents, or NULL for none
 * @param nDictionaryDataSize size of dictionary contents, or 0
 * @param nFlags compression flags (LZSA_FLAG_xxx)
 * @param nMinMatchSize minimum match size
 * @param nFormatVersion version of format to use (1-2)
 * @param progress progress function, called after compressing each block, or NULL for none
 * @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
 * @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
 * @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
 * @param pSafeDist pointer to return safe distance for raw blocks, updated when this function is successful
 * @param pStats pointer to compression stats that are filled if this function is successful, or NULL
 * @param nLoadAddress load address used to derive the address fields written ahead of each compressed block
 *
 * @return LZSA_OK for success, or an error value from lzsa_status_t
 */
lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize,
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount, int *pSafeDist, lzsa_stats *pStats, int nLoadAddress);
#ifdef __cplusplus
}
#endif
#endif /* _SHRINK_STREAMING_H */

View file

@ -0,0 +1,111 @@
/*
* stream.c - streaming I/O implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "stream.h"
/**
 * Close a file stream and clear its fields
 *
 * Does nothing when the stream holds no open file.
 *
 * @param stream stream to close
 */
static void lzsa_filestream_close(lzsa_stream_t *stream) {
   FILE *pFile = (FILE*)stream->obj;

   if (!pFile)
      return;

   fclose(pFile);

   /* Reset the handle and every callback so the stream cannot be reused by accident */
   stream->obj = NULL;
   stream->read = NULL;
   stream->write = NULL;
   stream->eof = NULL;
   stream->close = NULL;
}
/**
 * Read bytes from a file stream
 *
 * @param stream stream to read from
 * @param ptr destination buffer
 * @param size number of bytes requested
 *
 * @return number of bytes actually read, as reported by fread()
 */
static size_t lzsa_filestream_read(lzsa_stream_t *stream, void *ptr, size_t size) {
   FILE *pFile = (FILE*)stream->obj;
   const size_t nBytesRead = fread(ptr, 1, size, pFile);

   return nBytesRead;
}
/**
 * Write bytes to a file stream
 *
 * @param stream stream to write to
 * @param ptr source buffer
 * @param size number of bytes to write
 *
 * @return number of bytes actually written, as reported by fwrite()
 */
static size_t lzsa_filestream_write(lzsa_stream_t *stream, void *ptr, size_t size) {
   FILE *pFile = (FILE*)stream->obj;
   const size_t nBytesWritten = fwrite(ptr, 1, size, pFile);

   return nBytesWritten;
}
/**
 * Report whether a file stream has hit the end of its data
 *
 * @param stream stream to query
 *
 * @return the result of feof(): nonzero once the end of the data has been reached, 0 otherwise
 */
static int lzsa_filestream_eof(lzsa_stream_t *stream) {
   FILE *pFile = (FILE*)stream->obj;
   const int nAtEnd = feof(pFile);

   return nAtEnd;
}
/**
 * Open a file and wrap it in an I/O stream
 *
 * The result of fopen() is always stored in stream->obj; the read, write,
 * eof and close callbacks are only filled in when the file was opened.
 *
 * @param stream stream to fill out
 * @param pszInFilename name of the file to open
 * @param pszMode open mode, as with fopen()
 *
 * @return 0 for success, -1 if the file could not be opened
 */
int lzsa_filestream_open(lzsa_stream_t *stream, const char *pszInFilename, const char *pszMode) {
   FILE *pFile = fopen(pszInFilename, pszMode);

   stream->obj = (void*)pFile;
   if (!pFile)
      return -1;

   stream->read = lzsa_filestream_read;
   stream->write = lzsa_filestream_write;
   stream->eof = lzsa_filestream_eof;
   stream->close = lzsa_filestream_close;
   return 0;
}

View file

@ -0,0 +1,103 @@
/*
* stream.h - streaming I/O definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _STREAM_H
#define _STREAM_H
#ifdef __cplusplus
extern "C" {
#endif
/* Forward declaration, so that the callbacks below can take the stream type */
typedef struct _lzsa_stream_t lzsa_stream_t;

/*
 * I/O stream: an opaque object plus the callbacks that operate on it.
 *
 * The struct is defined without repeating the typedef above: declaring the
 * same typedef name a second time is only permitted from C11 onwards.
 */
struct _lzsa_stream_t {
   /** Opaque stream-specific pointer */
   void *obj;

   /**
    * Read from stream
    *
    * @param stream stream
    * @param ptr buffer to read into
    * @param size number of bytes to read
    *
    * @return number of bytes read
    */
   size_t(*read)(lzsa_stream_t *stream, void *ptr, size_t size);

   /**
    * Write to stream
    *
    * @param stream stream
    * @param ptr buffer to write from
    * @param size number of bytes to write
    *
    * @return number of bytes written
    */
   size_t(*write)(lzsa_stream_t *stream, void *ptr, size_t size);

   /**
    * Check if stream has reached the end of the data
    *
    * @param stream stream
    *
    * @return nonzero if the end of the data has been reached, 0 if there is more data
    */
   int(*eof)(lzsa_stream_t *stream);

   /**
    * Close stream
    *
    * @param stream stream
    */
   void(*close)(lzsa_stream_t *stream);
};
/**
 * Open file and create an I/O stream from it
 *
 * @param stream stream to fill out
 * @param pszInFilename filename
 * @param pszMode open mode, as with fopen()
 *
 * @return 0 for success, nonzero for failure
 *
 * NOTE(review): per the implementation in stream.c, a failed open stores
 * NULL in stream->obj and leaves the callbacks unassigned, so the stream
 * must not be used (or closed through its close callback) after a failure.
 */
int lzsa_filestream_open(lzsa_stream_t *stream, const char *pszInFilename, const char *pszMode);
#ifdef __cplusplus
}
#endif
#endif /* _STREAM_H */