From 112763d0ccecd1e5b850e7bcbc93594e833c89e4 Mon Sep 17 00:00:00 2001 From: Rahila Syed Date: Thu, 3 Jul 2014 19:55:58 +0530 Subject: [PATCH 1/2] Support for LZ4 and Snappy-2 --- src/common/Makefile | 23 +- src/common/lz4/LICENSE | 24 + src/common/lz4/Makefile | 29 + src/common/lz4/lz4.c | 879 +++++++++++++++++++++ src/common/lz4/lz4.h | 248 ++++++ src/common/lz4/lz4hc.c | 908 +++++++++++++++++++++ src/common/lz4/lz4hc.h | 172 ++++ src/common/snappy/LICENSE | 29 + src/common/snappy/Makefile | 36 + src/common/snappy/snappy-compat.h | 57 ++ src/common/snappy/snappy-int.h | 71 ++ src/common/snappy/snappy.c | 1563 +++++++++++++++++++++++++++++++++++++ src/common/snappy/snappy.h | 35 + src/include/utils/pg_lz4.h | 6 + src/include/utils/pg_snappy.h | 10 + 15 files changed, 4081 insertions(+), 9 deletions(-) create mode 100644 src/common/lz4/LICENSE create mode 100644 src/common/lz4/Makefile create mode 100644 src/common/lz4/lz4.c create mode 100644 src/common/lz4/lz4.h create mode 100644 src/common/lz4/lz4hc.c create mode 100644 src/common/lz4/lz4hc.h create mode 100644 src/common/snappy/LICENSE create mode 100644 src/common/snappy/Makefile create mode 100644 src/common/snappy/snappy-compat.h create mode 100644 src/common/snappy/snappy-int.h create mode 100644 src/common/snappy/snappy.c create mode 100644 src/common/snappy/snappy.h create mode 100644 src/include/utils/pg_lz4.h create mode 100644 src/include/utils/pg_snappy.h diff --git a/src/common/Makefile b/src/common/Makefile index 7edbaaa..73fb85d 100644 --- a/src/common/Makefile +++ b/src/common/Makefile @@ -22,12 +22,18 @@ include $(top_builddir)/src/Makefile.global override CPPFLAGS := -DFRONTEND $(CPPFLAGS) LIBS += $(PTHREAD_LIBS) +SUBDIRS= snappy lz4 +include submake-errcodes -OBJS_COMMON = exec.o pgfnames.o psprintf.o relpath.o rmtree.o username.o wait_error.o +include $(top_srcdir)/src/backend/common.mk -OBJS_FRONTEND = $(OBJS_COMMON) fe_memutils.o +LOCAL_OBJS_COMMON = exec.o pgfnames.o psprintf.o relpath.o rmtree.o username.o wait_error.o +LOCAL_OBJS_FRONTEND = $(LOCAL_OBJS_COMMON) fe_memutils.o +LOCAL_OBJS_SRV = $(LOCAL_OBJS_COMMON:%.o=%_srv.o) -OBJS_SRV = $(OBJS_COMMON:%.o=%_srv.o) +SUBDIROBJS_EX = $(call expand_subsys,$(SUBDIROBJS)) +OBJS_FRONTEND = $(LOCAL_OBJS_FRONTEND) $(SUBDIROBJS_EX) +OBJS_SRV = $(LOCAL_OBJS_SRV) $(SUBDIROBJS_EX:%.o=%_srv.o) all: libpgcommon.a libpgcommon_srv.a @@ -41,16 +47,15 @@ installdirs: uninstall: rm -f '$(DESTDIR)$(libdir)/libpgcommon.a' -libpgcommon.a: $(OBJS_FRONTEND) - $(AR) $(AROPT) $@ $^ +libpgcommon.a: $(LOCAL_OBJS_FRONTEND) $(SUBDIROBJS) + $(AR) $(AROPT) $@ $(OBJS_FRONTEND) # # Server versions of object files # -libpgcommon_srv.a: $(OBJS_SRV) - $(AR) $(AROPT) $@ $^ - +libpgcommon_srv.a: $(LOCAL_OBJS_SRV) $(SUBDIROBJS) + $(AR) $(AROPT) $@ $(OBJS_SRV) # Because this uses its own compilation rule, it doesn't use the # dependency tracking logic from Makefile.global. To make sure that # dependency tracking works anyway for the *_srv.o files, depend on @@ -68,4 +73,4 @@ submake-errcodes: $(MAKE) -C ../backend submake-errcodes clean distclean maintainer-clean: - rm -f libpgcommon.a libpgcommon_srv.a $(OBJS_FRONTEND) $(OBJS_SRV) + rm -f libpgcommon.a libpgcommon_srv.a $(LOCAL_OBJS_FRONTEND) $(LOCAL_OBJS_SRV) diff --git a/src/common/lz4/LICENSE b/src/common/lz4/LICENSE new file mode 100644 index 0000000..b566df3 --- /dev/null +++ b/src/common/lz4/LICENSE @@ -0,0 +1,24 @@ +LZ4 Library +Copyright (c) 2011-2014, Yann Collet +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/src/common/lz4/Makefile b/src/common/lz4/Makefile new file mode 100644 index 0000000..2d0a298 --- /dev/null +++ b/src/common/lz4/Makefile @@ -0,0 +1,29 @@ +#------------------------------------------------------------------------- +# +# Makefile-- +# Makefile for common/lz4 +# +# IDENTIFICATION +# src/common/lz4/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/common/lz4 +top_builddir = ../../.. +include $(top_builddir)/src/Makefile.global + +override CPPFLAGS := -Wno-missing-declarations -Wno-missing-prototypes $(CPPFLAGS) +override CPPFLAGS := -DFRONTEND $(CPPFLAGS) + +OBJS = lz4hc.o lz4.o +OBJS_SRV = $(OBJS:%.o=%_srv.o) + +include $(top_srcdir)/src/backend/common.mk + +%_srv.o: %.c %.o + $(CC) $(CFLAGS) $(subst -DFRONTEND,, $(CPPFLAGS)) -c $< -o $@ + +clean distclean maintainer-clean: + rm -f $(OBJS_SRV) + +all: $(OBJS_SRV) diff --git a/src/common/lz4/lz4.c b/src/common/lz4/lz4.c new file mode 100644 index 0000000..07b4160 --- /dev/null +++ b/src/common/lz4/lz4.c @@ -0,0 +1,879 @@ +/* + LZ4 - Fast LZ compression algorithm + Copyright (C) 2011-2014, Yann Collet. + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - LZ4 source repository : http://code.google.com/p/lz4/ + - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c +*/ + +/************************************** + Tuning parameters +**************************************/ +/* + * MEMORY_USAGE : + * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) + * Increasing memory usage improves compression ratio + * Reduced memory usage can improve speed, due to cache effect + * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache + */ +#define MEMORY_USAGE 14 + +/* + * HEAPMODE : + * Select how default compression functions will allocate memory for their hash table, + * in memory stack (0:default, fastest), or in memory heap (1:requires memory allocation (malloc)). + */ +#define HEAPMODE 0 + + +/************************************** + CPU Feature Detection +**************************************/ +/* 32 or 64 bits ? */ +#if (defined(__x86_64__) || defined(_M_X64) || defined(_WIN64) \ + || defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) \ + || defined(__64BIT__) || defined(_LP64) || defined(__LP64__) \ + || defined(__ia64) || defined(__itanium__) || defined(_M_IA64) ) /* Detects 64 bits mode */ +# define LZ4_ARCH64 1 +#else +# define LZ4_ARCH64 0 +#endif + +/* + * Little Endian or Big Endian ? + * Overwrite the #define below if you know your architecture endianess + */ +#if defined (__GLIBC__) +# include +# if (__BYTE_ORDER == __BIG_ENDIAN) +# define LZ4_BIG_ENDIAN 1 +# endif +#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN)) +# define LZ4_BIG_ENDIAN 1 +#elif defined(__sparc) || defined(__sparc__) \ + || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \ + || defined(__hpux) || defined(__hppa) \ + || defined(_MIPSEB) || defined(__s390__) +# define LZ4_BIG_ENDIAN 1 +#else +/* Little Endian assumed. PDP Endian and other very rare endian format are unsupported. */ +#endif + +/* + * Unaligned memory access is automatically enabled for "common" CPU, such as x86. + * For others CPU, such as ARM, the compiler may be more cautious, inserting unnecessary extra code to ensure aligned access property + * If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance + */ +#if defined(__ARM_FEATURE_UNALIGNED) +# define LZ4_FORCE_UNALIGNED_ACCESS 1 +#endif + +/* Define this parameter if your target system or compiler does not support hardware bit count */ +#if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for Windows CE does not support Hardware bit count */ +# define LZ4_FORCE_SW_BITCOUNT +#endif + +/* + * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE : + * This option may provide a small boost to performance for some big endian cpu, although probably modest. 
+ * You may set this option to 1 if data will remain within closed environment. + * This option is useless on Little_Endian CPU (such as x86) + */ + +/* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */ + + +/************************************** + Compiler Options +**************************************/ +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ +/* "restrict" is a known keyword */ +#else +# define restrict /* Disable restrict */ +#endif + +#ifdef _MSC_VER /* Visual Studio */ +# define FORCE_INLINE static __forceinline +# include /* For Visual 2005 */ +# if LZ4_ARCH64 /* 64-bits */ +# pragma intrinsic(_BitScanForward64) /* For Visual 2005 */ +# pragma intrinsic(_BitScanReverse64) /* For Visual 2005 */ +# else /* 32-bits */ +# pragma intrinsic(_BitScanForward) /* For Visual 2005 */ +# pragma intrinsic(_BitScanReverse) /* For Visual 2005 */ +# endif +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#else +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +#endif + +#ifdef _MSC_VER /* Visual Studio */ +# define lz4_bswap16(x) _byteswap_ushort(x) +#else +# define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8))) +#endif + +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) +# define expect(expr,value) (__builtin_expect ((expr),(value)) ) +#else +# define expect(expr,value) (expr) +#endif + +#define likely(expr) expect((expr) != 0, 1) +#define unlikely(expr) expect((expr) != 0, 0) + + +/************************************** + Memory routines +**************************************/ +#include /* malloc, calloc, free */ +#define ALLOCATOR(n,s) calloc(n,s) +#define FREEMEM free +#include /* memset, memcpy */ +#define MEM_INIT memset + + +/************************************** + Includes +**************************************/ +#include "postgres.h" +#include "lz4.h" +/************************************** + Basic Types +**************************************/ +#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; +#else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; +#endif + +#if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS) +# define _PACKED __attribute__ ((packed)) +#else +# define _PACKED +#endif + +#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) +# if defined(__IBMC__) || defined(__SUNPRO_C) || defined(__SUNPRO_CC) +# pragma pack(1) +# else +# pragma pack(push, 1) +# endif +#endif + +typedef struct { U16 v; } _PACKED U16_S; +typedef struct { U32 v; } _PACKED U32_S; +typedef struct { U64 v; } _PACKED U64_S; +typedef struct {size_t v;} _PACKED size_t_S; + +#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) +# if defined(__SUNPRO_C) || defined(__SUNPRO_CC) +# pragma pack(0) +# else +# pragma pack(pop) +# endif +#endif + +#define A16(x) (((U16_S *)(x))->v) +#define A32(x) (((U32_S *)(x))->v) +#define A64(x) (((U64_S *)(x))->v) +#define AARCH(x) (((size_t_S *)(x))->v) + + +/************************************** + Constants +**************************************/ +#define LZ4_HASHLOG (MEMORY_USAGE-2) +#define HASHTABLESIZE (1 << 
MEMORY_USAGE) +#define HASHNBCELLS4 (1 << LZ4_HASHLOG) + +#define MINMATCH 4 + +#define COPYLENGTH 8 +#define LASTLITERALS 5 +#define MFLIMIT (COPYLENGTH+MINMATCH) +static const int LZ4_minLength = (MFLIMIT+1); + +#define KB *(1U<<10) +#define MB *(1U<<20) +#define GB *(1U<<30) + +#define LZ4_64KLIMIT ((64 KB) + (MFLIMIT-1)) +#define SKIPSTRENGTH 6 /* Increasing this value will make the compression run slower on incompressible data */ + +#define MAXD_LOG 16 +#define MAX_DISTANCE ((1 << MAXD_LOG) - 1) + +#define ML_BITS 4 +#define ML_MASK ((1U<=e; */ +#else +# define LZ4_WILDCOPY(d,s,e) { if (likely(e-d <= 8)) LZ4_COPY8(d,s) else do { LZ4_COPY8(d,s) } while (d>3); +# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_clzll(val) >> 3); +# else + int r; + if (!(val>>32)) { r=4; } else { r=0; val>>=32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif +# else +# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r = 0; + _BitScanForward64( &r, val ); + return (int)(r>>3); +# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_ctzll(val) >> 3); +# else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif +# endif +} + +#else + +FORCE_INLINE int LZ4_NbCommonBytes (register U32 val) +{ +# if defined(LZ4_BIG_ENDIAN) +# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r = 0; + _BitScanReverse( &r, val ); + return (int)(r>>3); +# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_clz(val) >> 3); +# else + int r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif +# else +# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r; + _BitScanForward( &r, val ); + return (int)(r>>3); +# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_ctz(val) >> 3); +# else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif +# endif +} + +#endif + + +/**************************** + Compression functions +****************************/ +int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); } + +FORCE_INLINE int LZ4_hashSequence(U32 sequence, tableType_t tableType) +{ + if (tableType == byU16) + return (((sequence) * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1))); + else + return (((sequence) * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG)); +} + +FORCE_INLINE int LZ4_hashPosition(const BYTE* p, tableType_t tableType) { return LZ4_hashSequence(A32(p), tableType); } + +FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase) +{ + switch (tableType) + { + case byPtr: { const BYTE** hashTable = (const BYTE**) tableBase; hashTable[h] = p; break; } + case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); break; } + case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); break; } + } +} + +FORCE_INLINE 
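/* hash the 4 bytes at p and record p's position in the hash table */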
void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase) +{ + U32 h = LZ4_hashPosition(p, tableType); + LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase); +} + +FORCE_INLINE const BYTE* LZ4_getPositionOnHash(U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase) +{ + if (tableType == byPtr) { const BYTE** hashTable = (const BYTE**) tableBase; return hashTable[h]; } + if (tableType == byU32) { U32* hashTable = (U32*) tableBase; return hashTable[h] + srcBase; } + { U16* hashTable = (U16*) tableBase; return hashTable[h] + srcBase; } /* default, to ensure a return */ +} + +FORCE_INLINE const BYTE* LZ4_getPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase) +{ + U32 h = LZ4_hashPosition(p, tableType); + return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase); +} + + +FORCE_INLINE int LZ4_compress_generic( + void* ctx, + const char* source, + char* dest, + int inputSize, + int maxOutputSize, + + limitedOutput_directive limitedOutput, + tableType_t tableType, + prefix64k_directive prefix) +{ + const BYTE* ip = (const BYTE*) source; + const BYTE* const base = (prefix==withPrefix) ? ((LZ4_Data_Structure*)ctx)->base : (const BYTE*) source; + const BYTE* const lowLimit = ((prefix==withPrefix) ? ((LZ4_Data_Structure*)ctx)->bufferStart : (const BYTE*)source); + const BYTE* anchor = (const BYTE*) source; + const BYTE* const iend = ip + inputSize; + const BYTE* const mflimit = iend - MFLIMIT; + const BYTE* const matchlimit = iend - LASTLITERALS; + + BYTE* op = (BYTE*) dest; + BYTE* const oend = op + maxOutputSize; + + int length; + const int skipStrength = SKIPSTRENGTH; + U32 forwardH; + + /* Init conditions */ + if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size, too large (or negative) */ + if ((prefix==withPrefix) && (ip != ((LZ4_Data_Structure*)ctx)->nextBlock)) return 0; /* must continue from end of previous block */ + if (prefix==withPrefix) ((LZ4_Data_Structure*)ctx)->nextBlock=iend; /* do it now, due to potential early exit */ + if ((tableType == byU16) && (inputSize>=(int)LZ4_64KLIMIT)) return 0; /* Size too large (not within 64K limit) */ + if (inputSize> skipStrength; + ip = forwardIp; + forwardIp = ip + step; + + if (unlikely(forwardIp > mflimit)) { goto _last_literals; } + + forwardH = LZ4_hashPosition(forwardIp, tableType); + ref = LZ4_getPositionOnHash(h, ctx, tableType, base); + LZ4_putPositionOnHash(ip, h, ctx, tableType, base); + + } while ((ref + MAX_DISTANCE < ip) || (A32(ref) != A32(ip))); + + /* Catch up */ + while ((ip>anchor) && (ref > lowLimit) && (unlikely(ip[-1]==ref[-1]))) { ip--; ref--; } + + /* Encode Literal length */ + length = (int)(ip - anchor); + token = op++; + if ((limitedOutput) && (unlikely(op + length + (2 + 1 + LASTLITERALS) + (length/255) > oend))) return 0; /* Check output limit */ + if (length>=(int)RUN_MASK) + { + int len = length-RUN_MASK; + *token=(RUN_MASK<= 255 ; len-=255) *op++ = 255; + *op++ = (BYTE)len; + } + else *token = (BYTE)(length<>8) > oend))) return 0; /* Check output limit */ + if (length>=(int)ML_MASK) + { + *token += ML_MASK; + length -= ML_MASK; + for (; length > 509 ; length-=510) { *op++ = 255; *op++ = 255; } + if (length >= 255) { length-=255; *op++ = 255; } + *op++ = (BYTE)length; + } + else *token += (BYTE)(length); + + /* Test end of chunk */ + if (ip > mflimit) { anchor = ip; break; } + + /* Fill table */ + LZ4_putPosition(ip-2, ctx, tableType, base); + + /* Test next position */ + ref = 
LZ4_getPosition(ip, ctx, tableType, base); + LZ4_putPosition(ip, ctx, tableType, base); + if ((ref + MAX_DISTANCE >= ip) && (A32(ref) == A32(ip))) { token = op++; *token=0; goto _next_match; } + + /* Prepare next loop */ + anchor = ip++; + forwardH = LZ4_hashPosition(ip, tableType); + } + +_last_literals: + /* Encode Last Literals */ + { + int lastRun = (int)(iend - anchor); + if ((limitedOutput) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0; /* Check output limit */ + if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<= 255 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } + else *op++ = (BYTE)(lastRun<hashTable, 0, sizeof(lz4ds->hashTable)); + lz4ds->bufferStart = base; + lz4ds->base = base; + lz4ds->nextBlock = base; +} + +int LZ4_resetStreamState(void* state, const char* inputBuffer) +{ + if ((((size_t)state) & 3) != 0) return 1; /* Error : pointer is not aligned on 4-bytes boundary */ + LZ4_init((LZ4_Data_Structure*)state, (const BYTE*)inputBuffer); + return 0; +} + +void* LZ4_create (const char* inputBuffer) +{ + void* lz4ds = ALLOCATOR(1, sizeof(LZ4_Data_Structure)); + LZ4_init ((LZ4_Data_Structure*)lz4ds, (const BYTE*)inputBuffer); + return lz4ds; +} + + +int LZ4_free (void* LZ4_Data) +{ + FREEMEM(LZ4_Data); + return (0); +} + + +char* LZ4_slideInputBuffer (void* LZ4_Data) +{ + LZ4_Data_Structure* lz4ds = (LZ4_Data_Structure*)LZ4_Data; + size_t delta = lz4ds->nextBlock - (lz4ds->bufferStart + 64 KB); + + if ( (lz4ds->base - delta > lz4ds->base) /* underflow control */ + || ((size_t)(lz4ds->nextBlock - lz4ds->base) > 0xE0000000) ) /* close to 32-bits limit */ + { + size_t deltaLimit = (lz4ds->nextBlock - 64 KB) - lz4ds->base; + int nH; + + for (nH=0; nH < HASHNBCELLS4; nH++) + { + if ((size_t)(lz4ds->hashTable[nH]) < deltaLimit) lz4ds->hashTable[nH] = 0; + else lz4ds->hashTable[nH] -= (U32)deltaLimit; + } + memcpy((void*)(lz4ds->bufferStart), (const void*)(lz4ds->nextBlock - 64 KB), 64 KB); + lz4ds->base = lz4ds->bufferStart; + lz4ds->nextBlock = lz4ds->base + 64 KB; + } + else + { + memcpy((void*)(lz4ds->bufferStart), (const void*)(lz4ds->nextBlock - 64 KB), 64 KB); + lz4ds->nextBlock -= delta; + lz4ds->base -= delta; + } + + return (char*)(lz4ds->nextBlock); +} + + +int LZ4_compress_continue (void* LZ4_Data, const char* source, char* dest, int inputSize) +{ + return LZ4_compress_generic(LZ4_Data, source, dest, inputSize, 0, notLimited, byU32, withPrefix); +} + + +int LZ4_compress_limitedOutput_continue (void* LZ4_Data, const char* source, char* dest, int inputSize, int maxOutputSize) +{ + return LZ4_compress_generic(LZ4_Data, source, dest, inputSize, maxOutputSize, limited, byU32, withPrefix); +} + + +/**************************** + Decompression functions +****************************/ +/* + * This generic decompression function cover all use cases. + * It shall be instanciated several times, using different sets of directives + * Note that it is essential this generic function is really inlined, + * in order to remove useless branches during compilation optimisation. + */ +FORCE_INLINE int LZ4_decompress_generic( + const char* source, + char* dest, + int inputSize, + int outputSize, /* If endOnInput==endOnInputSize, this value is the max size of Output Buffer. 
*/ + + int endOnInput, /* endOnOutputSize, endOnInputSize */ + int prefix64k, /* noPrefix, withPrefix */ + int partialDecoding, /* full, partial */ + int targetOutputSize /* only used if partialDecoding==partial */ + ) +{ + /* Local Variables */ + const BYTE* restrict ip = (const BYTE*) source; + const BYTE* ref; + const BYTE* const iend = ip + inputSize; + + BYTE* op = (BYTE*) dest; + BYTE* const oend = op + outputSize; + BYTE* cpy; + BYTE* oexit = op + targetOutputSize; + + /*const size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; / static reduces speed for LZ4_decompress_safe() on GCC64 */ + const size_t dec32table[] = {4-0, 4-3, 4-2, 4-3, 4-0, 4-0, 4-0, 4-0}; /* static reduces speed for LZ4_decompress_safe() on GCC64 */ + static const size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3}; + + + /* Special cases */ + if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT; /* targetOutputSize too high => decode everything */ + if ((endOnInput) && (unlikely(outputSize==0))) return ((inputSize==1) && (*ip==0)) ? 0 : -1; /* Empty output buffer */ + if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1); + + + /* Main Loop */ + while (1) + { + unsigned token; + size_t length; + + /* get runlength */ + token = *ip++; + if ((length=(token>>ML_BITS)) == RUN_MASK) + { + unsigned s=255; + while (((endOnInput)?ip(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) ) + || ((!endOnInput) && (cpy>oend-COPYLENGTH))) + { + if (partialDecoding) + { + if (cpy > oend) goto _output_error; /* Error : write attempt beyond end of output buffer */ + if ((endOnInput) && (ip+length > iend)) goto _output_error; /* Error : read attempt beyond end of input buffer */ + } + else + { + if ((!endOnInput) && (cpy != oend)) goto _output_error; /* Error : block decoding must stop exactly there */ + if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error; /* Error : input must be consumed */ + } + memcpy(op, ip, length); + ip += length; + op += length; + break; /* Necessarily EOF, due to parsing restrictions */ + } + LZ4_WILDCOPY(op, ip, cpy); ip -= (op-cpy); op = cpy; + + /* get offset */ + LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2; + if ((prefix64k==noPrefix) && (unlikely(ref < (BYTE* const)dest))) goto _output_error; /* Error : offset outside destination buffer */ + + /* get matchlength */ + if ((length=(token&ML_MASK)) == ML_MASK) + { + while ((!endOnInput) || (ipoend-COPYLENGTH-(STEPSIZE-4))) + { + if (cpy > oend-LASTLITERALS) goto _output_error; /* Error : last 5 bytes must be literals */ + LZ4_SECURECOPY(op, ref, (oend-COPYLENGTH)); + while(op (unsigned int)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16) + +/* +LZ4_compressBound() : + Provides the maximum size that LZ4 may output in a "worst case" scenario (input data not compressible) + primarily useful for memory allocation of output buffer. + inline function is recommended for the general case, + macro is also provided when result needs to be evaluated at compilation (such as stack memory allocation). + + isize : is the input size. Max supported value is LZ4_MAX_INPUT_SIZE + return : maximum output size in a "worst case" scenario + or 0, if input size is too large ( > LZ4_MAX_INPUT_SIZE) +*/ +int LZ4_compressBound(int isize); + + +/* +LZ4_compress_limitedOutput() : + Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'. + If it cannot achieve it, compression will stop, and result of the function will be zero. 
+ This function never writes outside of provided output buffer. + + inputSize : Max supported value is LZ4_MAX_INPUT_VALUE + maxOutputSize : is the size of the destination buffer (which must be already allocated) + return : the number of bytes written in buffer 'dest' + or 0 if the compression fails +*/ +int LZ4_compress_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize); + + +/* +LZ4_decompress_fast() : + originalSize : is the original and therefore uncompressed size + return : the number of bytes read from the source buffer (in other words, the compressed size) + If the source stream is malformed, the function will stop decoding and return a negative result. + note : This function is a bit faster than LZ4_decompress_safe() + This function never writes outside of output buffers, but may read beyond input buffer in case of malicious data packet. + Use this function preferably into a trusted environment (data to decode comes from a trusted source). + Destination buffer must be already allocated. Its size must be a minimum of 'outputSize' bytes. +*/ +int LZ4_decompress_fast (const char* source, char* dest, int originalSize); + + +/* +LZ4_decompress_safe_partial() : + This function decompress a compressed block of size 'inputSize' at position 'source' + into output buffer 'dest' of size 'maxOutputSize'. + The function tries to stop decompressing operation as soon as 'targetOutputSize' has been reached, + reducing decompression time. + return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize) + Note : this number can be < 'targetOutputSize' should the compressed block to decode be smaller. + Always control how many bytes were decoded. + If the source stream is detected malformed, the function will stop decoding and return a negative result. + This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets +*/ +int LZ4_decompress_safe_partial (const char* source, char* dest, int inputSize, int targetOutputSize, int maxOutputSize); + + +/* +These functions are provided should you prefer to allocate memory for compression tables with your own allocation methods. +To know how much memory must be allocated for the compression tables, use : +int LZ4_sizeofState(); + +Note that tables must be aligned on 4-bytes boundaries, otherwise compression will fail (return code 0). + +The allocated memory can be provided to the compressions functions using 'void* state' parameter. +LZ4_compress_withState() and LZ4_compress_limitedOutput_withState() are equivalent to previously described functions. +They just use the externally allocated memory area instead of allocating their own (on stack, or on heap). 
+*/ +int LZ4_sizeofState(void); +int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize); +int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize); + + +/************************************** + Streaming Functions +**************************************/ +void* LZ4_create (const char* inputBuffer); +int LZ4_compress_continue (void* LZ4_Data, const char* source, char* dest, int inputSize); +int LZ4_compress_limitedOutput_continue (void* LZ4_Data, const char* source, char* dest, int inputSize, int maxOutputSize); +char* LZ4_slideInputBuffer (void* LZ4_Data); +int LZ4_free (void* LZ4_Data); + +/* +These functions allow the compression of dependent blocks, where each block benefits from prior 64 KB within preceding blocks. +In order to achieve this, it is necessary to start creating the LZ4 Data Structure, thanks to the function : + +void* LZ4_create (const char* inputBuffer); +The result of the function is the (void*) pointer on the LZ4 Data Structure. +This pointer will be needed in all other functions. +If the pointer returned is NULL, then the allocation has failed, and compression must be aborted. +The only parameter 'const char* inputBuffer' must, obviously, point at the beginning of input buffer. +The input buffer must be already allocated, and size at least 192KB. +'inputBuffer' will also be the 'const char* source' of the first block. + +All blocks are expected to lay next to each other within the input buffer, starting from 'inputBuffer'. +To compress each block, use either LZ4_compress_continue() or LZ4_compress_limitedOutput_continue(). +Their behavior are identical to LZ4_compress() or LZ4_compress_limitedOutput(), +but require the LZ4 Data Structure as their first argument, and check that each block starts right after the previous one. +If next block does not begin immediately after the previous one, the compression will fail (return 0). + +When it's no longer possible to lay the next block after the previous one (not enough space left into input buffer), a call to : +char* LZ4_slideInputBuffer(void* LZ4_Data); +must be performed. It will typically copy the latest 64KB of input at the beginning of input buffer. +Note that, for this function to work properly, minimum size of an input buffer must be 192KB. +==> The memory position where the next input data block must start is provided as the result of the function. + +Compression can then resume, using LZ4_compress_continue() or LZ4_compress_limitedOutput_continue(), as usual. + +When compression is completed, a call to LZ4_free() will release the memory used by the LZ4 Data Structure. +*/ + + +int LZ4_sizeofStreamState(void); +int LZ4_resetStreamState(void* state, const char* inputBuffer); + +/* +These functions achieve the same result as : +void* LZ4_create (const char* inputBuffer); + +They are provided here to allow the user program to allocate memory using its own routines. + +To know how much space must be allocated, use LZ4_sizeofStreamState(); +Note also that space must be 4-bytes aligned. + +Once space is allocated, you must initialize it using : LZ4_resetStreamState(void* state, const char* inputBuffer); +void* state is a pointer to the space allocated. +It must be aligned on 4-bytes boundaries, and be large enough. +The parameter 'const char* inputBuffer' must, obviously, point at the beginning of input buffer. +The input buffer must be already allocated, and size at least 192KB. 
+'inputBuffer' will also be the 'const char* source' of the first block. + +The same space can be re-used multiple times, just by initializing it each time with LZ4_resetStreamState(). +return value of LZ4_resetStreamState() must be 0 is OK. +Any other value means there was an error (typically, pointer is not aligned on 4-bytes boundaries). +*/ + + +int LZ4_decompress_safe_withPrefix64k (const char* source, char* dest, int inputSize, int maxOutputSize); +int LZ4_decompress_fast_withPrefix64k (const char* source, char* dest, int outputSize); + +/* +*_withPrefix64k() : + These decoding functions work the same as their "normal name" versions, + but can use up to 64KB of data in front of 'char* dest'. + These functions are necessary to decode inter-dependant blocks. +*/ + + +/************************************** + Obsolete Functions +**************************************/ +/* +These functions are deprecated and should no longer be used. +They are provided here for compatibility with existing user programs. +*/ +int LZ4_uncompress (const char* source, char* dest, int outputSize); +int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); + + +#if defined (__cplusplus) +} +#endif diff --git a/src/common/lz4/lz4hc.c b/src/common/lz4/lz4hc.c new file mode 100644 index 0000000..4cb3eff --- /dev/null +++ b/src/common/lz4/lz4hc.c @@ -0,0 +1,908 @@ +/* + LZ4 HC - High Compression Mode of LZ4 + Copyright (C) 2011-2014, Yann Collet. + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html + - LZ4 source repository : http://code.google.com/p/lz4/ +*/ + + + +/************************************** + Tuning Parameter +**************************************/ +#define LZ4HC_DEFAULT_COMPRESSIONLEVEL 8 + + +/************************************** + Memory routines +**************************************/ +#include /* calloc, free */ +#define ALLOCATOR(s) calloc(1,s) +#define FREEMEM free +#include /* memset, memcpy */ +#define MEM_INIT memset + + +/************************************** + CPU Feature Detection +**************************************/ +/* 32 or 64 bits ? 
*/ +#if (defined(__x86_64__) || defined(_M_X64) || defined(_WIN64) \ + || defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) \ + || defined(__64BIT__) || defined(_LP64) || defined(__LP64__) \ + || defined(__ia64) || defined(__itanium__) || defined(_M_IA64) ) /* Detects 64 bits mode */ +# define LZ4_ARCH64 1 +#else +# define LZ4_ARCH64 0 +#endif + +/* + * Little Endian or Big Endian ? + * Overwrite the #define below if you know your architecture endianess + */ +#if defined (__GLIBC__) +# include +# if (__BYTE_ORDER == __BIG_ENDIAN) +# define LZ4_BIG_ENDIAN 1 +# endif +#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN)) +# define LZ4_BIG_ENDIAN 1 +#elif defined(__sparc) || defined(__sparc__) \ + || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \ + || defined(__hpux) || defined(__hppa) \ + || defined(_MIPSEB) || defined(__s390__) +# define LZ4_BIG_ENDIAN 1 +#else +/* Little Endian assumed. PDP Endian and other very rare endian format are unsupported. */ +#endif + +/* + * Unaligned memory access is automatically enabled for "common" CPU, such as x86. + * For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected + * If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance + */ +#if defined(__ARM_FEATURE_UNALIGNED) +# define LZ4_FORCE_UNALIGNED_ACCESS 1 +#endif + +/* Define this parameter if your target system or compiler does not support hardware bit count */ +#if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for Windows CE does not support Hardware bit count */ +# define LZ4_FORCE_SW_BITCOUNT +#endif + + +/************************************** + Compiler Options +**************************************/ +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ +/* "restrict" is a known keyword */ +#else +# define restrict /* Disable restrict */ +#endif + +#ifdef _MSC_VER /* Visual Studio */ +# define FORCE_INLINE static __forceinline +# include /* For Visual 2005 */ +# if LZ4_ARCH64 /* 64-bits */ +# pragma intrinsic(_BitScanForward64) /* For Visual 2005 */ +# pragma intrinsic(_BitScanReverse64) /* For Visual 2005 */ +# else /* 32-bits */ +# pragma intrinsic(_BitScanForward) /* For Visual 2005 */ +# pragma intrinsic(_BitScanReverse) /* For Visual 2005 */ +# endif +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4701) /* disable: C4701: potentially uninitialized local variable used */ +#else +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +#endif + +#ifdef _MSC_VER /* Visual Studio */ +# define lz4_bswap16(x) _byteswap_ushort(x) +#else +# define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8))) +#endif + + +/************************************** + Includes +**************************************/ +#include "lz4hc.h" +#include "lz4.h" +#include "postgres.h" + +/************************************** + Basic Types +**************************************/ +#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; +#else + typedef unsigned char BYTE; + typedef unsigned 
short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; +#endif + +#if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS) +# define _PACKED __attribute__ ((packed)) +#else +# define _PACKED +#endif + +#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) +# ifdef __IBMC__ +# pragma pack(1) +# else +# pragma pack(push, 1) +# endif +#endif + +typedef struct _U16_S { U16 v; } _PACKED U16_S; +typedef struct _U32_S { U32 v; } _PACKED U32_S; +typedef struct _U64_S { U64 v; } _PACKED U64_S; + +#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) +# pragma pack(pop) +#endif + +#define A64(x) (((U64_S *)(x))->v) +#define A32(x) (((U32_S *)(x))->v) +#define A16(x) (((U16_S *)(x))->v) + + +/************************************** + Constants +**************************************/ +#define MINMATCH 4 + +#define DICTIONARY_LOGSIZE 16 +#define MAXD (1<> ((MINMATCH*8)-HASH_LOG)) +#define HASH_VALUE(p) HASH_FUNCTION(A32(p)) +#define HASH_POINTER(p) (HashTable[HASH_VALUE(p)] + base) +#define DELTANEXT(p) chainTable[(size_t)(p) & MAXD_MASK] +#define GETNEXT(p) ((p) - (size_t)DELTANEXT(p)) + + +/************************************** + Private functions +**************************************/ +#if LZ4_ARCH64 + +FORCE_INLINE int LZ4_NbCommonBytes (register U64 val) +{ +#if defined(LZ4_BIG_ENDIAN) +# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r = 0; + _BitScanReverse64( &r, val ); + return (int)(r>>3); +# elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_clzll(val) >> 3); +# else + int r; + if (!(val>>32)) { r=4; } else { r=0; val>>=32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif +#else +# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r = 0; + _BitScanForward64( &r, val ); + return (int)(r>>3); +# elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_ctzll(val) >> 3); +# else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58]; +# endif +#endif +} + +#else + +FORCE_INLINE int LZ4_NbCommonBytes (register U32 val) +{ +#if defined(LZ4_BIG_ENDIAN) +# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r; + _BitScanReverse( &r, val ); + return (int)(r>>3); +# elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_clz(val) >> 3); +# else + int r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif +#else +# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r; + _BitScanForward( &r, val ); + return (int)(r>>3); +# elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_ctz(val) >> 3); +# else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif +#endif +} + +#endif + + +int LZ4_sizeofStreamStateHC() +{ + return sizeof(LZ4HC_Data_Structure); +} + +FORCE_INLINE 
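/* (re)initialize hash and chain tables over a fresh input buffer */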
void LZ4_initHC (LZ4HC_Data_Structure* hc4, const BYTE* base) +{ + MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable)); + MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable)); + hc4->nextToUpdate = base + 1; + hc4->base = base; + hc4->inputBuffer = base; + hc4->end = base; +} + +int LZ4_resetStreamStateHC(void* state, const char* inputBuffer) +{ + if ((((size_t)state) & (sizeof(void*)-1)) != 0) return 1; /* Error : pointer is not aligned for pointer (32 or 64 bits) */ + LZ4_initHC((LZ4HC_Data_Structure*)state, (const BYTE*)inputBuffer); + return 0; +} + + +void* LZ4_createHC (const char* inputBuffer) +{ + void* hc4 = ALLOCATOR(sizeof(LZ4HC_Data_Structure)); + LZ4_initHC ((LZ4HC_Data_Structure*)hc4, (const BYTE*)inputBuffer); + return hc4; +} + + +int LZ4_freeHC (void* LZ4HC_Data) +{ + FREEMEM(LZ4HC_Data); + return (0); +} + + +/* Update chains up to ip (excluded) */ +FORCE_INLINE void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip) +{ + U16* chainTable = hc4->chainTable; + HTYPE* HashTable = hc4->hashTable; + INITBASE(base,hc4->base); + + while(hc4->nextToUpdate < ip) + { + const BYTE* const p = hc4->nextToUpdate; + size_t delta = (p) - HASH_POINTER(p); + if (delta>MAX_DISTANCE) delta = MAX_DISTANCE; + DELTANEXT(p) = (U16)delta; + HashTable[HASH_VALUE(p)] = (HTYPE)((p) - base); + hc4->nextToUpdate++; + } +} + + +char* LZ4_slideInputBufferHC(void* LZ4HC_Data) +{ + LZ4HC_Data_Structure* hc4 = (LZ4HC_Data_Structure*)LZ4HC_Data; + U32 distance = (U32)(hc4->end - hc4->inputBuffer) - 64 KB; + distance = (distance >> 16) << 16; /* Must be a multiple of 64 KB */ + LZ4HC_Insert(hc4, hc4->end - MINMATCH); + memcpy((void*)(hc4->end - 64 KB - distance), (const void*)(hc4->end - 64 KB), 64 KB); + hc4->nextToUpdate -= distance; + hc4->base -= distance; + if ((U32)(hc4->inputBuffer - hc4->base) > 1 GB + 64 KB) /* Avoid overflow */ + { + int i; + hc4->base += 1 GB; + for (i=0; ihashTable[i] -= 1 GB; + } + hc4->end -= distance; + return (char*)(hc4->end); +} + + +FORCE_INLINE size_t LZ4HC_CommonLength (const BYTE* p1, const BYTE* p2, const BYTE* const matchlimit) +{ + const BYTE* p1t = p1; + + while (p1tchainTable; + HTYPE* const HashTable = hc4->hashTable; + const BYTE* ref; + INITBASE(base,hc4->base); + int nbAttempts=maxNbAttempts; + size_t repl=0, ml=0; + U16 delta=0; /* useless assignment, to remove an uninitialization warning */ + + /* HC4 match finder */ + LZ4HC_Insert(hc4, ip); + ref = HASH_POINTER(ip); + +#define REPEAT_OPTIMIZATION +#ifdef REPEAT_OPTIMIZATION + /* Detect repetitive sequences of length <= 4 */ + if ((U32)(ip-ref) <= 4) /* potential repetition */ + { + if (A32(ref) == A32(ip)) /* confirmed */ + { + delta = (U16)(ip-ref); + repl = ml = LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit) + MINMATCH; + *matchpos = ref; + } + ref = GETNEXT(ref); + } +#endif + + while (((U32)(ip-ref) <= MAX_DISTANCE) && (nbAttempts)) + { + nbAttempts--; + if (*(ref+ml) == *(ip+ml)) + if (A32(ref) == A32(ip)) + { + size_t mlt = LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit) + MINMATCH; + if (mlt > ml) { ml = mlt; *matchpos = ref; } + } + ref = GETNEXT(ref); + } + +#ifdef REPEAT_OPTIMIZATION + /* Complete table */ + if (repl) + { + const BYTE* ptr = ip; + const BYTE* end; + + end = ip + repl - (MINMATCH-1); + while(ptr < end-delta) + { + DELTANEXT(ptr) = delta; /* Pre-Load */ + ptr++; + } + do + { + DELTANEXT(ptr) = delta; + HashTable[HASH_VALUE(ptr)] = (HTYPE)((ptr) - base); /* Head of chain */ + ptr++; + } while(ptr < end); + hc4->nextToUpdate = end; + } +#endif + + 
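/* ml is the longest match length found, or 0 when no match exists */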
return (int)ml; +} + + +FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* startLimit, const BYTE* matchlimit, int longest, const BYTE** matchpos, const BYTE** startpos, const int maxNbAttempts) +{ + U16* const chainTable = hc4->chainTable; + HTYPE* const HashTable = hc4->hashTable; + INITBASE(base,hc4->base); + const BYTE* ref; + int nbAttempts = maxNbAttempts; + int delta = (int)(ip-startLimit); + + /* First Match */ + LZ4HC_Insert(hc4, ip); + ref = HASH_POINTER(ip); + + while (((U32)(ip-ref) <= MAX_DISTANCE) && (nbAttempts)) + { + nbAttempts--; + if (*(startLimit + longest) == *(ref - delta + longest)) + if (A32(ref) == A32(ip)) + { +#if 1 + const BYTE* reft = ref+MINMATCH; + const BYTE* ipt = ip+MINMATCH; + const BYTE* startt = ip; + + while (iptstartLimit) && (reft > hc4->inputBuffer) && (startt[-1] == reft[-1])) {startt--; reft--;} + + if ((ipt-startt) > longest) + { + longest = (int)(ipt-startt); + *matchpos = reft; + *startpos = startt; + } + } + ref = GETNEXT(ref); + } + + return longest; +} + + +typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive; + +FORCE_INLINE int LZ4HC_encodeSequence ( + const BYTE** ip, + BYTE** op, + const BYTE** anchor, + int matchLength, + const BYTE* ref, + limitedOutput_directive limitedOutputBuffer, + BYTE* oend) +{ + int length; + BYTE* token; + + /* Encode Literal length */ + length = (int)(*ip - *anchor); + token = (*op)++; + if ((limitedOutputBuffer) && ((*op + length + (2 + 1 + LASTLITERALS) + (length>>8)) > oend)) return 1; /* Check output limit */ + if (length>=(int)RUN_MASK) { int len; *token=(RUN_MASK< 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; } + else *token = (BYTE)(length<>8) > oend)) return 1; /* Check output limit */ + if (length>=(int)ML_MASK) { *token+=ML_MASK; length-=ML_MASK; for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (length > 254) { length-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)length; } + else *token += (BYTE)(length); + + /* Prepare next loop */ + *ip += matchLength; + *anchor = *ip; + + return 0; +} + + +#define MAX_COMPRESSION_LEVEL 16 +static int LZ4HC_compress_generic ( + void* ctxvoid, + const char* source, + char* dest, + int inputSize, + int maxOutputSize, + int compressionLevel, + limitedOutput_directive limit + ) +{ + LZ4HC_Data_Structure* ctx = (LZ4HC_Data_Structure*) ctxvoid; + const BYTE* ip = (const BYTE*) source; + const BYTE* anchor = ip; + const BYTE* const iend = ip + inputSize; + const BYTE* const mflimit = iend - MFLIMIT; + const BYTE* const matchlimit = (iend - LASTLITERALS); + + BYTE* op = (BYTE*) dest; + BYTE* const oend = op + maxOutputSize; + + const int maxNbAttempts = compressionLevel > MAX_COMPRESSION_LEVEL ? 1 << MAX_COMPRESSION_LEVEL : compressionLevel ? 
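/* number of search attempts doubles with each compression level */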
1<<(compressionLevel-1) : 1<end) return 0; + ctx->end += inputSize; + + ip++; + + /* Main Loop */ + while (ip < mflimit) + { + ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref), maxNbAttempts); + if (!ml) { ip++; continue; } + + /* saved, in case we would skip too much */ + start0 = ip; + ref0 = ref; + ml0 = ml; + +_Search2: + if (ip+ml < mflimit) + ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2, maxNbAttempts); + else ml2 = ml; + + if (ml2 == ml) /* No better match */ + { + if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0; + continue; + } + + if (start0 < ip) + { + if (start2 < ip + ml0) /* empirical */ + { + ip = start0; + ref = ref0; + ml = ml0; + } + } + + /* Here, start0==ip */ + if ((start2 - ip) < 3) /* First Match too small : removed */ + { + ml = ml2; + ip = start2; + ref =ref2; + goto _Search2; + } + +_Search3: + /* + * Currently we have : + * ml2 > ml1, and + * ip1+3 <= ip2 (usually < ip1+ml1) + */ + if ((start2 - ip) < OPTIMAL_ML) + { + int correction; + int new_ml = ml; + if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML; + if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH; + correction = new_ml - (int)(start2 - ip); + if (correction > 0) + { + start2 += correction; + ref2 += correction; + ml2 -= correction; + } + } + /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */ + + if (start2 + ml2 < mflimit) + ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, maxNbAttempts); + else ml3 = ml2; + + if (ml3 == ml2) /* No better match : 2 sequences to encode */ + { + /* ip & ref are known; Now for ml */ + if (start2 < ip+ml) ml = (int)(start2 - ip); + /* Now, encode 2 sequences */ + if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0; + ip = start2; + if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml2, ref2, limit, oend)) return 0; + continue; + } + + if (start3 < ip+ml+3) /* Not enough space for match 2 : remove it */ + { + if (start3 >= (ip+ml)) /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */ + { + if (start2 < ip+ml) + { + int correction = (int)(ip+ml - start2); + start2 += correction; + ref2 += correction; + ml2 -= correction; + if (ml2 < MINMATCH) + { + start2 = start3; + ref2 = ref3; + ml2 = ml3; + } + } + + if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0; + ip = start3; + ref = ref3; + ml = ml3; + + start0 = start2; + ref0 = ref2; + ml0 = ml2; + goto _Search2; + } + + start2 = start3; + ref2 = ref3; + ml2 = ml3; + goto _Search3; + } + + /* + * OK, now we have 3 ascending matches; let's write at least the first one + * ip & ref are known; Now for ml + */ + if (start2 < ip+ml) + { + if ((start2 - ip) < (int)ML_MASK) + { + int correction; + if (ml > OPTIMAL_ML) ml = OPTIMAL_ML; + if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH; + correction = ml - (int)(start2 - ip); + if (correction > 0) + { + start2 += correction; + ref2 += correction; + ml2 -= correction; + } + } + else + { + ml = (int)(start2 - ip); + } + } + if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0; + + ip = start2; + ref = ref2; + ml = ml2; + + start2 = start3; + ref2 = ref3; + ml2 = ml3; + + goto _Search3; + + } + + /* Encode Last Literals */ + { + int lastRun = (int)(iend - anchor); + if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0; /* 
Check output limit */ + if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK< 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } + else *op++ = (BYTE)(lastRun< The memory position where the next input data block must start is provided as the result of the function. + +Compression can then resume, using LZ4_compressHC_continue() or LZ4_compressHC_limitedOutput_continue(), as usual. + +When compression is completed, a call to LZ4_freeHC() will release the memory used by the LZ4HC Data Structure. +*/ + +int LZ4_sizeofStreamStateHC(void); +int LZ4_resetStreamStateHC(void* state, const char* inputBuffer); + +/* +These functions achieve the same result as : +void* LZ4_createHC (const char* inputBuffer); + +They are provided here to allow the user program to allocate memory using its own routines. + +To know how much space must be allocated, use LZ4_sizeofStreamStateHC(); +Note also that space must be aligned for pointers (32 or 64 bits). + +Once space is allocated, you must initialize it using : LZ4_resetStreamStateHC(void* state, const char* inputBuffer); +void* state is a pointer to the space allocated. +It must be aligned for pointers (32 or 64 bits), and be large enough. +The parameter 'const char* inputBuffer' must, obviously, point at the beginning of input buffer. +The input buffer must be already allocated, and size at least 192KB. +'inputBuffer' will also be the 'const char* source' of the first block. + +The same space can be re-used multiple times, just by initializing it each time with LZ4_resetStreamState(). +return value of LZ4_resetStreamStateHC() must be 0 is OK. +Any other value means there was an error (typically, state is not aligned for pointers (32 or 64 bits)). +*/ + + +#if defined (__cplusplus) +} +#endif diff --git a/src/common/snappy/LICENSE b/src/common/snappy/LICENSE new file mode 100644 index 0000000..247b6dc --- /dev/null +++ b/src/common/snappy/LICENSE @@ -0,0 +1,29 @@ +The snappy-c code is under the same license as the original snappy source + +Copyright 2011 Intel Corporation All Rights Reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Intel Corporation nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
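For reference, a minimal round-trip through the lz4.h API added above might look like the sketch below. It uses only names declared in this patch (LZ4_COMPRESSBOUND, LZ4_compress_limitedOutput, LZ4_decompress_fast); the input string and buffer sizes are illustrative, and it assumes the patched headers are on the include path:

#include <stdio.h>
#include <string.h>
#include "lz4.h"

int
main(void)
{
	const char	src[] = "PostgreSQL pages often compress well, well, well, well.";
	int			srcSize = (int) sizeof(src);
	char		compressed[LZ4_COMPRESSBOUND(sizeof(src))];	/* worst-case output size */
	char		restored[sizeof(src)];
	int			csize;
	int			dsize;

	/* LZ4_compress_limitedOutput() returns 0 if 'compressed' is too small */
	csize = LZ4_compress_limitedOutput(src, compressed, srcSize,
									   (int) sizeof(compressed));
	if (csize == 0)
		return 1;

	/*
	 * LZ4_decompress_fast() takes the original (uncompressed) size and
	 * returns the number of compressed bytes it consumed; the result is
	 * negative on malformed input.
	 */
	dsize = LZ4_decompress_fast(compressed, restored, srcSize);
	if (dsize < 0 || memcmp(src, restored, srcSize) != 0)
		return 1;

	printf("%d -> %d -> %d bytes\n", srcSize, csize, srcSize);
	return 0;
}

Per the header comments, LZ4_decompress_fast() may read beyond the input buffer on malicious data, so untrusted input should go through the _safe variants instead.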
diff --git a/src/common/snappy/Makefile b/src/common/snappy/Makefile
new file mode 100644
index 0000000..529fd22
--- /dev/null
+++ b/src/common/snappy/Makefile
@@ -0,0 +1,36 @@
+#-------------------------------------------------------------------------
+#
+# Makefile--
+#    Makefile for common/snappy
+#
+# IDENTIFICATION
+#    src/common/snappy/Makefile
+#
+#-------------------------------------------------------------------------
+
+subdir = src/common/snappy
+top_builddir = ../../..
+include $(top_builddir)/src/Makefile.global
+
+override CPPFLAGS := -Wno-declaration-after-statement $(CPPFLAGS)
+#override CPPFLAGS := -Wno-missing-declarations -Wno-missing-prototypes $(CPPFLAGS)
+override CPPFLAGS := -DFRONTEND $(CPPFLAGS)
+
+OBJS = snappy.o
+OBJS_SRV = $(OBJS:%.o=%_srv.o)
+
+include $(top_srcdir)/src/backend/common.mk
+
+%_srv.o: %.c %.o
+	$(CC) $(CFLAGS) $(subst -DFRONTEND,, $(CPPFLAGS)) -c $< -o $@
+
+all: $(OBJS_SRV)
+#$(OBJS_SRV) : | submake-errcodes
+
+#.PHONY: submake-errcodes
+
+#submake-errcodes:
+#	$(MAKE) -C ../../backend submake-errcodes
+
+clean distclean maintainer-clean:
+	rm -f $(OBJS_SRV)
diff --git a/src/common/snappy/snappy-compat.h b/src/common/snappy/snappy-compat.h
new file mode 100644
index 0000000..69d1735
--- /dev/null
+++ b/src/common/snappy/snappy-compat.h
@@ -0,0 +1,57 @@
+#ifdef __FreeBSD__
+# include <sys/endian.h>
+#elif defined(__APPLE_CC_) || defined(__MACH__) /* MacOS/X support */
+# include <machine/endian.h>
+
+#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
+# define htole16(x) (x)
+# define le32toh(x) (x)
+#elif __DARWIN_BYTE_ORDER == __DARWIN_BIG_ENDIAN
+# define htole16(x) __DARWIN_OSSwapInt16(x)
+# define le32toh(x) __DARWIN_OSSwapInt32(x)
+#else
+# error "Endianness is undefined"
+#endif
+
+
+#else
+# include <endian.h>
+#endif
+
+#include <stdbool.h>
+#include <string.h>
+#include <limits.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <assert.h>
+
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned u32;
+typedef unsigned long long u64;
+
+#define BUG_ON(x) assert(!(x))
+
+#define get_unaligned(x) (*(x))
+#define get_unaligned_le32(x) (le32toh(*(u32 *)(x)))
+#define put_unaligned(v,x) (*(x) = (v))
+#define put_unaligned_le16(v,x) (*(u16 *)(x) = htole16(v))
+
+#define vmalloc(x) malloc(x)
+#define vfree(x) free(x)
+
+#define EXPORT_SYMBOL(x)
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
+
+#define likely(x) __builtin_expect((x), 1)
+#define unlikely(x) __builtin_expect((x), 0)
+
+#define min_t(t,x,y) ((x) < (y) ? (x) : (y))
+#define max_t(t,x,y) ((x) > (y) ? (x) : (y))
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define __LITTLE_ENDIAN__ 1
+#endif
+
+#define BITS_PER_LONG (__SIZEOF_LONG__ * 8)
diff --git a/src/common/snappy/snappy-int.h b/src/common/snappy/snappy-int.h
new file mode 100644
index 0000000..ad31b54
--- /dev/null
+++ b/src/common/snappy/snappy-int.h
@@ -0,0 +1,71 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Various stubs for the open-source version of Snappy. + +#define likely(x) __builtin_expect(x, 1) +#define unlikely(x) __builtin_expect(x, 0) + +#define CRASH_UNLESS(x) assert(x) +#define CHECK(cond) assert(cond) +#define CHECK_LE(a, b) CRASH_UNLESS((a) <= (b)) +#define CHECK_GE(a, b) CRASH_UNLESS((a) >= (b)) +#define CHECK_EQ(a, b) CRASH_UNLESS((a) == (b)) +#define CHECK_NE(a, b) CRASH_UNLESS((a) != (b)) +#define CHECK_LT(a, b) CRASH_UNLESS((a) < (b)) +#define CHECK_GT(a, b) CRASH_UNLESS((a) > (b)) + +#define UNALIGNED_LOAD16(_p) (*(const uint16 *)(_p)) +#define UNALIGNED_LOAD32(_p) (*(const uint32 *)(_p)) +#define UNALIGNED_LOAD64(_p) (*(const uint64 *)(_p)) + +#define UNALIGNED_STORE16(_p, _val) (*(uint16 *)(_p) = (_val)) +#define UNALIGNED_STORE32(_p, _val) (*(uint32 *)(_p) = (_val)) +#define UNALIGNED_STORE64(_p, _val) (*(uint64 *)(_p) = (_val)) + +#ifdef NDEBUG + +#define DCHECK(cond) CRASH_UNLESS(true) +#define DCHECK_LE(a, b) CRASH_UNLESS(true) +#define DCHECK_GE(a, b) CRASH_UNLESS(true) +#define DCHECK_EQ(a, b) CRASH_UNLESS(true) +#define DCHECK_NE(a, b) CRASH_UNLESS(true) +#define DCHECK_LT(a, b) CRASH_UNLESS(true) +#define DCHECK_GT(a, b) CRASH_UNLESS(true) + +#else + +#define DCHECK(cond) CHECK(cond) +#define DCHECK_LE(a, b) CHECK_LE(a, b) +#define DCHECK_GE(a, b) CHECK_GE(a, b) +#define DCHECK_EQ(a, b) CHECK_EQ(a, b) +#define DCHECK_NE(a, b) CHECK_NE(a, b) +#define DCHECK_LT(a, b) CHECK_LT(a, b) +#define DCHECK_GT(a, b) CHECK_GT(a, b) + +#endif diff --git a/src/common/snappy/snappy.c b/src/common/snappy/snappy.c new file mode 100644 index 0000000..acfcffc --- /dev/null +++ b/src/common/snappy/snappy.c @@ -0,0 +1,1563 @@ +/* + * C port of the snappy compressor from Google. + * This is a very fast compressor with comparable compression to lzo. + * Works best on 64bit little-endian, but should be good on others too. + * Ported by Andi Kleen. + * Based on snappy 1.0.3 plus some selected changes from SVN. + */ + +/* + * Copyright 2005 Google Inc. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "postgres.h" + +#include "snappy.h" +#include "snappy-compat.h" + +#define CRASH_UNLESS(x) BUG_ON(!(x)) +#define CHECK(cond) CRASH_UNLESS(cond) +#define CHECK_LE(a, b) CRASH_UNLESS((a) <= (b)) +#define CHECK_GE(a, b) CRASH_UNLESS((a) >= (b)) +#define CHECK_EQ(a, b) CRASH_UNLESS((a) == (b)) +#define CHECK_NE(a, b) CRASH_UNLESS((a) != (b)) +#define CHECK_LT(a, b) CRASH_UNLESS((a) < (b)) +#define CHECK_GT(a, b) CRASH_UNLESS((a) > (b)) + +#define UNALIGNED_LOAD16(_p) get_unaligned((u16 *)(_p)) +#define UNALIGNED_LOAD32(_p) get_unaligned((u32 *)(_p)) +#define UNALIGNED_LOAD64(_p) get_unaligned((u64 *)(_p)) + +#define UNALIGNED_STORE16(_p, _val) put_unaligned(_val, (u16 *)(_p)) +#define UNALIGNED_STORE32(_p, _val) put_unaligned(_val, (u32 *)(_p)) +#define UNALIGNED_STORE64(_p, _val) put_unaligned(_val, (u64 *)(_p)) + +#ifdef NDEBUG + +#define DCHECK(cond) do {} while(0) +#define DCHECK_LE(a, b) do {} while(0) +#define DCHECK_GE(a, b) do {} while(0) +#define DCHECK_EQ(a, b) do {} while(0) +#define DCHECK_NE(a, b) do {} while(0) +#define DCHECK_LT(a, b) do {} while(0) +#define DCHECK_GT(a, b) do {} while(0) + +#else + +#define DCHECK(cond) CHECK(cond) +#define DCHECK_LE(a, b) CHECK_LE(a, b) +#define DCHECK_GE(a, b) CHECK_GE(a, b) +#define DCHECK_EQ(a, b) CHECK_EQ(a, b) +#define DCHECK_NE(a, b) CHECK_NE(a, b) +#define DCHECK_LT(a, b) CHECK_LT(a, b) +#define DCHECK_GT(a, b) CHECK_GT(a, b) + +#endif + +static inline bool is_little_endian(void) +{ +#ifndef WORDS_BIGENDIAN + return true; +#endif + return false; +} + +static inline int log2_floor(u32 n) +{ + return n == 0 ? -1 : 31 ^ __builtin_clz(n); +} + +static inline int find_lsb_set_non_zero(u32 n) +{ + return __builtin_ctz(n); +} + +static inline int find_lsb_set_non_zero64(u64 n) +{ + return __builtin_ctzll(n); +} + +#define kmax32 5 + +/* + * Attempts to parse a varint32 from a prefix of the bytes in [ptr,limit-1]. + * Never reads a character at or beyond limit. If a valid/terminated varint32 + * was found in the range, stores it in *OUTPUT and returns a pointer just + * past the last byte of the varint32. Else returns NULL. On success, + * "result <= limit". 
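+ *
+ * For example, the two bytes 0xAC 0x02 decode to (0xAC & 127) | (2 << 7) = 300;
+ * bit 7 of each byte is a continuation flag, so values below 128 take one byte.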
+ */ +static inline const char *varint_parse32_with_limit(const char *p, + const char *l, + u32 * OUTPUT) +{ + const unsigned char *ptr = (const unsigned char *)(p); + const unsigned char *limit = (const unsigned char *)(l); + u32 b, result; + + if (ptr >= limit) + return NULL; + b = *(ptr++); + result = b & 127; + if (b < 128) + goto done; + if (ptr >= limit) + return NULL; + b = *(ptr++); + result |= (b & 127) << 7; + if (b < 128) + goto done; + if (ptr >= limit) + return NULL; + b = *(ptr++); + result |= (b & 127) << 14; + if (b < 128) + goto done; + if (ptr >= limit) + return NULL; + b = *(ptr++); + result |= (b & 127) << 21; + if (b < 128) + goto done; + if (ptr >= limit) + return NULL; + b = *(ptr++); + result |= (b & 127) << 28; + if (b < 16) + goto done; + return NULL; /* Value is too long to be a varint32 */ +done: + *OUTPUT = result; + return (const char *)(ptr); +} + +/* + * REQUIRES "ptr" points to a buffer of length sufficient to hold "v". + * EFFECTS Encodes "v" into "ptr" and returns a pointer to the + * byte just past the last encoded byte. + */ +static inline char *varint_encode32(char *sptr, u32 v) +{ + /* Operate on characters as unsigneds */ + unsigned char *ptr = (unsigned char *)(sptr); + static const int B = 128; + + if (v < (1 << 7)) { + *(ptr++) = v; + } else if (v < (1 << 14)) { + *(ptr++) = v | B; + *(ptr++) = v >> 7; + } else if (v < (1 << 21)) { + *(ptr++) = v | B; + *(ptr++) = (v >> 7) | B; + *(ptr++) = v >> 14; + } else if (v < (1 << 28)) { + *(ptr++) = v | B; + *(ptr++) = (v >> 7) | B; + *(ptr++) = (v >> 14) | B; + *(ptr++) = v >> 21; + } else { + *(ptr++) = v | B; + *(ptr++) = (v >> 7) | B; + *(ptr++) = (v >> 14) | B; + *(ptr++) = (v >> 21) | B; + *(ptr++) = v >> 28; + } + return (char *)(ptr); +} + +#ifdef SG + +struct source { + struct iovec *iov; + int iovlen; + int curvec; + int curoff; + size_t total; +}; + +/* Only valid at beginning when nothing is consumed */ +static inline int available(struct source *s) +{ + return s->total; +} + +static inline const char *peek(struct source *s, size_t *len) +{ + if (likely(s->curvec < s->iovlen)) { + struct iovec *iv = &s->iov[s->curvec]; + if (s->curoff < iv->iov_len) { + *len = iv->iov_len - s->curoff; + return iv->iov_base + s->curoff; + } + } + *len = 0; + return NULL; +} + +static inline void skip(struct source *s, size_t n) +{ + struct iovec *iv = &s->iov[s->curvec]; + s->curoff += n; + DCHECK_LE(s->curoff, iv->iov_len); + if (s->curoff >= iv->iov_len && s->curvec + 1 < s->iovlen) { + s->curoff = 0; + s->curvec++; + } +} + +struct sink { + struct iovec *iov; + int iovlen; + unsigned curvec; + unsigned curoff; + unsigned written; +}; + +static inline void append(struct sink *s, const char *data, size_t n) +{ + struct iovec *iov = &s->iov[s->curvec]; + char *dst = iov->iov_base + s->curoff; + size_t nlen = min_t(size_t, iov->iov_len - s->curoff, n); + if (data != dst) + memcpy(dst, data, nlen); + s->written += n; + s->curoff += nlen; + while ((n -= nlen) > 0) { + data += nlen; + s->curvec++; + DCHECK_LT(s->curvec, s->iovlen); + iov++; + nlen = min_t(size_t, iov->iov_len, n); + memcpy(iov->iov_base, data, nlen); + s->curoff = nlen; + } +} + +static inline void *sink_peek(struct sink *s, size_t n) +{ + struct iovec *iov = &s->iov[s->curvec]; + if (s->curvec < iov->iov_len && iov->iov_len - s->curoff >= n) + return iov->iov_base + s->curoff; + return NULL; +} + +#else + +struct source { + const char *ptr; + size_t left; +}; + +static inline int available(struct source *s) +{ + return s->left; +} + +static inline 
const char *peek(struct source *s, size_t * len)
+{
+	*len = s->left;
+	return s->ptr;
+}
+
+static inline void skip(struct source *s, size_t n)
+{
+	s->left -= n;
+	s->ptr += n;
+}
+
+struct sink {
+	char *dest;
+};
+
+static inline void append(struct sink *s, const char *data, size_t n)
+{
+	if (data != s->dest)
+		memcpy(s->dest, data, n);
+	s->dest += n;
+}
+
+#define sink_peek(s, n) sink_peek_no_sg(s)
+
+static inline void *sink_peek_no_sg(const struct sink *s)
+{
+	return s->dest;
+}
+
+#endif
+
+struct writer {
+	char *base;
+	char *op;
+	char *op_limit;
+};
+
+/* Called before decompression */
+static inline void writer_set_expected_length(struct writer *w, size_t len)
+{
+	w->op_limit = w->op + len;
+}
+
+/* Called after decompression */
+static inline bool writer_check_length(struct writer *w)
+{
+	return w->op == w->op_limit;
+}
+
+/*
+ * Copy "len" bytes from "src" to "op", one byte at a time. Used for
+ * handling COPY operations where the input and output regions may
+ * overlap. For example, suppose:
+ *    src == "ab"
+ *    op == src + 2
+ *    len == 20
+ * After IncrementalCopy(src, op, len), the result will have
+ * eleven copies of "ab"
+ *    ababababababababababab
+ * Note that this does not match the semantics of either memcpy()
+ * or memmove().
+ */
+static inline void incremental_copy(const char *src, char *op, int len)
+{
+	DCHECK_GT(len, 0);
+	do {
+		*op++ = *src++;
+	} while (--len > 0);
+}
+
+/*
+ * Equivalent to IncrementalCopy except that it can write up to ten extra
+ * bytes after the end of the copy, and that it is faster.
+ *
+ * The main part of this loop is a simple copy of eight bytes at a time until
+ * we've copied (at least) the requested amount of bytes. However, if op and
+ * src are less than eight bytes apart (indicating a repeating pattern of
+ * length < 8), we first need to expand the pattern in order to get the correct
+ * results. For instance, if the buffer looks like this, with the eight-byte
+ * <s1> and <s2> patterns marked as intervals:
+ *
+ *    abxxxxxxxxxxxx
+ *    [------]           src
+ *    [------]      op
+ *
+ * a single eight-byte copy from <s1> to <s2> will repeat the pattern once,
+ * after which we can move <s2> two bytes without moving <s1>:
+ *
+ *    ababxxxxxxxxxx
+ *    [------]           src
+ *    [------]      op
+ *
+ * and repeat the exercise until the two no longer overlap.
+ *
+ * This allows us to do very well in the special case of one single byte
+ * repeated many times, without taking a big hit for more general cases.
+ *
+ * The worst case of extra writing past the end of the match occurs when
+ * op - src == 1 and len == 1; the last copy will read from byte positions
+ * [0..7] and write to [4..11], whereas it was only supposed to write to
+ * position 1. Thus, ten excess bytes.
+ */
+
+#define kmax_increment_copy_overflow 10
+
+static inline void incremental_copy_fast_path(const char *src, char *op,
+					       int len)
+{
+	while (op - src < 8) {
+		UNALIGNED_STORE64(op, UNALIGNED_LOAD64(src));
+		len -= op - src;
+		op += op - src;
+	}
+	while (len > 0) {
+		UNALIGNED_STORE64(op, UNALIGNED_LOAD64(src));
+		src += 8;
+		op += 8;
+		len -= 8;
+	}
+}
+
+static inline bool writer_append_from_self(struct writer *w, u32 offset,
+					   u32 len)
+{
+	char *const op = w->op;
+	CHECK_LE(op, w->op_limit);
+	const u32 space_left = w->op_limit - op;
+
+	if (op - w->base <= offset - 1u)	/* -1u catches offset==0 */
+		return false;
+	if (len <= 16 && offset >= 8 && space_left >= 16) {
+		/* Fast path, used for the majority (70-80%) of dynamic
+		 * invocations.
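+		 *
+		 * The two unconditional 8-byte stores below deliberately
+		 * over-copy: len <= 16 and space_left >= 16 guarantee that
+		 * the extra bytes land in writable output space, and
+		 * offset >= 8 keeps the source a full word behind the
+		 * destination, so no pattern expansion is needed.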
*/ + UNALIGNED_STORE64(op, UNALIGNED_LOAD64(op - offset)); + UNALIGNED_STORE64(op + 8, UNALIGNED_LOAD64(op - offset + 8)); + } else { + if (space_left >= len + kmax_increment_copy_overflow) { + incremental_copy_fast_path(op - offset, op, len); + } else { + if (space_left < len) { + return false; + } + incremental_copy(op - offset, op, len); + } + } + + w->op = op + len; + return true; +} + +static inline bool writer_append(struct writer *w, const char *ip, u32 len) +{ + char *const op = w->op; + CHECK_LE(op, w->op_limit); + const u32 space_left = w->op_limit - op; + if (space_left < len) + return false; + memcpy(op, ip, len); + w->op = op + len; + return true; +} + +static inline bool writer_try_fast_append(struct writer *w, const char *ip, + u32 available_bytes, u32 len) +{ + char *const op = w->op; + const int space_left = w->op_limit - op; + if (len <= 16 && available_bytes >= 16 && space_left >= 16) { + /* Fast path, used for the majority (~95%) of invocations */ + UNALIGNED_STORE64(op, UNALIGNED_LOAD64(ip)); + UNALIGNED_STORE64(op + 8, UNALIGNED_LOAD64(ip + 8)); + w->op = op + len; + return true; + } + return false; +} + +/* + * Any hash function will produce a valid compressed bitstream, but a good + * hash function reduces the number of collisions and thus yields better + * compression for compressible input, and more speed for incompressible + * input. Of course, it doesn't hurt if the hash function is reasonably fast + * either, as it gets called a lot. + */ +static inline u32 hash_bytes(u32 bytes, int shift) +{ + u32 kmul = 0x1e35a7bd; + return (bytes * kmul) >> shift; +} + +static inline u32 hash(const char *p, int shift) +{ + return hash_bytes(UNALIGNED_LOAD32(p), shift); +} + +/* + * Compressed data can be defined as: + * compressed := item* literal* + * item := literal* copy + * + * The trailing literal sequence has a space blowup of at most 62/60 + * since a literal of length 60 needs one tag byte + one extra byte + * for length information. + * + * Item blowup is trickier to measure. Suppose the "copy" op copies + * 4 bytes of data. Because of a special check in the encoding code, + * we produce a 4-byte copy only if the offset is < 65536. Therefore + * the copy op takes 3 bytes to encode, and this type of item leads + * to at most the 62/60 blowup for representing literals. + * + * Suppose the "copy" op copies 5 bytes of data. If the offset is big + * enough, it will take 5 bytes to encode the copy op. Therefore the + * worst case here is a one-byte literal followed by a five-byte copy. + * I.e., 6 bytes of input turn into 7 bytes of "compressed" data. + * + * This last factor dominates the blowup, so the final estimate is: + */ +size_t snappy_max_compressed_length(size_t source_len) +{ + return 32 + source_len + source_len / 6; +} +EXPORT_SYMBOL(snappy_max_compressed_length); + +enum { + LITERAL = 0, + COPY_1_BYTE_OFFSET = 1, /* 3 bit length + 3 bits of offset in opcode */ + COPY_2_BYTE_OFFSET = 2, + COPY_4_BYTE_OFFSET = 3 +}; + +static inline char *emit_literal(char *op, + const char *literal, + int len, bool allow_fast_path) +{ + int n = len - 1; /* Zero-length literals are disallowed */ + + if (n < 60) { + /* Fits in tag byte */ + *op++ = LITERAL | (n << 2); + +/* + * The vast majority of copies are below 16 bytes, for which a + * call to memcpy is overkill. 
This fast path can sometimes + * copy up to 15 bytes too much, but that is okay in the + * main loop, since we have a bit to go on for both sides: + * + * - The input will always have kInputMarginBytes = 15 extra + * available bytes, as long as we're in the main loop, and + * if not, allow_fast_path = false. + * - The output will always have 32 spare bytes (see + * MaxCompressedLength). + */ + if (allow_fast_path && len <= 16) { + UNALIGNED_STORE64(op, UNALIGNED_LOAD64(literal)); + UNALIGNED_STORE64(op + 8, + UNALIGNED_LOAD64(literal + 8)); + return op + len; + } + } else { + /* Encode in upcoming bytes */ + char *base = op; + int count = 0; + op++; + while (n > 0) { + *op++ = n & 0xff; + n >>= 8; + count++; + } + DCHECK(count >= 1); + DCHECK(count <= 4); + *base = LITERAL | ((59 + count) << 2); + } + memcpy(op, literal, len); + return op + len; +} + +static inline char *emit_copy_less_than64(char *op, int offset, int len) +{ + DCHECK_LE(len, 64); + DCHECK_GE(len, 4); + DCHECK_LT(offset, 65536); + + if ((len < 12) && (offset < 2048)) { + int len_minus_4 = len - 4; + DCHECK(len_minus_4 < 8); /* Must fit in 3 bits */ + *op++ = + COPY_1_BYTE_OFFSET | ((len_minus_4) << 2) | ((offset >> 8) + << 5); + *op++ = offset & 0xff; + } else { + *op++ = COPY_2_BYTE_OFFSET | ((len - 1) << 2); + put_unaligned_le16(offset, op); + op += 2; + } + return op; +} + +static inline char *emit_copy(char *op, int offset, int len) +{ + /* + * Emit 64 byte copies but make sure to keep at least four bytes + * reserved + */ + while (len >= 68) { + op = emit_copy_less_than64(op, offset, 64); + len -= 64; + } + + /* + * Emit an extra 60 byte copy if have too much data to fit in + * one copy + */ + if (len > 64) { + op = emit_copy_less_than64(op, offset, 60); + len -= 60; + } + + /* Emit remainder */ + op = emit_copy_less_than64(op, offset, len); + return op; +} + +/** + * snappy_uncompressed_length - return length of uncompressed output. + * @start: compressed buffer + * @n: length of compressed buffer. + * @result: Write the length of the uncompressed output here. + * + * Returns true when successfull, otherwise false. + */ +bool snappy_uncompressed_length(const char *start, size_t n, size_t * result) +{ + u32 v = 0; + const char *limit = start + n; + if (varint_parse32_with_limit(start, limit, &v) != NULL) { + *result = v; + return true; + } else { + return false; + } +} +EXPORT_SYMBOL(snappy_uncompressed_length); + +#define kblock_log 15 +#define kblock_size (1 << kblock_log) + +/* + * This value could be halfed or quartered to save memory + * at the cost of slightly worse compression. + */ +#define kmax_hash_table_bits 14 +#define kmax_hash_table_size (1U << kmax_hash_table_bits) + +/* + * Use smaller hash table when input.size() is smaller, since we + * fill the table, incurring O(hash table size) overhead for + * compression, and if the input is short, we won't need that + * many hash table entries anyway. + */ +static u16 *get_hash_table(struct snappy_env *env, size_t input_size, + int *table_size) +{ + unsigned htsize = 256; + + DCHECK(kmax_hash_table_size >= 256); + while (htsize < kmax_hash_table_size && htsize < input_size) + htsize <<= 1; + CHECK_EQ(0, htsize & (htsize - 1)); + CHECK_LE(htsize, kmax_hash_table_size); + + u16 *table; + table = env->hash_table; + + *table_size = htsize; + memset(table, 0, htsize * sizeof(*table)); + return table; +} + +/* + * Return the largest n such that + * + * s1[0,n-1] == s2[0,n-1] + * and n <= (s2_limit - s2). + * + * Does not read *s2_limit or beyond. 
+ * Does not read *(s1 + (s2_limit - s2)) or beyond. + * Requires that s2_limit >= s2. + * + * Separate implementation for x86_64, for speed. Uses the fact that + * x86_64 is little endian. + */ +#if !defined(WORDS_BIGENDIAN) && BITS_PER_LONG == 64 +static inline int find_match_length(const char *s1, + const char *s2, const char *s2_limit) +{ + int matched = 0; + + DCHECK_GE(s2_limit, s2); + /* + * Find out how long the match is. We loop over the data 64 bits at a + * time until we find a 64-bit block that doesn't match; then we find + * the first non-matching bit and use that to calculate the total + * length of the match. + */ + while (likely(s2 <= s2_limit - 8)) { + if (unlikely + (UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched))) { + s2 += 8; + matched += 8; + } else { + /* + * On current (mid-2008) Opteron models there + * is a 3% more efficient code sequence to + * find the first non-matching byte. However, + * what follows is ~10% better on Intel Core 2 + * and newer, and we expect AMD's bsf + * instruction to improve. + */ + u64 x = + UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + + matched); + int matching_bits = find_lsb_set_non_zero64(x); + matched += matching_bits >> 3; + return matched; + } + } + while (likely(s2 < s2_limit)) { + if (likely(s1[matched] == *s2)) { + ++s2; + ++matched; + } else { + return matched; + } + } + return matched; +} +#else +static inline int find_match_length(const char *s1, + const char *s2, const char *s2_limit) +{ + /* Implementation based on the x86-64 version, above. */ + DCHECK_GE(s2_limit, s2); + int matched = 0; + + while (s2 <= s2_limit - 4 && + UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1 + matched)) { + s2 += 4; + matched += 4; + } + if (is_little_endian() && s2 <= s2_limit - 4) { + u32 x = + UNALIGNED_LOAD32(s2) ^ UNALIGNED_LOAD32(s1 + matched); + int matching_bits = find_lsb_set_non_zero(x); + matched += matching_bits >> 3; + } else { + while ((s2 < s2_limit) && (s1[matched] == *s2)) { + ++s2; + ++matched; + } + } + return matched; +} +#endif + +/* + * For 0 <= offset <= 4, GetU32AtOffset(UNALIGNED_LOAD64(p), offset) will + * equal UNALIGNED_LOAD32(p + offset). Motivation: On x86-64 hardware we have + * empirically found that overlapping loads such as + * UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2) + * are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to u32. + */ +static inline u32 get_u32_at_offset(u64 v, int offset) +{ + DCHECK(0 <= offset && offset <= 4); + return v >> (is_little_endian()? 8 * offset : 32 - 8 * offset); +} + +/* + * Flat array compression that does not emit the "uncompressed length" + * prefix. Compresses "input" string to the "*op" buffer. + * + * REQUIRES: "input" is at most "kBlockSize" bytes long. + * REQUIRES: "op" points to an array of memory that is at least + * "MaxCompressedLength(input.size())" in size. + * REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero. + * REQUIRES: "table_size" is a power of two + * + * Returns an "end" pointer into "op" buffer. + * "end - op" is the compressed size of "input". + */ + +static char *compress_fragment(const char *const input, + const size_t input_size, + char *op, u16 * table, const unsigned table_size) +{ + /* "ip" is the input pointer, and "op" is the output pointer. 
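+	 * "table" maps hash(4-byte prefix) to the offset (from "input") of the
+	 * most recent position seen with that hash; an entry is only a candidate
+	 * and is verified with a 32-bit compare before a match is emitted.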
*/ + const char *ip = input; + CHECK_LE(input_size, kblock_size); + CHECK_EQ(table_size & (table_size - 1), 0); + const int shift = 32 - log2_floor(table_size); + DCHECK_EQ(UINT_MAX >> shift, table_size - 1); + const char *ip_end = input + input_size; + const char *baseip = ip; + /* + * Bytes in [next_emit, ip) will be emitted as literal bytes. Or + * [next_emit, ip_end) after the main loop. + */ + const char *next_emit = ip; + + const unsigned kinput_margin_bytes = 15; + + if (likely(input_size >= kinput_margin_bytes)) { + const char *const ip_limit = input + input_size - + kinput_margin_bytes; + + u32 next_hash; + for (next_hash = hash(++ip, shift);;) { + DCHECK_LT(next_emit, ip); +/* + * The body of this loop calls EmitLiteral once and then EmitCopy one or + * more times. (The exception is that when we're close to exhausting + * the input we goto emit_remainder.) + * + * In the first iteration of this loop we're just starting, so + * there's nothing to copy, so calling EmitLiteral once is + * necessary. And we only start a new iteration when the + * current iteration has determined that a call to EmitLiteral will + * precede the next call to EmitCopy (if any). + * + * Step 1: Scan forward in the input looking for a 4-byte-long match. + * If we get close to exhausting the input then goto emit_remainder. + * + * Heuristic match skipping: If 32 bytes are scanned with no matches + * found, start looking only at every other byte. If 32 more bytes are + * scanned, look at every third byte, etc.. When a match is found, + * immediately go back to looking at every byte. This is a small loss + * (~5% performance, ~0.1% density) for lcompressible data due to more + * bookkeeping, but for non-compressible data (such as JPEG) it's a huge + * win since the compressor quickly "realizes" the data is incompressible + * and doesn't bother looking for matches everywhere. + * + * The "skip" variable keeps track of how many bytes there are since the + * last match; dividing it by 32 (ie. right-shifting by five) gives the + * number of bytes to move ahead for each iteration. + */ + u32 skip_bytes = 32; + + const char *next_ip = ip; + const char *candidate; + do { + ip = next_ip; + u32 hval = next_hash; + DCHECK_EQ(hval, hash(ip, shift)); + u32 bytes_between_hash_lookups = skip_bytes++ >> 5; + next_ip = ip + bytes_between_hash_lookups; + if (unlikely(next_ip > ip_limit)) { + goto emit_remainder; + } + next_hash = hash(next_ip, shift); + candidate = baseip + table[hval]; + DCHECK_GE(candidate, baseip); + DCHECK_LT(candidate, ip); + + table[hval] = ip - baseip; + } while (likely(UNALIGNED_LOAD32(ip) != + UNALIGNED_LOAD32(candidate))); + +/* + * Step 2: A 4-byte match has been found. We'll later see if more + * than 4 bytes match. But, prior to the match, input + * bytes [next_emit, ip) are unmatched. Emit them as "literal bytes." + */ + DCHECK_LE(next_emit + 16, ip_end); + op = emit_literal(op, next_emit, ip - next_emit, true); + +/* + * Step 3: Call EmitCopy, and then see if another EmitCopy could + * be our next move. Repeat until we find no match for the + * input immediately after what was consumed by the last EmitCopy call. + * + * If we exit this loop normally then we need to call EmitLiteral next, + * though we don't yet know how big the literal will be. We handle that + * by proceeding to the next iteration of the main loop. We also can exit + * this loop via goto if we get close to exhausting the input. 
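+ *
+ * Note: the loop below keeps eight bytes of input cached in input_bytes so
+ * that the two hash-table updates per iteration share a single unaligned
+ * load through get_u32_at_offset(), instead of two overlapping 32-bit loads.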
+ */ + u64 input_bytes = 0; + u32 candidate_bytes = 0; + + do { +/* + * We have a 4-byte match at ip, and no need to emit any + * "literal bytes" prior to ip. + */ + const char *base = ip; + int matched = 4 + + find_match_length(candidate + 4, ip + 4, + ip_end); + ip += matched; + int offset = base - candidate; + DCHECK_EQ(0, memcmp(base, candidate, matched)); + op = emit_copy(op, offset, matched); +/* + * We could immediately start working at ip now, but to improve + * compression we first update table[Hash(ip - 1, ...)]. + */ + const char *insert_tail = ip - 1; + next_emit = ip; + if (unlikely(ip >= ip_limit)) { + goto emit_remainder; + } + input_bytes = UNALIGNED_LOAD64(insert_tail); + u32 prev_hash = + hash_bytes(get_u32_at_offset + (input_bytes, 0), shift); + table[prev_hash] = ip - baseip - 1; + u32 cur_hash = + hash_bytes(get_u32_at_offset + (input_bytes, 1), shift); + candidate = baseip + table[cur_hash]; + candidate_bytes = UNALIGNED_LOAD32(candidate); + table[cur_hash] = ip - baseip; + } while (get_u32_at_offset(input_bytes, 1) == + candidate_bytes); + + next_hash = + hash_bytes(get_u32_at_offset(input_bytes, 2), + shift); + ++ip; + } + } + +emit_remainder: + /* Emit the remaining bytes as a literal */ + if (next_emit < ip_end) + op = emit_literal(op, next_emit, ip_end - next_emit, false); + + return op; +} + +/* + * ----------------------------------------------------------------------- + * Lookup table for decompression code. Generated by ComputeTable() below. + * ----------------------------------------------------------------------- + */ + +/* Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits */ +static const u32 wordmask[] = { + 0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu +}; + +/* + * Data stored per entry in lookup table: + * Range Bits-used Description + * ------------------------------------ + * 1..64 0..7 Literal/copy length encoded in opcode byte + * 0..7 8..10 Copy offset encoded in opcode byte / 256 + * 0..4 11..13 Extra bytes after opcode + * + * We use eight bits for the length even though 7 would have sufficed + * because of efficiency reasons: + * (1) Extracting a byte is faster than a bit-field + * (2) It properly aligns copy offset so we do not need a <<8 + */ +static const u16 char_table[256] = { + 0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002, + 0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004, + 0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006, + 0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008, + 0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a, + 0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c, + 0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e, + 0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010, + 0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012, + 0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014, + 0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016, + 0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018, + 0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a, + 0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c, + 0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e, + 0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020, + 0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022, + 0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024, + 0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026, + 0x0027, 
0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028, + 0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a, + 0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c, + 0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e, + 0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030, + 0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032, + 0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034, + 0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036, + 0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038, + 0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a, + 0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c, + 0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e, + 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040 +}; + +struct snappy_decompressor { + struct source *reader; /* Underlying source of bytes to decompress */ + const char *ip; /* Points to next buffered byte */ + const char *ip_limit; /* Points just past buffered bytes */ + u32 peeked; /* Bytes peeked from reader (need to skip) */ + bool eof; /* Hit end of input without an error? */ + char scratch[5]; /* Temporary buffer for peekfast boundaries */ +}; + +static void +init_snappy_decompressor(struct snappy_decompressor *d, struct source *reader) +{ + d->reader = reader; + d->ip = NULL; + d->ip_limit = NULL; + d->peeked = 0; + d->eof = false; +} + +static void exit_snappy_decompressor(struct snappy_decompressor *d) +{ + skip(d->reader, d->peeked); +} + +/* + * Read the uncompressed length stored at the start of the compressed data. + * On succcess, stores the length in *result and returns true. + * On failure, returns false. + */ +static bool read_uncompressed_length(struct snappy_decompressor *d, + u32 * result) +{ + DCHECK(d->ip == NULL); /* + * Must not have read anything yet + * Length is encoded in 1..5 bytes + */ + *result = 0; + u32 shift = 0; + while (true) { + if (shift >= 32) + return false; + size_t n; + const char *ip = peek(d->reader, &n); + if (n == 0) + return false; + const unsigned char c = *(const unsigned char *)(ip); + skip(d->reader, 1); + *result |= (u32) (c & 0x7f) << shift; + if (c < 128) { + break; + } + shift += 7; + } + return true; +} + +static bool refill_tag(struct snappy_decompressor *d); + +/* + * Process the next item found in the input. + * Returns true if successful, false on error or end of input. + */ +static void decompress_all_tags(struct snappy_decompressor *d, + struct writer *writer) +{ + const char *ip = d->ip; + + /* + * We could have put this refill fragment only at the beginning of the loop. + * However, duplicating it at the end of each branch gives the compiler more + * scope to optimize the expression based on the local + * context, which overall increases speed. 
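+ *
+ * The refill threshold of five bytes is the worst-case tag size: one opcode
+ * byte plus up to four extra length/offset bytes (char_table bits 11..13),
+ * so a complete tag can always be parsed without further bounds checks.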
+ */ +#define MAYBE_REFILL() \ + if (d->ip_limit - ip < 5) { \ + d->ip = ip; \ + if (!refill_tag(d)) return; \ + ip = d->ip; \ + } + + + MAYBE_REFILL(); + for (;;) { + if (d->ip_limit - ip < 5) { + d->ip = ip; + if (!refill_tag(d)) + return; + ip = d->ip; + } + + const unsigned char c = *(const unsigned char *)(ip++); + + if ((c & 0x3) == LITERAL) { + u32 literal_length = (c >> 2) + 1; + if (writer_try_fast_append(writer, ip, d->ip_limit - ip, + literal_length)) { + DCHECK_LT(literal_length, 61); + ip += literal_length; + MAYBE_REFILL(); + continue; + } + if (unlikely(literal_length >= 61)) { + /* Long literal */ + const u32 literal_ll = literal_length - 60; + literal_length = (get_unaligned_le32(ip) & + wordmask[literal_ll]) + 1; + ip += literal_ll; + } + + u32 avail = d->ip_limit - ip; + while (avail < literal_length) { + if (!writer_append(writer, ip, avail)) + return; + literal_length -= avail; + skip(d->reader, d->peeked); + size_t n; + ip = peek(d->reader, &n); + avail = n; + d->peeked = avail; + if (avail == 0) + return; /* Premature end of input */ + d->ip_limit = ip + avail; + } + if (!writer_append(writer, ip, literal_length)) + return; + ip += literal_length; + MAYBE_REFILL(); + } else { + const u32 entry = char_table[c]; + const u32 trailer = get_unaligned_le32(ip) & + wordmask[entry >> 11]; + const u32 length = entry & 0xff; + ip += entry >> 11; + + /* + * copy_offset/256 is encoded in bits 8..10. + * By just fetching those bits, we get + * copy_offset (since the bit-field starts at + * bit 8). + */ + const u32 copy_offset = entry & 0x700; + if (!writer_append_from_self(writer, + copy_offset + trailer, + length)) + return; + MAYBE_REFILL(); + } + } +} + +#undef MAYBE_REFILL + +static bool refill_tag(struct snappy_decompressor *d) +{ + const char *ip = d->ip; + + if (ip == d->ip_limit) { + size_t n; + /* Fetch a new fragment from the reader */ + skip(d->reader, d->peeked); /* All peeked bytes are used up */ + ip = peek(d->reader, &n); + d->peeked = n; + if (n == 0) { + d->eof = true; + return false; + } + d->ip_limit = ip + n; + } + + /* Read the tag character */ + DCHECK_LT(ip, d->ip_limit); + const unsigned char c = *(const unsigned char *)(ip); + const u32 entry = char_table[c]; + const u32 needed = (entry >> 11) + 1; /* +1 byte for 'c' */ + DCHECK_LE(needed, sizeof(d->scratch)); + + /* Read more bytes from reader if needed */ + u32 nbuf = d->ip_limit - ip; + + if (nbuf < needed) { + /* + * Stitch together bytes from ip and reader to form the word + * contents. We store the needed bytes in "scratch". They + * will be consumed immediately by the caller since we do not + * read more than we need. + */ + memmove(d->scratch, ip, nbuf); + skip(d->reader, d->peeked); /* All peeked bytes are used up */ + d->peeked = 0; + while (nbuf < needed) { + size_t length; + const char *src = peek(d->reader, &length); + if (length == 0) + return false; + u32 to_add = min_t(u32, needed - nbuf, length); + memcpy(d->scratch + nbuf, src, to_add); + nbuf += to_add; + skip(d->reader, to_add); + } + DCHECK_EQ(nbuf, needed); + d->ip = d->scratch; + d->ip_limit = d->scratch + needed; + } else if (nbuf < 5) { + /* + * Have enough bytes, but move into scratch so that we do not + * read past end of input + */ + memmove(d->scratch, ip, nbuf); + skip(d->reader, d->peeked); /* All peeked bytes are used up */ + d->peeked = 0; + d->ip = d->scratch; + d->ip_limit = d->scratch + nbuf; + } else { + /* Pass pointer to buffer returned by reader. 
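+		 * At least five bytes are buffered in this branch, so the next
+		 * tag can be parsed in place without copying through scratch.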
*/ + d->ip = ip; + } + return true; +} + +static int internal_uncompress(struct source *r, + struct writer *writer, u32 max_len) +{ + struct snappy_decompressor decompressor; + u32 uncompressed_len = 0; + + init_snappy_decompressor(&decompressor, r); + + if (!read_uncompressed_length(&decompressor, &uncompressed_len)) + return -EIO; + /* Protect against possible DoS attack */ + if ((u64) (uncompressed_len) > max_len) + return -EIO; + + writer_set_expected_length(writer, uncompressed_len); + + /* Process the entire input */ + decompress_all_tags(&decompressor, writer); + + exit_snappy_decompressor(&decompressor); + return (decompressor.eof && writer_check_length(writer)) ? 0 : -EIO; +} + +static inline int compress(struct snappy_env *env, struct source *reader, + struct sink *writer) +{ + int err; + size_t written = 0; + int N = available(reader); + char ulength[kmax32]; + char *p = varint_encode32(ulength, N); + + append(writer, ulength, p - ulength); + written += (p - ulength); + + while (N > 0) { + /* Get next block to compress (without copying if possible) */ + size_t fragment_size; + const char *fragment = peek(reader, &fragment_size); + if (fragment_size == 0) { + err = -EIO; + goto out; + } + const unsigned num_to_read = min_t(int, N, kblock_size); + size_t bytes_read = fragment_size; + + int pending_advance = 0; + if (bytes_read >= num_to_read) { + /* Buffer returned by reader is large enough */ + pending_advance = num_to_read; + fragment_size = num_to_read; + } + else { + memcpy(env->scratch, fragment, bytes_read); + skip(reader, bytes_read); + + while (bytes_read < num_to_read) { + fragment = peek(reader, &fragment_size); + size_t n = + min_t(size_t, fragment_size, + num_to_read - bytes_read); + memcpy((char *)(env->scratch) + bytes_read, fragment, n); + bytes_read += n; + skip(reader, n); + } + DCHECK_EQ(bytes_read, num_to_read); + fragment = env->scratch; + fragment_size = num_to_read; + } + if (fragment_size < num_to_read) + return -EIO; + + /* Get encoding table for compression */ + int table_size; + u16 *table = get_hash_table(env, num_to_read, &table_size); + + /* Compress input_fragment and append to dest */ + char *dest; + dest = sink_peek(writer, snappy_max_compressed_length(num_to_read)); + if (!dest) { + /* + * Need a scratch buffer for the output, + * because the byte sink doesn't have enough + * in one piece. + */ + dest = env->scratch_output; + } + char *end = compress_fragment(fragment, fragment_size, + dest, table, table_size); + append(writer, dest, end - dest); + written += (end - dest); + + N -= num_to_read; + skip(reader, pending_advance); + } + + err = 0; +out: + return err; +} + +#ifdef SG + +int snappy_compress_iov(struct snappy_env *env, + struct iovec *iov_in, + int iov_in_len, + size_t input_length, + struct iovec *iov_out, + int *iov_out_len, + size_t *compressed_length) +{ + struct source reader = { + .iov = iov_in, + .iovlen = iov_in_len, + .total = input_length + }; + struct sink writer = { + .iov = iov_out, + .iovlen = *iov_out_len, + }; + int err = compress(env, &reader, &writer); + + *iov_out_len = writer.curvec + 1; + + /* Compute how many bytes were added */ + *compressed_length = writer.written; + return err; +} +EXPORT_SYMBOL(snappy_compress_iov); + +/** + * snappy_compress - Compress a buffer using the snappy compressor. + * @env: Preallocated environment + * @input: Input buffer + * @input_length: Length of input_buffer + * @compressed: Output buffer for compressed data + * @compressed_length: The real length of the output written here. 
+ * + * Return 0 on success, otherwise an negative error code. + * + * The output buffer must be at least + * snappy_max_compressed_length(input_length) bytes long. + * + * Requires a preallocated environment from snappy_init_env. + * The environment does not keep state over individual calls + * of this function, just preallocates the memory. + */ +int snappy_compress(struct snappy_env *env, + const char *input, + size_t input_length, + char *compressed, size_t *compressed_length) +{ + struct iovec iov_in = { + .iov_base = (char *)input, + .iov_len = input_length, + }; + struct iovec iov_out = { + .iov_base = compressed, + .iov_len = 0xffffffff, + }; + int out = 1; + return snappy_compress_iov(env, + &iov_in, 1, input_length, + &iov_out, &out, compressed_length); +} +EXPORT_SYMBOL(snappy_compress); + + +int snappy_uncompress_iov(struct iovec *iov_in, int iov_in_len, + size_t input_len, char *uncompressed) +{ + struct source reader = { + .iov = iov_in, + .iovlen = iov_in_len, + .total = input_len + }; + struct writer output = { + .base = uncompressed, + .op = uncompressed + }; + return internal_uncompress(&reader, &output, 0xffffffff); +} +EXPORT_SYMBOL(snappy_uncompress_iov); + +/** + * snappy_uncompress - Uncompress a snappy compressed buffer + * @compressed: Input buffer with compressed data + * @n: length of compressed buffer + * @uncompressed: buffer for uncompressed data + * + * The uncompressed data buffer must be at least + * snappy_uncompressed_length(compressed) bytes long. + * + * Return 0 on success, otherwise an negative error code. + */ +int snappy_uncompress(const char *compressed, size_t n, char *uncompressed) +{ + struct iovec iov = { + .iov_base = (char *)compressed, + .iov_len = n + }; + return snappy_uncompress_iov(&iov, 1, n, uncompressed); +} +EXPORT_SYMBOL(snappy_uncompress); + +#else +/** + * snappy_compress - Compress a buffer using the snappy compressor. + * @env: Preallocated environment + * @input: Input buffer + * @input_length: Length of input_buffer + * @compressed: Output buffer for compressed data + * @compressed_length: The real length of the output written here. + * + * Return 0 on success, otherwise an negative error code. + * + * The output buffer must be at least + * snappy_max_compressed_length(input_length) bytes long. + * + * Requires a preallocated environment from snappy_init_env. + * The environment does not keep state over individual calls + * of this function, just preallocates the memory. + */ +int snappy_compress(struct snappy_env *env, + const char *input, + size_t input_length, + char *compressed, size_t *compressed_length) +{ + struct source reader = { + .ptr = input, + .left = input_length + }; + struct sink writer = { + .dest = compressed, + }; + int err = compress(env, &reader, &writer); + + /* Compute how many bytes were added */ + *compressed_length = (writer.dest - compressed); + return err; +} +EXPORT_SYMBOL(snappy_compress); +/*Wrapper around snappy compression function. 
It handles varlena data for snappy. */
+int pg_snappy_compress(const char *source, int32 slen, struct varlena *dest)
+{
+	char *bp = VARDATA(dest);
+	static struct snappy_env *snappy_env = NULL;
+	int ret;
+	size_t buffer_size;
+
+	if (snappy_env == NULL)
+	{
+		snappy_env = malloc(sizeof(struct snappy_env));
+		if (snappy_env == NULL || snappy_init_env(snappy_env) != 0)
+			return -ENOMEM;
+	}
+
+	ret = snappy_compress(snappy_env,
+						  source,
+						  slen,
+						  bp,
+						  &buffer_size);
+
+	/* snappy_compress() returns 0 on success, else a negative errno (-EIO) */
+	if (ret != 0)
+		return ret;
+
+	SET_VARSIZE_COMPRESSED(dest, buffer_size + VARHDRSZ);
+
+	return ret;
+}
+EXPORT_SYMBOL(pg_snappy_compress);
+
+/**
+ * snappy_uncompress - Uncompress a snappy compressed buffer
+ * @compressed: Input buffer with compressed data
+ * @n: length of compressed buffer
+ * @uncompressed: buffer for uncompressed data
+ *
+ * The uncompressed data buffer must be at least
+ * snappy_uncompressed_length(compressed) bytes long.
+ *
+ * Return 0 on success, otherwise an negative error code.
+ */
+int snappy_uncompress(const char *compressed, size_t n, char *uncompressed)
+{
+	struct source reader = {
+		.ptr = compressed,
+		.left = n
+	};
+	struct writer output = {
+		.base = uncompressed,
+		.op = uncompressed
+	};
+	return internal_uncompress(&reader, &output, 0xffffffff);
+}
+EXPORT_SYMBOL(snappy_uncompress);
+#endif
+
+#ifdef SG
+/**
+ * snappy_init_env_sg - Allocate snappy compression environment
+ * @env: Environment to preallocate
+ * @sg: Input environment ever does scatter gather
+ *
+ * If false is passed to sg then multiple entries in an iovec
+ * are not legal.
+ * Returns 0 on success, otherwise negative errno.
+ * Must run in process context.
+ */
+int snappy_init_env_sg(struct snappy_env *env, bool sg)
+{
+	env->hash_table = vmalloc(sizeof(u16) * kmax_hash_table_size);
+	if (!env->hash_table)
+		goto error;
+	if (sg) {
+		env->scratch = vmalloc(kblock_size);
+		if (!env->scratch)
+			goto error;
+		env->scratch_output =
+			vmalloc(snappy_max_compressed_length(kblock_size));
+		if (!env->scratch_output)
+			goto error;
+	}
+	return 0;
+error:
+	snappy_free_env(env);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL(snappy_init_env_sg);
+#endif
+
+/**
+ * snappy_init_env - Allocate snappy compression environment
+ * @env: Environment to preallocate
+ *
+ * Passing multiple entries in an iovec is not allowed
+ * on the environment allocated here.
+ * Returns 0 on success, otherwise negative errno.
+ * Must run in process context.
+ */
+int snappy_init_env(struct snappy_env *env)
+{
+	env->hash_table = vmalloc(sizeof(u16) * kmax_hash_table_size);
+	if (!env->hash_table)
+		return -ENOMEM;
+	return 0;
+}
+EXPORT_SYMBOL(snappy_init_env);
+
+/**
+ * snappy_free_env - Free an snappy compression environment
+ * @env: Environment to free.
+ *
+ * Must run in process context.
+ */
+void snappy_free_env(struct snappy_env *env)
+{
+	vfree(env->hash_table);
+#ifdef SG
+	vfree(env->scratch);
+	vfree(env->scratch_output);
+#endif
+	memset(env, 0, sizeof(struct snappy_env));
+}
+EXPORT_SYMBOL(snappy_free_env);
diff --git a/src/common/snappy/snappy.h b/src/common/snappy/snappy.h
new file mode 100644
index 0000000..bac715b
--- /dev/null
+++ b/src/common/snappy/snappy.h
@@ -0,0 +1,35 @@
+#ifndef _LINUX_SNAPPY_H
+#define _LINUX_SNAPPY_H 1
+
+/* Only needed for compression.
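+ * (Decompression is stateless and needs no environment.)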
This preallocates the worst case */ +struct snappy_env { + unsigned short *hash_table; + void *scratch; + void *scratch_output; +}; + +struct iovec; +int snappy_init_env(struct snappy_env *env); +int snappy_init_env_sg(struct snappy_env *env, bool sg); +void snappy_free_env(struct snappy_env *env); +int snappy_uncompress_iov(struct iovec *iov_in, int iov_in_len, + size_t input_len, char *uncompressed); +int snappy_uncompress(const char *compressed, size_t n, char *uncompressed); +int snappy_compress(struct snappy_env *env, + const char *input, + size_t input_length, + char *compressed, + size_t *compressed_length); +int snappy_compress_iov(struct snappy_env *env, + struct iovec *iov_in, + int iov_in_len, + size_t input_length, + struct iovec *iov_out, + int *iov_out_len, + size_t *compressed_length); +bool snappy_uncompressed_length(const char *buf, size_t len, size_t *result); +size_t snappy_max_compressed_length(size_t source_len); +extern int pg_snappy_compress(const char *input, + int32 input_length, + struct varlena *compressed); +#endif diff --git a/src/include/utils/pg_lz4.h b/src/include/utils/pg_lz4.h new file mode 100644 index 0000000..dfae502 --- /dev/null +++ b/src/include/utils/pg_lz4.h @@ -0,0 +1,6 @@ +#ifndef _PG_LZ4 +#define _PG_LZ4 +int LZ4_decompress_fast (const char* source, char* dest, int originalSize); +int LZ4_compressBound(int isize); +int pg_LZ4_compress(const char *source, int32 slen, struct varlena *dest); +#endif diff --git a/src/include/utils/pg_snappy.h b/src/include/utils/pg_snappy.h new file mode 100644 index 0000000..f45db13 --- /dev/null +++ b/src/include/utils/pg_snappy.h @@ -0,0 +1,10 @@ +#ifndef _PG_SNAPPY_H +#define _PG_SNAPPY_H + +extern size_t snappy_max_compressed_length(size_t source_len); +extern bool snappy_uncompressed_length(const char *start, size_t n, size_t * result); +extern int snappy_uncompress(const char *compressed, size_t n, char *uncompressed); +extern int pg_snappy_compress(const char *input, + int32 input_length, + struct varlena *compressed); +#endif -- 1.7.1
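A usage note for reviewers: pg_snappy_compress() above does no output-buffer
sizing of its own, so callers are expected to allocate the worst case up
front. A round trip through the new entry points might look like the sketch
below (illustrative only; rawdata, rawsize, the palloc calls, and reading
VARSIZE() back from the compressed datum are assumptions, not from the patch):

    struct varlena *tmp;
    size_t	orig_size;

    tmp = (struct varlena *) palloc(snappy_max_compressed_length(rawsize) + VARHDRSZ);
    if (pg_snappy_compress(rawdata, rawsize, tmp) == 0)
    {
        const char *cdata = VARDATA(tmp);
        size_t		clen = VARSIZE(tmp) - VARHDRSZ;

        /* the uncompressed size is stored as a varint at the stream start */
        if (snappy_uncompressed_length(cdata, clen, &orig_size))
        {
            char   *plain = palloc(orig_size);

            if (snappy_uncompress(cdata, clen, plain) != 0)
                elog(ERROR, "invalid snappy compressed data");
        }
    }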