Ticket #348: compressed-changesets.diff

File compressed-changesets.diff, 26.2 KB (added by Dan, 13 years ago)
  • bootstrap

    diff --git a/bootstrap b/bootstrap
    index 7998897..7376fd6 100755
    a b else  
    236236  AUTOCONF=$instdir/bin/autoconf \
    237237  lazy_build automake 1.11.2 tar.bz2 34b3808e1d536ee3724fbde28bc1f13b8b40dc43
    238238  lazy_build libtool 2.4.2 tar.gz 22b71a8b5ce3ad86e1094e7285981cae10e6ff88
     239  if [ "$1" = "--deps=libmagic" ] ; then
     240      shift
     241      lazy_build file 5.10 tar.gz 72fd435e78955ee122b7b3d323ff2f92e6263e89
     242  fi
    239243
    240244  for v in $autotools ; do
    241245     tool=`echo "$v"|tr A-Z a-z`
    for d in $modules ; do  
    414418    xapian-core)
    415419      cd "$d" && "$configure" --enable-maintainer-mode --cache-file="$here/config.cache" ${1+"$@"}
    416420      ;;
     421    xapian-applications/omega)
     422      cd "$d" && "$configure" --enable-maintainer-mode XAPIAN_CONFIG="$XAPIAN_CONFIG" CXXFLAGS="-I$srcdir/INST/include -L$srcdir/INST/lib" ${1+"$@"}
     423      ;;
    417424    *)
    418425      cd "$d" && "$configure" --enable-maintainer-mode --cache-file="$here/config.cache" XAPIAN_CONFIG="$XAPIAN_CONFIG" ${1+"$@"}
    419426      ;;
  • xapian-core/backends/brass/brass_database.cc

    diff --git a/xapian-core/backends/brass/brass_database.cc b/xapian-core/backends/brass/brass_database.cc
    index 3ad7197..28335ea 100644
    a b BrassDatabase::set_revision_number(brass_revision_number_t new_revision)  
    444444            // table last, so that ends up cached the most, if the cache
    445445            // available is limited.  Do the position table just before that
    446446            // as having that cached will also improve search performance.
    447             termlist_table.write_changed_blocks(changes_fd);
    448             synonym_table.write_changed_blocks(changes_fd);
    449             spelling_table.write_changed_blocks(changes_fd);
    450             record_table.write_changed_blocks(changes_fd);
    451             position_table.write_changed_blocks(changes_fd);
    452             postlist_table.write_changed_blocks(changes_fd);
     447            bool compressed = CHANGES_VERSION != 1;
     448
     449            // FIXME:dc: this is the wrong place to decide whether changeset blocks are compressed
     450            termlist_table.write_changed_blocks(changes_fd, compressed);
     451            synonym_table.write_changed_blocks(changes_fd, compressed);
     452            spelling_table.write_changed_blocks(changes_fd, compressed);
     453            record_table.write_changed_blocks(changes_fd, compressed);
     454            position_table.write_changed_blocks(changes_fd, compressed);
     455            postlist_table.write_changed_blocks(changes_fd, compressed);
    453456        }
    454457
    455458        postlist_table.commit(new_revision, changes_fd);
  • xapian-core/backends/brass/brass_databasereplicator.cc

    diff --git a/xapian-core/backends/brass/brass_databasereplicator.cc b/xapian-core/backends/brass/brass_databasereplicator.cc
    index 002df61..a8b1875 100644
    a b  
    2020 * USA
    2121 */
    2222
     23#include <iostream>
    2324#include <config.h>
    2425
    2526#include "brass_databasereplicator.h"
     
    3435#include "debuglog.h"
    3536#include "fd.h"
    3637#include "filetests.h"
     38#include "internaltypes.h"
    3739#include "io_utils.h"
    3840#include "pack.h"
    3941#include "net/remoteconnection.h"
     
    4244#include "str.h"
    4345#include "stringutils.h"
    4446
     47
    4548#ifdef __WIN32__
    4649# include "msvc_posix_wrapper.h"
    4750#endif
    using namespace std;  
    5255using namespace Xapian;
    5356
    5457BrassDatabaseReplicator::BrassDatabaseReplicator(const string & db_dir_)
    55         : db_dir(db_dir_)
     58    : db_dir(db_dir_),
     59      comp_stream(Z_DEFAULT_STRATEGY)
    5660{
    5761}
    5862
    BrassDatabaseReplicator::process_changeset_chunk_blocks(const string & tablename  
    191195    }
    192196    {
    193197        FD closer(fd);
    194 
     198        Bytef out[8192];
    195199        while (true) {
    196200            conn.get_message_chunk(buf, REASONABLE_CHANGESET_SIZE, end_time);
    197201            ptr = buf.data();
    198202            end = ptr + buf.size();
    199 
    200203            uint4 block_number;
    201204            if (!unpack_uint(&ptr, end, &block_number))
    202205                throw NetworkError("Invalid block number in changeset");
    BrassDatabaseReplicator::process_changeset_chunk_blocks(const string & tablename  
    205208                break;
    206209            --block_number;
    207210
    208             conn.get_message_chunk(buf, changeset_blocksize, end_time);
     211            conn.get_message_chunk(buf, REASONABLE_CHANGESET_SIZE, end_time);
     212            ptr = buf.data();
     213            end = ptr + buf.size();
     214            unsigned int compressed_block_size;
     215            if(!unpack_uint(&ptr, end, &compressed_block_size))
     216                throw NetworkError("Invalid v2 cphangeset");
     217            buf.erase(0, ptr - buf.data());
     218
     219            if (compressed_block_size > 0) {
     220                string ubuf;
     221                ubuf.reserve(changeset_blocksize);
     222                conn.get_message_chunk(buf, changeset_blocksize, end_time);
     223                comp_stream.lazy_alloc_inflate_zstream();
     224                comp_stream.inflate_zstream->next_in = (Bytef*)const_cast<char *>(buf.data());
     225                comp_stream.inflate_zstream->avail_in = (uInt)(buf.size());
     226                comp_stream.inflate_zstream->next_out = out;
     227                comp_stream.inflate_zstream->avail_out = (uInt)sizeof(out);
     228                int err = inflate(comp_stream.inflate_zstream, Z_FINISH);
     229                if (err == Z_STREAM_END) {
     230                    std::cout << comp_stream.inflate_zstream->next_out - out << std::endl;
     231                    std::cout << out << std::endl;
     232                    ubuf.append(reinterpret_cast<const char *>(out),
     233                                comp_stream.inflate_zstream->next_out - out);
     234                    swap(buf, ubuf);
     235                }
     236            } else {
     237                conn.get_message_chunk(buf, changeset_blocksize, end_time);
     238            }
    209239            if (buf.size() < changeset_blocksize)
    210240                throw NetworkError("Incomplete block in changeset");
    211241
    BrassDatabaseReplicator::apply_changeset_from_conn(RemoteConnection & conn,  
    255285    buf.erase(0, 12);
    256286    const char *ptr = buf.data();
    257287    const char *end = ptr + buf.size();
    258 
    259288    unsigned int changes_version;
    260289    if (!unpack_uint(&ptr, end, &changes_version))
    261290        throw NetworkError("Couldn't read a valid version number from changeset");
  • xapian-core/backends/brass/brass_databasereplicator.h

    diff --git a/xapian-core/backends/brass/brass_databasereplicator.h b/xapian-core/backends/brass/brass_databasereplicator.h
    index 3176589..915fb1f 100644
    a b  
    2424#define XAPIAN_INCLUDED_BRASS_DATABASEREPLICATOR_H
    2525
    2626#include "backends/databasereplicator.h"
     27#include "zlib_utils.h"
    2728
    2829class BrassDatabaseReplicator : public Xapian::DatabaseReplicator {
    2930    private:
    class BrassDatabaseReplicator : public Xapian::DatabaseReplicator {  
    3132         */
    3233        std::string db_dir;
    3334
     35        CompressionStream comp_stream;
     36
    3437        /** Process a chunk which holds a base block.
    3538         */
    3639        void process_changeset_chunk_base(const std::string & tablename,
  • xapian-core/backends/brass/brass_replicate_internal.h

    diff --git a/xapian-core/backends/brass/brass_replicate_internal.h b/xapian-core/backends/brass/brass_replicate_internal.h
    index d108710..24d140d 100644
    a b  
    2828
    2929// The current version of changeset files.
    3030// 1  - initial implementation
    31 #define CHANGES_VERSION 1u
     31// 2  - compressed changesets
     32#define CHANGES_VERSION 2u
    3233
    3334// Must be big enough to ensure that the start of the changeset (up to the new
    3435// revision number) will fit in this much space.
  • xapian-core/backends/brass/brass_table.cc

    diff --git a/xapian-core/backends/brass/brass_table.cc b/xapian-core/backends/brass/brass_table.cc
    index d8ddc74..e77312b 100644
    a b BrassTable::add(const string &key, string tag, bool already_compressed)  
    10301030        CompileTimeAssert(DONT_COMPRESS != Z_RLE);
    10311031#endif
    10321032
    1033         lazy_alloc_deflate_zstream();
     1033        comp_stream.lazy_alloc_deflate_zstream();
    10341034
    1035         deflate_zstream->next_in = (Bytef *)const_cast<char *>(tag.data());
    1036         deflate_zstream->avail_in = (uInt)tag.size();
     1035        comp_stream.deflate_zstream->next_in = (Bytef *)const_cast<char *>(tag.data());
     1036        comp_stream.deflate_zstream->avail_in = (uInt)tag.size();
    10371037
    10381038        // If compressed size is >= tag.size(), we don't want to compress.
    10391039        unsigned long blk_len = tag.size() - 1;
    10401040        unsigned char * blk = new unsigned char[blk_len];
    1041         deflate_zstream->next_out = blk;
    1042         deflate_zstream->avail_out = (uInt)blk_len;
     1041        comp_stream.deflate_zstream->next_out = blk;
     1042        comp_stream.deflate_zstream->avail_out = (uInt)blk_len;
    10431043
    1044         int err = deflate(deflate_zstream, Z_FINISH);
     1044        int err = deflate(comp_stream.deflate_zstream, Z_FINISH);
    10451045        if (err == Z_STREAM_END) {
    10461046            // If deflate succeeded, then the output was at least one byte
    10471047            // smaller than the input.
    1048             tag.assign(reinterpret_cast<const char *>(blk), deflate_zstream->total_out);
     1048            tag.assign(reinterpret_cast<const char *>(blk), comp_stream.deflate_zstream->total_out);
    10491049            compressed = true;
    10501050        } else {
    10511051            // Deflate failed - presumably the data wasn't compressible.
    BrassTable::read_tag(Brass::Cursor * C_, string *tag, bool keep_compressed) cons  
    12401240
    12411241    Bytef buf[8192];
    12421242
    1243     lazy_alloc_inflate_zstream();
     1243    comp_stream.lazy_alloc_inflate_zstream();
    12441244
    1245     inflate_zstream->next_in = (Bytef*)const_cast<char *>(tag->data());
    1246     inflate_zstream->avail_in = (uInt)tag->size();
     1245    comp_stream.inflate_zstream->next_in = (Bytef*)const_cast<char *>(tag->data());
     1246    comp_stream.inflate_zstream->avail_in = (uInt)tag->size();
    12471247
    12481248    int err = Z_OK;
    12491249    while (err != Z_STREAM_END) {
    1250         inflate_zstream->next_out = buf;
    1251         inflate_zstream->avail_out = (uInt)sizeof(buf);
    1252         err = inflate(inflate_zstream, Z_SYNC_FLUSH);
    1253         if (err == Z_BUF_ERROR && inflate_zstream->avail_in == 0) {
    1254             LOGLINE(DB, "Z_BUF_ERROR - faking checksum of " << inflate_zstream->adler);
     1250        comp_stream.inflate_zstream->next_out = buf;
     1251        comp_stream.inflate_zstream->avail_out = (uInt)sizeof(buf);
     1252        err = inflate(comp_stream.inflate_zstream, Z_SYNC_FLUSH);
     1253        if (err == Z_BUF_ERROR && comp_stream.inflate_zstream->avail_in == 0) {
     1254            LOGLINE(DB, "Z_BUF_ERROR - faking checksum of " << comp_stream.inflate_zstream->adler);
    12551255            Bytef header2[4];
    1256             setint4(header2, 0, inflate_zstream->adler);
    1257             inflate_zstream->next_in = header2;
    1258             inflate_zstream->avail_in = 4;
    1259             err = inflate(inflate_zstream, Z_SYNC_FLUSH);
     1256            setint4(header2, 0, comp_stream.inflate_zstream->adler);
     1257            comp_stream.inflate_zstream->next_in = header2;
     1258            comp_stream.inflate_zstream->avail_in = 4;
     1259            err = inflate(comp_stream.inflate_zstream, Z_SYNC_FLUSH);
    12601260            if (err == Z_STREAM_END) break;
    12611261        }
    12621262
    12631263        if (err != Z_OK && err != Z_STREAM_END) {
    12641264            if (err == Z_MEM_ERROR) throw std::bad_alloc();
    12651265            string msg = "inflate failed";
    1266             if (inflate_zstream->msg) {
     1266            if (comp_stream.inflate_zstream->msg) {
    12671267                msg += " (";
    1268                 msg += inflate_zstream->msg;
     1268                msg += comp_stream.inflate_zstream->msg;
    12691269                msg += ')';
    12701270            }
    12711271            throw Xapian::DatabaseError(msg);
    12721272        }
    12731273
    12741274        utag.append(reinterpret_cast<const char *>(buf),
    1275                     inflate_zstream->next_out - buf);
     1275                    comp_stream.inflate_zstream->next_out - buf);
    12761276    }
    1277     if (utag.size() != inflate_zstream->total_out) {
     1277    if (utag.size() != comp_stream.inflate_zstream->total_out) {
    12781278        string msg = "compressed tag didn't expand to the expected size: ";
    12791279        msg += str(utag.size());
    12801280        msg += " != ";
    12811281        // OpenBSD's zlib.h uses off_t instead of uLong for total_out.
    1282         msg += str((size_t)inflate_zstream->total_out);
     1282        msg += str((size_t)comp_stream.inflate_zstream->total_out);
    12831283        throw Xapian::DatabaseCorruptError(msg);
    12841284    }
    12851285
    BrassTable::BrassTable(const char * tablename_, const string & path_,  
    15661566          cursor_version(0),
    15671567          split_p(0),
    15681568          compress_strategy(compress_strategy_),
    1569           deflate_zstream(NULL),
    1570           inflate_zstream(NULL),
     1569          comp_stream(compress_strategy_),
    15711570          lazy(lazy_)
    15721571{
    15731572    LOGCALL_CTOR(DB, "BrassTable", tablename_ | path_ | readonly_ | compress_strategy_ | lazy_);
    BrassTable::really_empty() const  
    15871586    return !cur.next();
    15881587}
    15891588
    1590 void
    1591 BrassTable::lazy_alloc_deflate_zstream() const {
    1592     if (usual(deflate_zstream)) {
    1593         if (usual(deflateReset(deflate_zstream) == Z_OK)) return;
    1594         // Try to recover by deleting the stream and starting from scratch.
    1595         delete deflate_zstream;
    1596     }
    1597 
    1598     deflate_zstream = new z_stream;
    1599 
    1600     deflate_zstream->zalloc = reinterpret_cast<alloc_func>(0);
    1601     deflate_zstream->zfree = reinterpret_cast<free_func>(0);
    1602     deflate_zstream->opaque = (voidpf)0;
    1603 
    1604     // -15 means raw deflate with 32K LZ77 window (largest)
    1605     // memLevel 9 is the highest (8 is default)
    1606     int err;
    1607     err = deflateInit2(deflate_zstream, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
    1608                        -15, 9, compress_strategy);
    1609     if (rare(err != Z_OK)) {
    1610         if (err == Z_MEM_ERROR) {
    1611             delete deflate_zstream;
    1612             deflate_zstream = 0;
    1613             throw std::bad_alloc();
    1614         }
    1615         string msg = "deflateInit2 failed (";
    1616         if (deflate_zstream->msg) {
    1617             msg += deflate_zstream->msg;
    1618         } else {
    1619             msg += str(err);
    1620         }
    1621         msg += ')';
    1622         delete deflate_zstream;
    1623         deflate_zstream = 0;
    1624         throw Xapian::DatabaseError(msg);
    1625     }
    1626 }
    1627 
    1628 void
    1629 BrassTable::lazy_alloc_inflate_zstream() const {
    1630     if (usual(inflate_zstream)) {
    1631         if (usual(inflateReset(inflate_zstream) == Z_OK)) return;
    1632         // Try to recover by deleting the stream and starting from scratch.
    1633         delete inflate_zstream;
    1634     }
    1635 
    1636     inflate_zstream = new z_stream;
    1637 
    1638     inflate_zstream->zalloc = reinterpret_cast<alloc_func>(0);
    1639     inflate_zstream->zfree = reinterpret_cast<free_func>(0);
    1640 
    1641     inflate_zstream->next_in = Z_NULL;
    1642     inflate_zstream->avail_in = 0;
    1643 
    1644     int err = inflateInit2(inflate_zstream, -15);
    1645     if (rare(err != Z_OK)) {
    1646         if (err == Z_MEM_ERROR) {
    1647             delete inflate_zstream;
    1648             inflate_zstream = 0;
    1649             throw std::bad_alloc();
    1650         }
    1651         string msg = "inflateInit2 failed (";
    1652         if (inflate_zstream->msg) {
    1653             msg += inflate_zstream->msg;
    1654         } else {
    1655             msg += str(err);
    1656         }
    1657         msg += ')';
    1658         delete inflate_zstream;
    1659         inflate_zstream = 0;
    1660         throw Xapian::DatabaseError(msg);
    1661     }
    1662 }
    1663 
    16641589bool
    16651590BrassTable::exists() const {
    16661591    LOGCALL(DB, bool, "BrassTable::exists", NO_ARGS);
    BrassTable::create_and_open(unsigned int block_size_)  
    17281653BrassTable::~BrassTable() {
    17291654    LOGCALL_DTOR(DB, "BrassTable");
    17301655    BrassTable::close();
    1731 
    1732     if (deflate_zstream) {
    1733         // Errors which we care about have already been handled, so just ignore
    1734         // any which get returned here.
    1735         (void) deflateEnd(deflate_zstream);
    1736         delete deflate_zstream;
    1737     }
    1738 
    1739     if (inflate_zstream) {
    1740         // Errors which we care about have already been handled, so just ignore
    1741         // any which get returned here.
    1742         (void) inflateEnd(inflate_zstream);
    1743         delete inflate_zstream;
    1744     }
    17451656}
    17461657
    17471658void BrassTable::close(bool permanent) {
    BrassTable::commit(brass_revision_number_t revision, int changes_fd,  
    18951806}
    18961807
    18971808void
    1898 BrassTable::write_changed_blocks(int changes_fd)
     1809BrassTable::write_changed_blocks(int changes_fd, bool compressed)
    18991810{
    1900     LOGCALL_VOID(DB, "BrassTable::write_changed_blocks", changes_fd);
     1811    LOGCALL_VOID(DB, "BrassTable::write_changed_blocks", changes_fd | compressed);
    19011812    Assert(changes_fd >= 0);
    19021813    if (handle < 0) return;
    19031814    if (faked_root_block) return;
    BrassTable::write_changed_blocks(int changes_fd)  
    19061817    pack_uint(buf, 2u); // Indicate the item is a list of blocks
    19071818    pack_string(buf, tablename);
    19081819    pack_uint(buf, block_size);
    1909     io_write(changes_fd, buf.data(), buf.size());
    19101820
     1821    // Write the item type, table name and block size to the file
     1822    io_write(changes_fd, buf.data(), buf.size());
     1823       
    19111824    // Compare the old and new bitmaps to find blocks which have changed, and
    19121825    // write them to the file descriptor.
    19131826    uint4 n = 0;
    BrassTable::write_changed_blocks(int changes_fd)  
    19171830        while (base.find_changed_block(&n)) {
    19181831            buf.resize(0);
    19191832            pack_uint(buf, n + 1);
     1833            // Write the block number to the file
    19201834            io_write(changes_fd, buf.data(), buf.size());
    19211835
    19221836            // Read block n.
    19231837            read_block(n, p);
    19241838
    19251839            // Write block n to the file.
    1926             io_write(changes_fd, reinterpret_cast<const char *>(p), block_size);
     1840            if (compressed) {
     1841                comp_stream.lazy_alloc_deflate_zstream();
     1842                comp_stream.compress(p, block_size);
     1843                if (comp_stream.zerr == Z_STREAM_END) {
     1844                    buf.resize(0);
     1845                    pack_uint(buf, comp_stream.deflate_zstream->total_out);
     1846                    io_write(changes_fd, buf.data(), buf.size());
     1847                    io_write(changes_fd, reinterpret_cast<const char *>(comp_stream.out),
     1848                             comp_stream.deflate_zstream->total_out);
     1849                } else {
     1850                    // Deflate failed: write a zero compressed-size marker, then the block uncompressed
     1851                    buf.resize(0);
     1852                    pack_uint(buf, 0u);
     1853                    io_write(changes_fd, buf.data(), buf.size());
     1854                    io_write(changes_fd, reinterpret_cast<const char *>(p), block_size);
     1855                }
     1856            }
     1857            else {
     1858                buf.resize(0);
     1859                pack_uint(buf, 0u);
     1860                io_write(changes_fd, buf.data(), buf.size());
     1861                io_write(changes_fd, reinterpret_cast<const char *>(p), block_size);
     1862            }
    19271863            ++n;
    19281864        }
    19291865        delete[] p;
    BrassTable::write_changed_blocks(int changes_fd)  
    19341870    }
    19351871    buf.resize(0);
    19361872    pack_uint(buf, 0u);
     1873    // Write 0 for end of blocks
    19371874    io_write(changes_fd, buf.data(), buf.size());
    19381875}
    19391876
  • xapian-core/backends/brass/brass_table.h

    diff --git a/xapian-core/backends/brass/brass_table.h b/xapian-core/backends/brass/brass_table.h
    index 86c92a8..a51df2f 100644
    a b  
    3636#include "stringutils.h"
    3737#include "unaligned.h"
    3838
     39#include "common/zlib_utils.h"
     40
    3941#include <algorithm>
    4042#include <string>
    4143
    42 #include <zlib.h>
    43 
    4444#define DONT_COMPRESS -1
    4545
    4646/** Even for items of at maximum size, it must be possible to get this number of
    class BrassTable {  
    406406         *
    407407         *  @param changes_fd  The file descriptor to write changes to.
    408408         */
    409         void write_changed_blocks(int changes_fd);
     409        void write_changed_blocks(int changes_fd, bool compressed);
    410410
    411411        /** Cancel any outstanding changes.
    412412         *
    class BrassTable {  
    649649        /// The name of the table (used when writing changesets).
    650650        const char * tablename;
    651651
    652         /// Allocate the zstream for deflating, if not already allocated.
    653         void lazy_alloc_deflate_zstream() const;
    654 
    655         /// Allocate the zstream for inflating, if not already allocated.
    656         void lazy_alloc_inflate_zstream() const;
    657652
    658653        /** revision number of the opened B-tree. */
    659654        brass_revision_number_t revision_number;
    class BrassTable {  
    784779        /** DONT_COMPRESS or Z_DEFAULT_STRATEGY, Z_FILTERED, Z_HUFFMAN_ONLY,
    785780         *  Z_RLE. */
    786781        int compress_strategy;
    787 
    788         /// Zlib state object for deflating
    789         mutable z_stream *deflate_zstream;
    790 
    791         /// Zlib state object for inflating
    792         mutable z_stream *inflate_zstream;
     782       
     783        CompressionStream comp_stream;
    793784
    794785        /// If true, don't create the table until it's needed.
    795786        bool lazy;
  • xapian-core/common/Makefile.mk

    diff --git a/xapian-core/common/Makefile.mk b/xapian-core/common/Makefile.mk
    index 8865242..0211f8a 100644
    a b noinst_HEADERS +=\  
    3737        common/str.h\
    3838        common/stringutils.h\
    3939        common/submatch.h\
     40        common/unaligned.h\
     41        common/zlib_utils.h
    4042        common/unaligned.h
    4143
     44
    4245EXTRA_DIST +=\
    4346        common/dir_contents\
    4447        common/win32_uuid.cc\
    lib_src +=\  
    5962        common/serialise-double.cc\
    6063        common/socket_utils.cc\
    6164        common/str.cc\
    62         common/stringutils.cc
     65        common/stringutils.cc\
     66        common/zlib_utils.cc
     67
    6368
    6469if USE_WIN32_UUID_API
    6570lib_src +=\
  • new file xapian-core/common/zlib_utils.cc

    diff --git a/xapian-core/common/zlib_utils.cc b/xapian-core/common/zlib_utils.cc
    new file mode 100644
    index 0000000..8761f48
    - +  
     1#include <config.h>
     2
     3#include "zlib_utils.h"
     4#include "str.h"
     5#include "stringutils.h"
     6
     7CompressionStream::CompressionStream(int compress_strategy_)
     8    : compress_strategy(compress_strategy_),
     9      zerr(0),
     10      out_len(0),
     11      out(NULL),
     12      deflate_zstream(NULL),
     13      inflate_zstream(NULL)
     14{
     15    // LOGCALL_CTOR()
     16}
     17
     18CompressionStream::~CompressionStream() {
     19    if (deflate_zstream) {
     20        // Errors which we care about have already been handled, so just ignore
     21        // any which get returned here.
     22        (void) deflateEnd(deflate_zstream);
     23        delete deflate_zstream;
     24    }
     25
     26    if (inflate_zstream) {
     27        // Errors which we care about have already been handled, so just ignore
     28        // any which get returned here.
     29        (void) inflateEnd(inflate_zstream);
     30        delete inflate_zstream;
     31    }
     32   
     33    if (out) {
     34        delete [] out;
     35    }
     36}
     37
     38
     39void
     40CompressionStream::compress(string & buf) {
     41    out_len = buf.size() - 1;
     42    out = new unsigned char[out_len];
     43    deflate_zstream->avail_in = (uInt)buf.size();
     44    deflate_zstream->next_in = (Bytef *)const_cast<char *>(buf.data());
     45    deflate_zstream->next_out = out;
     46    deflate_zstream->avail_out = (uInt)out_len;
     47    zerr = deflate(deflate_zstream, Z_FINISH);
     48}
     49
     50
     51void
     52CompressionStream::compress(byte * buf, int size) {
     53    out_len = size - 1;
     54    out = new unsigned char[out_len];
     55    deflate_zstream->avail_in = (uInt)size;
     56    deflate_zstream->next_in = (Bytef *)(buf);
     57    deflate_zstream->next_out = out;
     58    deflate_zstream->avail_out = (uInt)out_len;
     59    zerr = deflate(deflate_zstream, Z_FINISH);
     60}
     61
     62// void
     63// CompressionStream::decompress(string & buf) {
     64//     inflate_zstream->next_in = (Bytef*)const_cast<char *>(tag->data());
     65//     inflate_zstream->avail_in = (uInt)tag->size();
     66//     int err = Z_OK;
     67//     while (err != Z_STREAM_END) {
     68//      comp_stream.inflate_zstream->next_out = buf;
     69//      comp_stream.inflate_zstream->avail_out = (uInt)sizeof(buf);
     70//      err = inflate(comp_stream.inflate_zstream, Z_SYNC_FLUSH);
     71//      if (err == Z_BUF_ERROR && comp_stream.inflate_zstream->avail_in == 0) {
     72//          LOGLINE(DB, "Z_BUF_ERROR - faking checksum of " << comp_stream.inflate_zstream->adler);
     73//          Bytef header2[4];
     74//          setint4(header2, 0, comp_stream.inflate_zstream->adler);
     75//          comp_stream.inflate_zstream->next_in = header2;
     76//          comp_stream.inflate_zstream->avail_in = 4;
     77//          err = inflate(comp_stream.inflate_zstream, Z_SYNC_FLUSH);
     78//          if (err == Z_STREAM_END) break;
     79//      }
     80
     81//      if (err != Z_OK && err != Z_STREAM_END) {
     82//          if (err == Z_MEM_ERROR) throw std::bad_alloc();
     83//          string msg = "inflate failed";
     84//          if (comp_stream.inflate_zstream->msg) {
     85//              msg += " (";
     86//              msg += comp_stream.inflate_zstream->msg;
     87//              msg += ')';
     88//          }
     89//          throw Xapian::DatabaseError(msg);
     90//      }
     91
     92//      utag.append(reinterpret_cast<const char *>(buf),
     93//                  comp_stream.inflate_zstream->next_out - buf);
     94//     }
     95//     if (utag.size() != comp_stream.inflate_zstream->total_out) {
     96//      string msg = "compressed tag didn't expand to the expected size: ";
     97//      msg += str(utag.size());
     98//      msg += " != ";
     99//      // OpenBSD's zlib.h uses off_t instead of uLong for total_out.
     100//      msg += str((size_t)comp_stream.inflate_zstream->total_out);
     101//      throw Xapian::DatabaseCorruptError(msg);
     102//     }
     103// }
     104
     105
     106void
     107CompressionStream::lazy_alloc_deflate_zstream() const {
     108    if (usual(deflate_zstream)) {
     109        if (usual(deflateReset(deflate_zstream) == Z_OK)) return;
     110        // Try to recover by deleting the stream and starting from scratch.
     111        delete deflate_zstream;
     112    }
     113
     114    deflate_zstream = new z_stream;
     115
     116    deflate_zstream->zalloc = reinterpret_cast<alloc_func>(0);
     117    deflate_zstream->zfree = reinterpret_cast<free_func>(0);
     118    deflate_zstream->opaque = (voidpf)0;
     119
     120    // -15 means raw deflate with 32K LZ77 window (largest)
     121    // memLevel 9 is the highest (8 is default)
     122    int err;
     123    // FIXME:dc: this should pass compress_strategy rather than hard-coding Z_DEFAULT_STRATEGY
     124    err = deflateInit2(deflate_zstream, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
     125                       -15, 9, Z_DEFAULT_STRATEGY);
     126    if (rare(err != Z_OK)) {
     127        if (err == Z_MEM_ERROR) {
     128            delete deflate_zstream;
     129            deflate_zstream = 0;
     130            throw std::bad_alloc();
     131        }
     132        string msg = "deflateInit2 failed (";
     133        if (deflate_zstream->msg) {
     134            msg += deflate_zstream->msg;
     135        } else {
     136            msg += str(err);
     137        }
     138        msg += ')';
     139        delete deflate_zstream;
     140        deflate_zstream = 0;
     141        throw Xapian::DatabaseError(msg);
     142    }
     143}
     144
     145void
     146CompressionStream::lazy_alloc_inflate_zstream() const {
     147    if (usual(inflate_zstream)) {
     148        if (usual(inflateReset(inflate_zstream) == Z_OK)) return;
     149        // Try to recover by deleting the stream and starting from scratch.
     150        delete inflate_zstream;
     151    }
     152
     153    inflate_zstream = new z_stream;
     154
     155    inflate_zstream->zalloc = reinterpret_cast<alloc_func>(0);
     156    inflate_zstream->zfree = reinterpret_cast<free_func>(0);
     157
     158    inflate_zstream->next_in = Z_NULL;
     159    inflate_zstream->avail_in = 0;
     160
     161    int err = inflateInit2(inflate_zstream, -15);
     162    if (rare(err != Z_OK)) {
     163        if (err == Z_MEM_ERROR) {
     164            delete inflate_zstream;
     165            inflate_zstream = 0;
     166            throw std::bad_alloc();
     167        }
     168        string msg = "inflateInit2 failed (";
     169        if (inflate_zstream->msg) {
     170            msg += inflate_zstream->msg;
     171        } else {
     172            msg += str(err);
     173        }
     174        msg += ')';
     175        delete inflate_zstream;
     176        inflate_zstream = 0;
     177        throw Xapian::DatabaseError(msg);
     178    }
     179}
  • new file xapian-core/common/zlib_utils.h

    diff --git a/xapian-core/common/zlib_utils.h b/xapian-core/common/zlib_utils.h
    new file mode 100644
    index 0000000..6fbcee8
    - +  
     1#ifndef XAPIAN_INCLUDED_ZLIB_UTILS_H
     2#define XAPIAN_INCLUDED_ZLIB_UTILS_H
     3
     4#include "debuglog.h"
     5
     6#include "internaltypes.h"
     7
     8#include "xapian/error.h"
     9
     10#include <zlib.h>
     11
     12using namespace std;
     13
     14#define DONT_COMPRESS -1
     15
     16class CompressionStream {
     17
     18 public:
     19    CompressionStream(int);
     20
     21    ~CompressionStream();
     22
     23    int compress_strategy;
     24
     25    int zerr;
     26
     27    unsigned long out_len;
     28   
     29    unsigned char * out;
     30
     31    /// Zlib state object for deflating
     32    mutable z_stream *deflate_zstream;
     33
     34    /// Zlib state object for inflating
     35    mutable z_stream *inflate_zstream;
     36
     37    /// Allocate the zstream for deflating, if not already allocated.
     38    void lazy_alloc_deflate_zstream() const;
     39
     40    /// Allocate the zstream for inflating, if not already allocated.
     41    void lazy_alloc_inflate_zstream() const;
     42
     43    void compress(string &);
     44    void compress(byte *, int);
     45};
     46
     47#endif // XAPIAN_INCLUDED_ZLIB_UTILS_H
  • xapian-core/tests/api_query.cc

    diff --git a/xapian-core/tests/api_query.cc b/xapian-core/tests/api_query.cc
    index 0d8a8e2..64bd6c9 100644
    a b DEFINE_TESTCASE(overload1, !backend) {  
    6666    Xapian::Query q;
    6767    q = Xapian::Query("foo") & Xapian::Query("bar");
    6868    TEST_STRINGS_EQUAL(q.get_description(), "Query((foo AND bar))");
    69     q = Xapian::Query("foo") &~ Xapian::Query("bar");
     69    q = Xapian::Query("foo") & (~Xapian::Query("bar"));
    7070    TEST_STRINGS_EQUAL(q.get_description(), "Query((foo AND_NOT bar))");
    7171    q = ~Xapian::Query("bar");
    7272    TEST_STRINGS_EQUAL(q.get_description(), "Query((<alldocuments> AND_NOT bar))");