Ticket #278: max_changesets_post_review.diff

File max_changesets_post_review.diff, 16.6 KB (added by Dan, 14 years ago)
  • xapian-core/backends/brass/brass_database.cc

    diff --git a/xapian-core/backends/brass/brass_database.cc b/xapian-core/backends/brass/brass_database.cc
    index 56f534e..0f5fe00 100644
    a b  
    77 * Copyright 2006,2008 Lemur Consulting Ltd
    88 * Copyright 2009 Richard Boulton
    99 * Copyright 2009 Kan-Ru Chen
     10 * Copyright 2011 Dan Colish
    1011 *
    1112 * This program is free software; you can redistribute it and/or
    1213 * modify it under the terms of the GNU General Public License as
    BrassDatabase::BrassDatabase(const string &brass_dir, int action,  
    120121        return;
    121122    }
    122123
    123     const char *p = getenv("XAPIAN_MAX_CHANGESETS");
    124     if (p)
    125         max_changesets = atoi(p);
    126 
    127124    if (action != Xapian::DB_OPEN && !database_exists()) {
    128125        // FIXME: if we allow Xapian::DB_OVERWRITE, check it here
    129126
    BrassDatabase::set_revision_number(brass_revision_number_t new_revision)  
    400397
    401398    int changes_fd = -1;
    402399    string changes_name;
     400   
     402    // Always re-read XAPIAN_MAX_CHANGESETS, as it may have been modified since the last revision
     402    const char *p = getenv("XAPIAN_MAX_CHANGESETS");
     403    if (p) {
     404        max_changesets = atoi(p);
     405    }
    403406
    404407    if (max_changesets > 0) {
    405408        brass_revision_number_t old_revision = get_revision_number();
    BrassDatabase::set_revision_number(brass_revision_number_t new_revision)  
    470473
    471474        throw;
    472475    }
     476   
     477    // Only remove the oldest_changeset if we successfully write a new changeset and
     478    // we have a revision number greater than max_changesets
     479    if (changes_fd >= 0 && max_changesets < new_revision) {
     480        // Start deleting at the oldest changeset we know about and stop just before stop_changeset.
     481        // If nothing went wrong only one file should be deleted; otherwise
     482        // attempts will be made to clean up more.
     483        brass_revision_number_t oldest_changeset = stats.get_oldest_changeset();
     484        brass_revision_number_t stop_changeset = new_revision - max_changesets;
     485        while (oldest_changeset < stop_changeset) {
     486            if (io_unlink(db_dir + "/changes" + str(oldest_changeset))) {
     487                LOGLINE(DB, "Removed changeset " << oldest_changeset);
     488            } else {
     489                LOGLINE(DB, "Skipping changeset " << oldest_changeset <<
     490                        ", likely removed before");
     491            }
     492            stats.set_oldest_changeset(oldest_changeset++);
     493        }
     494    }
    473495}
    474496
    475497void
  • xapian-core/backends/brass/brass_database.h

    diff --git a/xapian-core/backends/brass/brass_database.h b/xapian-core/backends/brass/brass_database.h
    index 6f8e928..5e14d72 100644
    a b  
    44 * Copyright 2002 Ananova Ltd
    55 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010 Olly Betts
    66 * Copyright 2008 Lemur Consulting Ltd
     7 * Copyright 2011 Dan Colish
    78 *
    89 * This program is free software; you can redistribute it and/or
    910 * modify it under the terms of the GNU General Public License as
    class BrassDatabase : public Xapian::Database::Internal {  
    216217        void get_changeset_revisions(const string & path,
    217218                                     brass_revision_number_t * startrev,
    218219                                     brass_revision_number_t * endrev) const;
     220
    219221    public:
    220222        /** Create and open a brass database.
    221223         *
  • xapian-core/backends/brass/brass_dbstats.cc

    diff --git a/xapian-core/backends/brass/brass_dbstats.cc b/xapian-core/backends/brass/brass_dbstats.cc
    index d4bff9e..9d5bcec 100644
    a b  
    22 * @brief Brass class for database statistics.
    33 */
    44/* Copyright (C) 2009 Olly Betts
     5 * Copyright (C) 2011 Dan Colish
    56 *
    67 * This program is free software; you can redistribute it and/or
    78 * modify it under the terms of the GNU General Public License as
    BrassDatabaseStats::read(BrassPostListTable & postlist_table)  
    3536    string data;
    3637    if (!postlist_table.get_exact_entry(DATABASE_STATS_KEY, data)) {
    3738        // If there's no entry yet, then all the values are zero.
    38         total_doclen = 0;
    39         last_docid = 0;
    40         doclen_lbound = 0;
    41         doclen_ubound = 0;
    42         wdf_ubound = 0;
     39        zero();
    4340        return;
    4441    }
    4542
    BrassDatabaseStats::read(BrassPostListTable & postlist_table)  
    5047        unpack_uint(&p, end, &doclen_lbound) &&
    5148        unpack_uint(&p, end, &wdf_ubound) &&
    5249        unpack_uint(&p, end, &doclen_ubound) &&
     50        unpack_uint(&p, end, &oldest_changeset) &&
    5351        unpack_uint_last(&p, end, &total_doclen)) {
    5452        // doclen_ubound should always be >= wdf_ubound, so we store the
    5553        // difference as it may encode smaller.  wdf_ubound is likely to
    BrassDatabaseStats::write(BrassPostListTable & postlist_table) const  
    7573    // difference as it may encode smaller.  wdf_ubound is likely to
    7674    // be larger than doclen_lbound.
    7775    pack_uint(data, doclen_ubound - wdf_ubound);
     76    pack_uint(data, oldest_changeset);
    7877    // Micro-optimisation: total_doclen is likely to be the largest value, so
    7978    // store it last as pack_uint_last() uses a slightly more compact encoding
    8079    // - this could save us a few bytes!
  • xapian-core/backends/brass/brass_dbstats.h

    diff --git a/xapian-core/backends/brass/brass_dbstats.h b/xapian-core/backends/brass/brass_dbstats.h
    index d3a281e..131afd8 100644
    a b  
    22 * @brief Brass class for database statistics.
    33 */
    44/* Copyright (C) 2009 Olly Betts
     5 * Copyright (C) 2011 Dan Colish
    56 *
    67 * This program is free software; you can redistribute it and/or
    78 * modify it under the terms of the GNU General Public License as
    class BrassDatabaseStats {  
    5152    /// An upper bound on the greatest wdf in this database.
    5253    Xapian::termcount wdf_ubound;
    5354
     55    /// Oldest changeset removed when max_changesets is set
     56    brass_revision_number_t oldest_changeset;
     57
    5458  public:
    5559    BrassDatabaseStats()
    5660        : total_doclen(0), last_docid(0), doclen_lbound(0), doclen_ubound(0),
    57           wdf_ubound(0) { }
     61          wdf_ubound(0), oldest_changeset(0) { }
    5862
    5963    totlen_t get_total_doclen() const { return total_doclen; }
    6064
    class BrassDatabaseStats {  
    7074
    7175    Xapian::termcount get_wdf_upper_bound() const { return wdf_ubound; }
    7276
     77    brass_revision_number_t get_oldest_changeset() const { return oldest_changeset; }
     78
    7379    void zero() {
    7480        total_doclen = 0;
    7581        last_docid = 0;
    7682        doclen_lbound = 0;
    7783        doclen_ubound = 0;
    7884        wdf_ubound = 0;
     85        oldest_changeset = 0;
    7986    }
    8087
    8188    void read(BrassPostListTable & postlist_table);
    8289
    8390    void set_last_docid(Xapian::docid did) { last_docid = did; }
    8491
     92    void set_oldest_changeset(brass_revision_number_t changeset) { oldest_changeset = changeset; }
     93
    8594    void add_document(Xapian::termcount doclen) {
    8695        if (total_doclen == 0 || (doclen && doclen < doclen_lbound))
    8796            doclen_lbound = doclen;
  • xapian-core/backends/brass/brass_version.cc

    diff --git a/xapian-core/backends/brass/brass_version.cc b/xapian-core/backends/brass/brass_version.cc
    index b54fb4b..b0190a9 100644
    a b  
    22 * @brief BrassVersion class
    33 */
    44/* Copyright (C) 2006,2007,2008,2009,2010 Olly Betts
     5 * Copyright (C) 2011 Dan Colish
    56 *
    67 * This program is free software; you can redistribute it and/or modify
    78 * it under the terms of the GNU General Public License as published by
     
    4344using namespace std;
    4445
    4546// YYYYMMDDX where X allows multiple format revisions in a day
    46 #define BRASS_VERSION 200912150
     47#define BRASS_VERSION 201103110
     48// 201103110 1.2.5? Bump for new max changesets dbstats
    4749// 200912150 1.1.4 Brass debuts.
    4850
    4951#define MAGIC_STRING "IAmBrass"
  • xapian-core/backends/chert/chert_database.cc

    diff --git a/xapian-core/backends/chert/chert_database.cc b/xapian-core/backends/chert/chert_database.cc
    index 3cc6b56..7ebb9dc 100644
    a b  
    77 * Copyright 2006,2008 Lemur Consulting Ltd
    88 * Copyright 2009,2010 Richard Boulton
    99 * Copyright 2009 Kan-Ru Chen
     10 * Copyright 2011 Dan Colish
    1011 *
    1112 * This program is free software; you can redistribute it and/or
    1213 * modify it under the terms of the GNU General Public License as
    ChertDatabase::ChertDatabase(const string &chert_dir, int action,  
    123124        return;
    124125    }
    125126
    126     const char *p = getenv("XAPIAN_MAX_CHANGESETS");
    127     if (p)
    128         max_changesets = atoi(p);
    129 
    130127    if (action != Xapian::DB_OPEN && !database_exists()) {
    131128        // FIXME: if we allow Xapian::DB_OVERWRITE, check it here
    132129
    ChertDatabase::set_revision_number(chert_revision_number_t new_revision)  
    404401    int changes_fd = -1;
    405402    string changes_name;
    406403
     404    const char *p = getenv("XAPIAN_MAX_CHANGESETS");
     405    if (p)
     406        max_changesets = atoi(p);
     407
    407408    if (max_changesets > 0) {
    408409        chert_revision_number_t old_revision = get_revision_number();
    409410        if (old_revision) {
    ChertDatabase::set_revision_number(chert_revision_number_t new_revision)  
    465466
    466467        throw;
    467468    }
     469   
     470    if (changes_fd >= 0 && max_changesets < new_revision) {
     471        // While change sets less than N - max_changesets exist, delete them
     472        // 1 must be subtracted so we don't delete the changeset we just wrote
     473        // when max_changesets = 1
     474        unsigned rev = new_revision - max_changesets - 1;
     475        while (io_unlink(db_dir + "/changes" + str(rev--))) { }
     476    }
    468477}
    469478
    470479void
  • xapian-core/backends/flint/flint_database.cc

    diff --git a/xapian-core/backends/flint/flint_database.cc b/xapian-core/backends/flint/flint_database.cc
    index d5eb8a1..297530c 100644
    a b  
    77 * Copyright 2006,2008 Lemur Consulting Ltd
    88 * Copyright 2009,2010 Richard Boulton
    99 * Copyright 2009 Kan-Ru Chen
     10 * Copyright 2011 Dan Colish
    1011 *
    1112 * This program is free software; you can redistribute it and/or
    1213 * modify it under the terms of the GNU General Public License as
    FlintDatabase::FlintDatabase(const string &flint_dir, int action,  
    121122        return;
    122123    }
    123124
    124     const char *p = getenv("XAPIAN_MAX_CHANGESETS");
    125     if (p)
    126         max_changesets = atoi(p);
    127 
    128125    if (action != Xapian::DB_OPEN && !database_exists()) {
    129126        // FIXME: if we allow Xapian::DB_OVERWRITE, check it here
    130127
    FlintDatabase::set_revision_number(flint_revision_number_t new_revision)  
    434431    int changes_fd = -1;
    435432    string changes_name;
    436433
     434    const char *p = getenv("XAPIAN_MAX_CHANGESETS");
     435    if (p)
     436        max_changesets = atoi(p);
     437
    437438    if (max_changesets > 0) {
    438439        flint_revision_number_t old_revision = get_revision_number();
    439440        if (old_revision) {
    FlintDatabase::set_revision_number(flint_revision_number_t new_revision)  
    497498
    498499        throw;
    499500    }
     501
     502    if (changes_fd >= 0 && max_changesets < new_revision) {
     503        // While change sets less than N - max_changesets exist, delete them
     504        // 1 must be subtracted so we don't delete the changeset we just wrote
     505        // when max_changesets = 1
     506        unsigned rev = new_revision - max_changesets - 1;
     507        while (io_unlink(db_dir + "/changes" + str(rev--)))  { }
     508    }
    500509}
    501510
    502511void
  • xapian-core/tests/api_replicate.cc

    diff --git a/xapian-core/tests/api_replicate.cc b/xapian-core/tests/api_replicate.cc
    index fdd18f8..cd24fdd 100644
    a b  
    33 * Copyright 2008 Lemur Consulting Ltd
    44 * Copyright 2009,2010 Olly Betts
    55 * Copyright 2010 Richard Boulton
     6 * Copyright 2011 Dan Colish
    67 *
    78 * This program is free software; you can redistribute it and/or
    89 * modify it under the terms of the GNU General Public License as
    DEFINE_TESTCASE(replicate3, replicas) {  
    512513    return true;
    513514}
    514515
    515 // Basic test of replication functionality.
     516// Tests for max_changesets
    516517DEFINE_TESTCASE(replicate4, replicas) {
    517518    string tempdir = ".replicatmp";
    518519    mktmpdir(tempdir);
    519520    string masterpath = get_named_writable_database_path("master");
    520521
    521     set_max_changesets(10);
     522    set_max_changesets(1);
     523
     524    Xapian::WritableDatabase orig(get_named_writable_database("master"));
     525    Xapian::DatabaseMaster master(masterpath);
     526    string replicapath = tempdir + "/replica";
     527    Xapian::DatabaseReplica replica(replicapath);
     528
     529    // Add a document with no positions to the original database.
     530    Xapian::Document doc1;
     531    doc1.set_data(string("doc1"));
     532    doc1.add_term("nopos");
     533    orig.add_document(doc1);
     534    orig.commit();
     535
     536    // Apply the replication - we don't have changesets stored, so this should
     537    // just do a database copy, and return a count of 1.
     538    int count = replicate(master, replica, tempdir, 0, 1, 1);
     539    TEST_EQUAL(count, 1);
     540    {
     541        Xapian::Database dbcopy(replicapath);
     542        TEST_EQUAL(orig.get_uuid(), dbcopy.get_uuid());
     543    }
     544
     545    // Add a document with positional information to the original database.
     546    doc1.add_posting("pos", 1);
     547    orig.add_document(doc1);
     548    orig.commit();
     549
     550    // Replicate, and check that we have the positional information.
     551    count = replicate(master, replica, tempdir, 1, 0, 1);
     552    TEST_EQUAL(count, 2);
     553    {
     554        Xapian::Database dbcopy(replicapath);
     555        TEST_EQUAL(orig.get_uuid(), dbcopy.get_uuid());
     556    }
     557    check_equal_dbs(masterpath, replicapath);
     558
     559    // Add a document with no positions to the original database.
     560    Xapian::Document doc2;
     561    doc2.set_data(string("doc2"));
     562    doc2.add_term("nopos");
     563    orig.add_document(doc2);
     564    orig.commit();
     565
     566    // Replicate, and check that the replica matches the master.
     567    count = replicate(master, replica, tempdir, 1, 0, 1);
     568    TEST_EQUAL(count, 2);
     569    {
     570        Xapian::Database dbcopy(replicapath);
     571        TEST_EQUAL(orig.get_uuid(), dbcopy.get_uuid());
     572    }
     573    check_equal_dbs(masterpath, replicapath);
     574    TEST(!file_exists(masterpath + "/changes1"));
     575
     576    // Need to close the replica before we remove the temporary directory on
     577    // Windows.
     578    replica.close();
     579    rmtmpdir(tempdir);
     580    return true;
     581}
     582
     583
     584// Tests for max_changesets
     585DEFINE_TESTCASE(replicate5, replicas) {
     586    SKIP_TEST_FOR_BACKEND("chert");
     587    SKIP_TEST_FOR_BACKEND("flint");
     588    string tempdir = ".replicatmp";
     589    mktmpdir(tempdir);
     590    string masterpath = get_named_writable_database_path("master");
     591
     592    set_max_changesets(2);
    522593
    523594    Xapian::WritableDatabase orig(get_named_writable_database("master"));
    524595    Xapian::DatabaseMaster master(masterpath);
    DEFINE_TESTCASE(replicate4, replicas) {  
    555626    }
    556627    check_equal_dbs(masterpath, replicapath);
    557628
     629    // Add a document with no positions to the original database.
     630    Xapian::Document doc2;
     631    doc2.set_data(string("doc2"));
     632    doc2.add_term("nopos");
     633    orig.add_document(doc2);
     634    orig.commit();
     635
     636    // Replicate, and check that the replica matches the master.
     637    count = replicate(master, replica, tempdir, 1, 0, 1);
     638    TEST_EQUAL(count, 2);
     639    {
     640        Xapian::Database dbcopy(replicapath);
     641        TEST_EQUAL(orig.get_uuid(), dbcopy.get_uuid());
     642    }
     643    check_equal_dbs(masterpath, replicapath);
     644
     645    // Add a document with no positions to the original database.
     646    Xapian::Document doc3;
     647    doc3.set_data(string("doc3"));
     648    doc3.add_term("nonopos");
     649    orig.add_document(doc3);
     650    orig.commit();
     651
     652    // Replicate, and check that the replica matches the master.
     653    count = replicate(master, replica, tempdir, 1, 0, 1);
     654    TEST_EQUAL(count, 2);
     655    {
     656        Xapian::Database dbcopy(replicapath);
     657        TEST_EQUAL(orig.get_uuid(), dbcopy.get_uuid());
     658    }
     659    check_equal_dbs(masterpath, replicapath);
     660   
     661    // Ensure that only these changesets exist
     662    TEST(!file_exists(masterpath + "/changes1"));
     663    TEST(file_exists(masterpath + "/changes2"));
     664    TEST(file_exists(masterpath + "/changes3"));
     665
     666    set_max_changesets(3);
     667    masterpath = get_named_writable_database_path("master");
     668
     669    // Add a document with no positions to the original database.
     670    Xapian::Document doc4;
     671    doc4.set_data(string("doc4"));
     672    doc4.add_term("nononopos");
     673    orig.add_document(doc4);
     674    orig.commit();
     675
     676    // Replicate, and check that the replica matches the master.
     677    count = replicate(master, replica, tempdir, 1, 0, 1);
     678    TEST_EQUAL(count, 2);
     679    {
     680        Xapian::Database dbcopy(replicapath);
     681        TEST_EQUAL(orig.get_uuid(), dbcopy.get_uuid());
     682    }
     683    check_equal_dbs(masterpath, replicapath);
     684
     685    // Add a document with no positions to the original database.
     686    Xapian::Document doc5;
     687    doc5.set_data(string("doc5"));
     688    doc5.add_term("nonononopos");
     689    orig.add_document(doc5);
     690    orig.commit();
     691
     692    // Replicate, and check that the replica matches the master.
     693    count = replicate(master, replica, tempdir, 1, 0, 1);
     694    TEST_EQUAL(count, 2);
     695    {
     696        Xapian::Database dbcopy(replicapath);
     697        TEST_EQUAL(orig.get_uuid(), dbcopy.get_uuid());
     698    }
     699    check_equal_dbs(masterpath, replicapath);
     700   
     701    TEST(!file_exists(masterpath + "/changes2"));
     702    TEST(file_exists(masterpath + "/changes3"));
     703    TEST(file_exists(masterpath + "/changes4"));
     704    TEST(file_exists(masterpath + "/changes5"));
     705
    558706    // Need to close the replica before we remove the temporary directory on
    559707    // Windows.
    560708    replica.close();