Ticket #266: set_flush_threshold.patch

File set_flush_threshold.patch, 22.1 KB (added by Richard Boulton, 13 years ago)

Patch which adds set_flush_threshold() and set_commit_threshold() methods.

  • tests/api_transdb.cc

     
    126126
    127127    return true;
    128128}
     129
     130/// Test calling set_flush_threshold(0) inside a simple transaction.
     131DEFINE_TESTCASE(minmemtransaction1, transactions) {
     132    Xapian::WritableDatabase db(get_writable_database("apitest_simpledata"));
     133
     134    Xapian::doccount docs = db.get_doccount();
     135    db.begin_transaction();
     136    Xapian::Document doc;
     137    doc.set_data("testing");
     138    doc.add_term("befuddlement");
     139    db.add_document(doc);
     140    TEST_EXCEPTION(Xapian::InvalidOperationError, db.begin_transaction());
     141    db.set_flush_threshold(0);
     142    TEST_EQUAL(db.get_doccount(), docs + 1);
     143    TEST_EQUAL(db.get_termfreq("befuddlement"), 1);
     144    db.commit_transaction();
     145    TEST_EQUAL(db.get_doccount(), docs + 1);
     146    TEST_EQUAL(db.get_termfreq("befuddlement"), 1);
     147
     148    return true;
     149}
     150
     151/// Test cancelling a simple transaction after calling set_flush_threshold(0).
     152DEFINE_TESTCASE(minmemcanceltransaction1, transactions) {
     153    Xapian::WritableDatabase db(get_writable_database("apitest_simpledata"));
     154
     155    Xapian::doccount docs = db.get_doccount();
     156    db.begin_transaction();
     157    Xapian::Document doc;
     158    doc.set_data("testing");
     159    doc.add_term("befuddlement");
     160    db.add_document(doc);
     161    TEST_EXCEPTION(Xapian::InvalidOperationError, db.begin_transaction());
     162    TEST_EQUAL(db.get_doccount(), docs + 1);
     163    TEST_EQUAL(db.get_termfreq("befuddlement"), 1);
     164    db.set_flush_threshold(0);
     165    db.cancel_transaction();
     166    TEST_EQUAL(db.get_doccount(), docs);
     167    TEST_EQUAL(db.get_termfreq("befuddlement"), 0);
     168
     169    return true;
     170}
  • include/xapian/database.h

     
    55 * Copyright 2002 Ananova Ltd
    66 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2011 Olly Betts
    77 * Copyright 2006,2008 Lemur Consulting Ltd
     8 * Copyright 2011 Richard Boulton
    89 *
    910 * This program is free software; you can redistribute it and/or
    1011 * modify it under the terms of the GNU General Public License as
     
    565566         */
    566567        void flush() { commit(); }
    567568
     569        /** Set the threshold at which changes are flushed automatically.
     570         *
     571         *  This may be used to exert some control on how often buffered
     572         *  changes are flushed from memory during indexing.  A flush will
     573         *  happen after the specified number of changes have been made since
     574         *  the previous flush or commit.
     575         *
     576         *  The initial flush threshold is read from the XAPIAN_FLUSH_THRESHOLD
     577         *  environment variable (chert and brass use 10000 if it isn't set).
     578         *
     579         *  This may be called whether a transaction is currently in progress
     580         *  or not, and takes effect both during transactions and outside
     581         *  transactions.
     582         *
     583         *  If the new threshold is less than or equal to the number of changes
     584         *  that have occurred since the previous flush or commit, an immediate
     585         *  flush will be performed.
     586         *
     587         *  A threshold of 0 disables automatic flushing, but causes an
     588         *  immediate flush.  Be aware that various operations will still cause
     589         *  an implicit flush even if the threshold is set to 0, so this is not
     590         *  an effective way to control IO.
     591         *
     592         *  The old value of the flush threshold will be returned, so a call to
     593         *  set_flush_threshold(set_flush_threshold(0)) causes an immediate
     594         *  flush, but leaves the threshold unchanged.
     595         *
     596         *  Not all backends support a flush threshold - such backends will
     597         *  always return 0 from this method.
     598         *
     599         *  @exception Xapian::DatabaseError will be thrown if a problem occurs
     600         *             while modifying the database.
     601         *
     602         *  @exception Xapian::DatabaseCorruptError will be thrown if the
     603         *             database is in a corrupt state.
     604         */
     605        doccount set_flush_threshold(doccount threshold);
     606
     607        /** Set the threshold at which changes are committed automatically.
     608         *
     609         *  This may be used to control how often changes are committed during
     610         *  indexing, when not within a transaction.  Such a commit will happen
     611         *  after the specified number of changes have been made since the
     612         *  previous commit.
     613         *
     614         *  The commit threshold starts out equal to the flush threshold, so it
     615         *  is also taken from the XAPIAN_FLUSH_THRESHOLD environment variable.
     616         *
     617         *  If the new threshold is less than or equal to the number of changes
     618         *  that have occurred since the previous commit, an immediate
     619         *  commit will be performed (unless a transaction is in progress).
     620         *
     621         *  A commit automatically performs a flush first.
     622         *
     623         *  A threshold of 0 disables automatic committing, but causes an
     624         *  immediate commit (assuming no transaction is in progress).
     625         *
     626         *  The old value of the commit threshold is returned.  Backends which
     627         *  don't support a commit threshold always return 0.
     628         *
     629         *  @exception Xapian::DatabaseError will be thrown if a problem occurs
     630         *             while modifying the database.
     631         *
     632         *  @exception Xapian::DatabaseCorruptError will be thrown if the
     633         *             database is in a corrupt state.
     634         */
     635        doccount set_commit_threshold(doccount threshold);
     636
    568637        /** Begin a transaction.
    569638         *
    570639         *  In Xapian a transaction is a group of modifications to the database
  • common/database.h

     
    412412        /** Cancel pending modifications to the database. */
    413413        virtual void cancel();
    414414
     415        /** Set the threshold at which changes are flushed.
     416         *
     417         *  See WritableDatabase::set_flush_threshold() for more information.
     418         */
     419        virtual doccount set_flush_threshold(doccount);
     420
     421        /** Set the threshold at which changes are committed.
     422         *
     423         *  See WritableDatabase::set_commit_threshold() for more information.
     424         */
     425        virtual doccount set_commit_threshold(doccount);
     426
    415427        /** Begin a transaction.
    416428         *
    417429         *  See WritableDatabase::begin_transaction() for more information.
  • api/omdatabase.cc

     
    784784    internal[0]->commit();
    785785}
    786786
     787doccount
     788WritableDatabase::set_flush_threshold(doccount threshold)
     789{
     790    LOGCALL_VOID(API, "WritableDatabase::set_flush_threshold", threshold);
     791    if (internal.size() != 1) only_one_subdatabase_allowed();
     792    RETURN(internal[0]->set_flush_threshold(threshold));
     793}
     794
     795doccount
     796WritableDatabase::set_commit_threshold(doccount threshold)
     797{
     798    LOGCALL_VOID(API, "WritableDatabase::set_commit_threshold", threshold);
     799    if (internal.size() != 1) only_one_subdatabase_allowed();
     800    RETURN(internal[0]->set_commit_threshold(threshold));
     801}
     802
    787803void
    788804WritableDatabase::begin_transaction(bool flushed)
    789805{
  • api/error.cc

     
    3838
    3939Xapian::Error::Error(const std::string &msg_, const std::string &context_,
    4040                     const char * type_, const char * error_string_)
    41     : msg(msg_), context(context_), error_string(), type(type_),
    42       my_errno(0), already_handled(false)
     41    : msg(msg_), context(context_), type(type_), my_errno(0),
     42      error_string(), already_handled(false)
    4343{
    4444    if (error_string_) error_string.assign(error_string_);
    4545}
  • backends/database.cc

     
    122122    Assert(false);
    123123}
    124124
     125doccount
     126Database::Internal::set_flush_threshold(doccount)
     127{
     128    // Writable databases may override this method.
     129    return 0;
     130}
     131
     132doccount
     133Database::Internal::set_commit_threshold(doccount)
     134{
     135    // Writable databases may override this method.
     136    return 0;
     137}
     138
    125139void
    126140Database::Internal::begin_transaction(bool flushed)
    127141{
  • backends/chert/chert_database.cc

     
    10231023          freq_deltas(),
    10241024          doclens(),
    10251025          mod_plists(),
    1026           change_count(0),
     1026          changes_since_flush(0),
     1027          changes_since_commit(0),
    10271028          flush_threshold(0),
     1029          commit_threshold(0),
    10281030          modify_shortcut_document(NULL),
    10291031          modify_shortcut_docid(0)
    10301032{
     
    10331035    const char *p = getenv("XAPIAN_FLUSH_THRESHOLD");
    10341036    if (p)
    10351037        flush_threshold = atoi(p);
    1036     if (flush_threshold == 0)
     1038    else
    10371039        flush_threshold = 10000;
     1040    commit_threshold = flush_threshold;
    10381041}
    10391042
    10401043ChertWritableDatabase::~ChertWritableDatabase()
     
    10481051{
    10491052    if (transaction_active())
    10501053        throw Xapian::InvalidOperationError("Can't commit during a transaction");
    1051     if (change_count) flush_postlist_changes();
     1054    if (changes_since_flush)
     1055        flush_postlist_changes();
    10521056    apply();
    10531057}
    10541058
     
    10611065    freq_deltas.clear();
    10621066    doclens.clear();
    10631067    mod_plists.clear();
    1064     change_count = 0;
     1068    changes_since_flush = 0;
    10651069}
    10661070
    10671071void
     
    10801084{
    10811085    value_manager.set_value_stats(value_stats);
    10821086    ChertDatabase::apply();
     1087    changes_since_commit = 0;
    10831088}
    10841089
    10851090void
     1091ChertWritableDatabase::inc_change_counts()
     1092{
     1093    // FIXME: this should be done by checking memory usage, not the number of
     1094    // changes.
     1095    // We could also look at:
     1096    // * mod_plists.size()
     1097    // * doclens.size()
     1098    // * freq_deltas.size()
     1099    //
     1100    // cout << "+++ mod_plists.size() " << mod_plists.size() <<
     1101    //     ", doclens.size() " << doclens.size() <<
     1102    //     ", freq_deltas.size() " << freq_deltas.size() << endl;
     1103
     1104    ++changes_since_commit;
     1105    ++changes_since_flush;
     1106
     1107    if (commit_threshold && changes_since_commit >= commit_threshold) {
     1108        flush_postlist_changes();
     1109        if (!transaction_active()) apply();
     1110    } else if (flush_threshold && changes_since_flush >= flush_threshold) {
     1111        flush_postlist_changes();
     1112    }
     1113}
     1114
     1115void
    10861116ChertWritableDatabase::add_freq_delta(const string & tname,
    10871117                                      Xapian::termcount_diff tf_delta,
    10881118                                      Xapian::termcount_diff cf_delta)
     
    12071237        throw;
    12081238    }
    12091239
    1210     // FIXME: this should be done by checking memory usage, not the number of
    1211     // changes.
    1212     // We could also look at:
    1213     // * mod_plists.size()
    1214     // * doclens.size()
    1215     // * freq_deltas.size()
    1216     //
    1217     // cout << "+++ mod_plists.size() " << mod_plists.size() <<
    1218     //     ", doclens.size() " << doclens.size() <<
    1219     //     ", freq_deltas.size() " << freq_deltas.size() << endl;
    1220     if (++change_count >= flush_threshold) {
    1221         flush_postlist_changes();
    1222         if (!transaction_active()) apply();
    1223     }
     1240    inc_change_counts();
    12241241
    12251242    RETURN(did);
    12261243}
     
    12831300        throw;
    12841301    }
    12851302
    1286     if (++change_count >= flush_threshold) {
    1287         flush_postlist_changes();
    1288         if (!transaction_active()) apply();
    1289     }
     1303    inc_change_counts();
    12901304}
    12911305
    12921306void
     
    14511465        throw;
    14521466    }
    14531467
    1454     if (++change_count >= flush_threshold) {
    1455         flush_postlist_changes();
    1456         if (!transaction_active()) apply();
    1457     }
     1468    inc_change_counts();
    14581469}
    14591470
    14601471Xapian::Document::Internal *
     
    15791590    // If there are changes, we don't have code to iterate the modified value
    15801591    // list so we need to flush (but don't commit - there may be a transaction
    15811592    // in progress).
    1582     if (change_count) value_manager.merge_changes();
     1593    if (changes_since_flush) value_manager.merge_changes();
    15831594    RETURN(ChertDatabase::open_value_list(slot));
    15841595}
    15851596
     
    15901601    // If there are changes, terms may have been added or removed, and so we
    15911602    // need to flush (but don't commit - there may be a transaction in
    15921603    // progress).
    1593     if (change_count) flush_postlist_changes();
     1604    if (changes_since_flush) flush_postlist_changes();
    15941605    RETURN(ChertDatabase::open_allterms(prefix));
    15951606}
    15961607
     
    16031614    doclens.clear();
    16041615    mod_plists.clear();
    16051616    value_stats.clear();
    1606     change_count = 0;
     1617    changes_since_flush = 0;
     1618    changes_since_commit = 0;
    16071619}
    16081620
     1621doccount
     1622ChertWritableDatabase::set_flush_threshold(doccount new_flush_threshold)
     1623{
     1624    LOGCALL(DB, doccount, "ChertWritableDatabase::set_flush_threshold",
     1625            new_flush_threshold);
     1626    doccount old_flush_threshold = flush_threshold;
     1627    flush_threshold = new_flush_threshold;
     1628
     1629    if (changes_since_flush >= flush_threshold) {
     1630        flush_postlist_changes();
     1631    }
     1632
     1633    RETURN(old_flush_threshold);
     1634}
     1635
     1636doccount
     1637ChertWritableDatabase::set_commit_threshold(doccount new_commit_threshold)
     1638{
     1639    LOGCALL(DB, doccount, "ChertWritableDatabase::set_commit_threshold",
     1640            new_commit_threshold);
     1641    doccount old_commit_threshold = commit_threshold;
     1642    commit_threshold = new_commit_threshold;
     1643
     1644    if (changes_since_commit >= commit_threshold) {
     1645        flush_postlist_changes();
     1646        if (!transaction_active()) apply();
     1647    }
     1648
     1649    RETURN(old_commit_threshold);
     1650}
     1651
    16091652void
    16101653ChertWritableDatabase::add_spelling(const string & word,
    16111654                                    Xapian::termcount freqinc) const
  • backends/chert/chert_database.h

     
    312312        /** The number of documents added, deleted, or replaced since the last
    313313         *  flush.
    314314         */
    315         mutable Xapian::doccount change_count;
     315        mutable Xapian::doccount changes_since_flush;
    316316
    317         /// If change_count reaches this threshold we automatically flush.
     317        /** The number of documents added, deleted, or replaced since the last
     318         *  commit.
     319         */
     320        mutable Xapian::doccount changes_since_commit;
     321
     322        /** If changes_since_flush reaches this threshold we automatically
     323         *  flush.
     324         */
    318325        Xapian::doccount flush_threshold;
    319326
     327        /** If changes_since_commit reaches this threshold we automatically
     328         *  commit.
     329         */
     330        Xapian::doccount commit_threshold;
     331
    320332        /** A pointer to the last document which was returned by
    321333         *  open_document(), or NULL if there is no such valid document.  This
    322334         *  is used purely for comparing with a supplied document to help with
     
    338350        /// Apply changes.
    339351        void apply();
    340352
     353        /// Increment the change counts, and flush/commit if appropriate.
     354        void inc_change_counts();
     355
    341356        /** Add or modify an entry in freq_deltas.
    342357         *
    343358         *  @param tname The term to modify the entry for.
     
    383398        /** Cancel pending modifications to the database. */
    384399        void cancel();
    385400
     401        /// Set the threshold at which changes are flushed automatically.
     402        Xapian::doccount set_flush_threshold(Xapian::doccount);
     403
     404        /// Set the threshold at which changes are committed automatically.
     405        Xapian::doccount set_commit_threshold(Xapian::doccount);
     406
    386407        Xapian::docid add_document(const Xapian::Document & document);
    387408        Xapian::docid add_document_(Xapian::docid did, const Xapian::Document & document);
    388409        // Stop the default implementation of delete_document(term) and
  • backends/brass/brass_database.cc

     
    10371037BrassWritableDatabase::BrassWritableDatabase(const string &dir, int action,
    10381038                                               int block_size)
    10391039        : BrassDatabase(dir, action, block_size),
    1040           change_count(0),
     1040          changes_since_flush(0),
     1041          changes_since_commit(0),
    10411042          flush_threshold(0),
     1043          commit_threshold(0),
    10421044          modify_shortcut_document(NULL),
    10431045          modify_shortcut_docid(0)
    10441046{
     
    10471049    const char *p = getenv("XAPIAN_FLUSH_THRESHOLD");
    10481050    if (p)
    10491051        flush_threshold = atoi(p);
    1050     if (flush_threshold == 0)
     1052    else
    10511053        flush_threshold = 10000;
     1054    commit_threshold = flush_threshold;
    10521055}
    10531056
    10541057BrassWritableDatabase::~BrassWritableDatabase()
     
    10621065{
    10631066    if (transaction_active())
    10641067        throw Xapian::InvalidOperationError("Can't commit during a transaction");
    1065     if (change_count) flush_postlist_changes();
     1068    if (changes_since_flush)
     1069        flush_postlist_changes();
    10661070    apply();
    10671071}
    10681072
     
    10721076    stats.write(postlist_table);
    10731077    inverter.flush(postlist_table);
    10741078
    1075     change_count = 0;
     1079    changes_since_flush = 0;
    10761080}
    10771081
    10781082void
     
    10911095{
    10921096    value_manager.set_value_stats(value_stats);
    10931097    BrassDatabase::apply();
     1098    changes_since_commit = 0;
    10941099}
    10951100
     1101void
     1102BrassWritableDatabase::inc_change_counts()
     1103{
     1104    // FIXME: this should be done by checking memory usage, not the number of
     1105    // changes.
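     1106    // We could also look at the amount of data the inverter object
     1107    // currently holds.
     1108    //
     1109    // NOTE(review): this comment previously listed mod_plists.size(),
     1110    // doclens.size() and freq_deltas.size() (with a commented-out cout of
     1111    // them), as in the chert backend.  The brass code shown in this patch
     1112    // buffers changes in the inverter object instead (inverter.flush(),
     1113    // inverter.clear()) - confirm those members don't exist in brass.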
     1114
     1115    ++changes_since_commit;
     1116    ++changes_since_flush;
     1117
     1118    if (commit_threshold && changes_since_commit >= commit_threshold) {
     1119        flush_postlist_changes();
     1120        if (!transaction_active()) apply();
     1121    } else if (flush_threshold && changes_since_flush >= flush_threshold) {
     1122        flush_postlist_changes();
     1123    }
     1124}
     1125
    10961126Xapian::docid
    10971127BrassWritableDatabase::add_document(const Xapian::Document & document)
    10981128{
     
    11591189        throw;
    11601190    }
    11611191
    1162     // FIXME: this should be done by checking memory usage, not the number of
    1163     // changes.  We could also look at the amount of data the inverter object
    1164     // currently holds.
    1165     if (++change_count >= flush_threshold) {
    1166         flush_postlist_changes();
    1167         if (!transaction_active()) apply();
    1168     }
     1192    inc_change_counts();
    11691193
    11701194    RETURN(did);
    11711195}
     
    12261250        throw;
    12271251    }
    12281252
    1229     if (++change_count >= flush_threshold) {
    1230         flush_postlist_changes();
    1231         if (!transaction_active()) apply();
    1232     }
     1253    inc_change_counts();
    12331254}
    12341255
    12351256void
     
    13911412        throw;
    13921413    }
    13931414
    1394     if (++change_count >= flush_threshold) {
    1395         flush_postlist_changes();
    1396         if (!transaction_active()) apply();
    1397     }
     1415    inc_change_counts();
    13981416}
    13991417
    14001418Xapian::Document::Internal *
     
    14971515    // If there are changes, we don't have code to iterate the modified value
    14981516    // list so we need to flush (but don't commit - there may be a transaction
    14991517    // in progress).
    1500     if (change_count) value_manager.merge_changes();
     1518    if (changes_since_flush) value_manager.merge_changes();
    15011519    RETURN(BrassDatabase::open_value_list(slot));
    15021520}
    15031521
     
    15051523BrassWritableDatabase::open_allterms(const string & prefix) const
    15061524{
    15071525    LOGCALL(DB, TermList *, "BrassWritableDatabase::open_allterms", NO_ARGS);
    1508     if (change_count) {
     1526    if (changes_since_flush) {
    15091527        // There are changes, and terms may have been added or removed, and so
    15101528        // we need to flush changes for terms with the specified prefix (but
    15111529        // don't commit - there may be a transaction in progress).
    15121530        inverter.flush_post_lists(postlist_table, prefix);
    15131531        if (prefix.empty()) {
    15141532            // We've flushed all the posting list changes, but the document
    1515             // length and stats haven't been written, so set change_count to 1.
     1533            // length and stats haven't been written, so set
     1534            // changes_since_flush to 1.
    15161535            // FIXME: Can we handle this better?
    1517             change_count = 1;
     1536            changes_since_flush = 1;
    15181537        }
    15191538    }
    15201539    RETURN(BrassDatabase::open_allterms(prefix));
     
    15281547
    15291548    inverter.clear();
    15301549    value_stats.clear();
    1531     change_count = 0;
     1550    changes_since_flush = 0;
     1551    changes_since_commit = 0;
    15321552}
    15331553
     1554doccount
     1555BrassWritableDatabase::set_flush_threshold(doccount new_flush_threshold)
     1556{
     1557    LOGCALL(DB, doccount, "BrassWritableDatabase::set_flush_threshold",
     1558            new_flush_threshold);
     1559    doccount old_flush_threshold = flush_threshold;
     1560    flush_threshold = new_flush_threshold;
     1561
     1562    if (changes_since_flush >= flush_threshold) {
     1563        flush_postlist_changes();
     1564    }
     1565
     1566    RETURN(old_flush_threshold);
     1567}
     1568
     1569doccount
     1570BrassWritableDatabase::set_commit_threshold(doccount new_commit_threshold)
     1571{
     1572    LOGCALL(DB, doccount, "BrassWritableDatabase::set_commit_threshold",
     1573            new_commit_threshold);
     1574    doccount old_commit_threshold = commit_threshold;
     1575    commit_threshold = new_commit_threshold;
     1576
     1577    if (changes_since_commit >= commit_threshold) {
     1578        flush_postlist_changes();
     1579        if (!transaction_active()) apply();
     1580    }
     1581
     1582    RETURN(old_commit_threshold);
     1583}
     1584
    15341585void
    15351586BrassWritableDatabase::add_spelling(const string & word,
    15361587                                    Xapian::termcount freqinc) const
  • backends/brass/brass_database.h

     
    305305        /** The number of documents added, deleted, or replaced since the last
    306306         *  flush.
    307307         */
    308         mutable Xapian::doccount change_count;
     308        mutable Xapian::doccount changes_since_flush;
    309309
    310         /// If change_count reaches this threshold we automatically flush.
     310        /** The number of documents added, deleted, or replaced since the last
     311         *  commit.
     312         */
     313        mutable Xapian::doccount changes_since_commit;
     314
     315        /** If changes_since_flush reaches this threshold we automatically
     316         *  flush.
     317         */
    311318        Xapian::doccount flush_threshold;
    312319
     320        /** If changes_since_commit reaches this threshold we automatically
     321         *  commit.
     322         */
     323        Xapian::doccount commit_threshold;
     324
    313325        /** A pointer to the last document which was returned by
    314326         *  open_document(), or NULL if there is no such valid document.  This
    315327         *  is used purely for comparing with a supplied document to help with
     
    331343        /// Apply changes.
    332344        void apply();
    333345
     346        /// Increment the change counts, and flush/commit if appropriate.
     347        void inc_change_counts();
     348
    334349        //@{
    335350        /** Implementation of virtual methods: see Database::Internal for
    336351         *  details.
     
    340355        /** Cancel pending modifications to the database. */
    341356        void cancel();
    342357
     358        /// Set the threshold at which changes are flushed automatically.
     359        Xapian::doccount set_flush_threshold(Xapian::doccount);
     360
     361        /// Set the threshold at which changes are committed automatically.
     362        Xapian::doccount set_commit_threshold(Xapian::doccount);
     363
    343364        Xapian::docid add_document(const Xapian::Document & document);
    344365        Xapian::docid add_document_(Xapian::docid did, const Xapian::Document & document);
    345366        // Stop the default implementation of delete_document(term) and