Ticket #266: set_flush_threshold.patch
File set_flush_threshold.patch, 22.1 KB (added by , 13 years ago) |
---|
-
tests/api_transdb.cc
126 126 127 127 return true; 128 128 } 129 130 /// Test set_flush_threshold inside a simple transaction. 131 DEFINE_TESTCASE(minmemtransaction1, transactions) { 132 Xapian::WritableDatabase db(get_writable_database("apitest_simpledata")); 133 134 Xapian::doccount docs = db.get_doccount(); 135 db.begin_transaction(); 136 Xapian::Document doc; 137 doc.set_data("testing"); 138 doc.add_term("befuddlement"); 139 db.add_document(doc); 140 TEST_EXCEPTION(Xapian::InvalidOperationError, db.begin_transaction()); 141 db.set_flush_threshold(0); 142 TEST_EQUAL(db.get_doccount(), docs + 1); 143 TEST_EQUAL(db.get_termfreq("befuddlement"), 1); 144 db.commit_transaction(); 145 TEST_EQUAL(db.get_doccount(), docs + 1); 146 TEST_EQUAL(db.get_termfreq("befuddlement"), 1); 147 148 return true; 149 } 150 151 /// Test cancelling a simple transaction after calling set_flush_threshold(0). 152 DEFINE_TESTCASE(minmemcanceltransaction1, transactions) { 153 Xapian::WritableDatabase db(get_writable_database("apitest_simpledata")); 154 155 Xapian::doccount docs = db.get_doccount(); 156 db.begin_transaction(); 157 Xapian::Document doc; 158 doc.set_data("testing"); 159 doc.add_term("befuddlement"); 160 db.add_document(doc); 161 TEST_EXCEPTION(Xapian::InvalidOperationError, db.begin_transaction()); 162 TEST_EQUAL(db.get_doccount(), docs + 1); 163 TEST_EQUAL(db.get_termfreq("befuddlement"), 1); 164 db.set_flush_threshold(0); 165 db.cancel_transaction(); 166 TEST_EQUAL(db.get_doccount(), docs); 167 TEST_EQUAL(db.get_termfreq("befuddlement"), 0); 168 169 return true; 170 } -
include/xapian/database.h
5 5 * Copyright 2002 Ananova Ltd 6 6 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2011 Olly Betts 7 7 * Copyright 2006,2008 Lemur Consulting Ltd 8 * Copyright 2011 Richard Boulton 8 9 * 9 10 * This program is free software; you can redistribute it and/or 10 11 * modify it under the terms of the GNU General Public License as … … 565 566 */ 566 567 void flush() { commit(); } 567 568 569 /** Set the threshold at which changes are flushed automatically. 570 * 571 * This may be used to exert some control on how often buffered 572 * changes are flushed from memory during indexing. A flush will 573 * happen after the specified number of changes have been made since 574 * the previous flush or commit. 575 * 576 * The initial value for the flush threshold will be read from the 577 * "XAPIAN_FLUSH_THRESHOLD" environment variable. 578 * 579 * This may be called whether a transaction is currently in progress 580 * or not, and takes effect both during transactions and outside 581 * transactions. 582 * 583 * If the new threshold is less than or equal to the number of changes 584 * that have occurred since the previous flush or commit, an immediate 585 * flush will be performed. 586 * 587 * A threshold of 0 disables automatic flushing, but causes an 588 * immediate flush. Be aware that various operations will still cause 589 * an implicit flush even if the threshold is set to 0, so this is not 590 * an effective way to control IO. 591 * 592 * The old value of the flush threshold will be returned, so a call to 593 * set_flush_threshold(set_flush_threshold(0)) causes an immediate 594 * flush, but leaves the threshold unchanged. 595 * 596 * Not all backends support a flush threshold - such backends will 597 * always return 0 from this method. 598 * 599 * @exception Xapian::DatabaseError will be thrown if a problem occurs 600 * while modifying the database. 601 * 602 * @exception Xapian::DatabaseCorruptError will be thrown if the 603 * database is in a corrupt state. 
604 */ 605 doccount set_flush_threshold(doccount threshold); 606 607 /** Set the threshold at which changes are committed automatically. 608 * 609 * This may be used to control how often changes are committed during 610 * indexing, when not within a transaction. Such a commit will happen 611 * after the specified number of changes have been made since the 612 * previous commit. 613 * 614 * The initial value for the commit threshold will be read from the 615 * "XAPIAN_FLUSH_THRESHOLD" environment variable. 616 * 617 * If the new threshold is less than or equal to the number of changes 618 * that have occurred since the previous commit, an immediate 619 * commit will be performed (unless a transaction is in progress). 620 * 621 * A commit automatically performs a flush first. 622 * 623 * A threshold of 0 disables automatic committing, but causes an 624 * immediate commit (assuming no transaction is in progress). 625 * 626 * Not all backends support a commit threshold - such backends will 627 * always return 0 from this method. 628 * 629 * @exception Xapian::DatabaseError will be thrown if a problem occurs 630 * while modifying the database. 631 * 632 * @exception Xapian::DatabaseCorruptError will be thrown if the 633 * database is in a corrupt state. 634 */ 635 doccount set_commit_threshold(doccount threshold); 636 568 637 /** Begin a transaction. 569 638 * 570 639 * In Xapian a transaction is a group of modifications to the database -
common/database.h
412 412 /** Cancel pending modifications to the database. */ 413 413 virtual void cancel(); 414 414 415 /** Set the threshold at which changes are flushed. 416 * 417 * See WritableDatabase::set_flush_threshold() for more information. 418 */ 419 virtual doccount set_flush_threshold(doccount); 420 421 /** Set the threshold at which changes are committed. 422 * 423 * See WritableDatabase::set_commit_threshold() for more information. 424 */ 425 virtual doccount set_commit_threshold(doccount); 426 415 427 /** Begin a transaction. 416 428 * 417 429 * See WritableDatabase::begin_transaction() for more information. -
api/omdatabase.cc
784 784 internal[0]->commit(); 785 785 } 786 786 787 doccount 788 WritableDatabase::set_flush_threshold(doccount threshold) 789 { 790 LOGCALL_VOID(API, "WritableDatabase::set_flush_threshold", threshold); 791 if (internal.size() != 1) only_one_subdatabase_allowed(); 792 RETURN(internal[0]->set_flush_threshold(threshold)); 793 } 794 795 doccount 796 WritableDatabase::set_commit_threshold(doccount threshold) 797 { 798 LOGCALL_VOID(API, "WritableDatabase::set_commit_threshold", threshold); 799 if (internal.size() != 1) only_one_subdatabase_allowed(); 800 RETURN(internal[0]->set_commit_threshold(threshold)); 801 } 802 787 803 void 788 804 WritableDatabase::begin_transaction(bool flushed) 789 805 { -
api/error.cc
38 38 39 39 Xapian::Error::Error(const std::string &msg_, const std::string &context_, 40 40 const char * type_, const char * error_string_) 41 : msg(msg_), context(context_), error_string(), type(type_), 42 my_errno(0), already_handled(false) 41 : msg(msg_), context(context_), type(type_), my_errno(0), 42 error_string(), already_handled(false) 43 43 { 44 44 if (error_string_) error_string.assign(error_string_); 45 45 } -
backends/database.cc
122 122 Assert(false); 123 123 } 124 124 125 doccount 126 Database::Internal::set_flush_threshold(doccount) 127 { 128 // Writable databases may override this method. 129 return 0; 130 } 131 132 doccount 133 Database::Internal::set_commit_threshold(doccount) 134 { 135 // Writable databases may override this method. 136 return 0; 137 } 138 125 139 void 126 140 Database::Internal::begin_transaction(bool flushed) 127 141 { -
backends/chert/chert_database.cc
1023 1023 freq_deltas(), 1024 1024 doclens(), 1025 1025 mod_plists(), 1026 change_count(0), 1026 changes_since_flush(0), 1027 changes_since_commit(0), 1027 1028 flush_threshold(0), 1029 commit_threshold(0), 1028 1030 modify_shortcut_document(NULL), 1029 1031 modify_shortcut_docid(0) 1030 1032 { … … 1033 1035 const char *p = getenv("XAPIAN_FLUSH_THRESHOLD"); 1034 1036 if (p) 1035 1037 flush_threshold = atoi(p); 1036 if (flush_threshold == 0)1038 else 1037 1039 flush_threshold = 10000; 1040 commit_threshold = flush_threshold; 1038 1041 } 1039 1042 1040 1043 ChertWritableDatabase::~ChertWritableDatabase() … … 1048 1051 { 1049 1052 if (transaction_active()) 1050 1053 throw Xapian::InvalidOperationError("Can't commit during a transaction"); 1051 if (change_count) flush_postlist_changes(); 1054 if (changes_since_flush) 1055 flush_postlist_changes(); 1052 1056 apply(); 1053 1057 } 1054 1058 … … 1061 1065 freq_deltas.clear(); 1062 1066 doclens.clear(); 1063 1067 mod_plists.clear(); 1064 change _count= 0;1068 changes_since_flush = 0; 1065 1069 } 1066 1070 1067 1071 void … … 1080 1084 { 1081 1085 value_manager.set_value_stats(value_stats); 1082 1086 ChertDatabase::apply(); 1087 changes_since_commit = 0; 1083 1088 } 1084 1089 1085 1090 void 1091 ChertWritableDatabase::inc_change_counts() 1092 { 1093 // FIXME: this should be done by checking memory usage, not the number of 1094 // changes. 
1095 // We could also look at: 1096 // * mod_plists.size() 1097 // * doclens.size() 1098 // * freq_deltas.size() 1099 // 1100 // cout << "+++ mod_plists.size() " << mod_plists.size() << 1101 // ", doclens.size() " << doclens.size() << 1102 // ", freq_deltas.size() " << freq_deltas.size() << endl; 1103 1104 ++changes_since_commit; 1105 ++changes_since_flush; 1106 1107 if (commit_threshold && changes_since_commit >= commit_threshold) { 1108 flush_postlist_changes(); 1109 if (!transaction_active()) apply(); 1110 } else if (flush_threshold && changes_since_flush >= flush_threshold) { 1111 flush_postlist_changes(); 1112 } 1113 } 1114 1115 void 1086 1116 ChertWritableDatabase::add_freq_delta(const string & tname, 1087 1117 Xapian::termcount_diff tf_delta, 1088 1118 Xapian::termcount_diff cf_delta) … … 1207 1237 throw; 1208 1238 } 1209 1239 1210 // FIXME: this should be done by checking memory usage, not the number of 1211 // changes. 1212 // We could also look at: 1213 // * mod_plists.size() 1214 // * doclens.size() 1215 // * freq_deltas.size() 1216 // 1217 // cout << "+++ mod_plists.size() " << mod_plists.size() << 1218 // ", doclens.size() " << doclens.size() << 1219 // ", freq_deltas.size() " << freq_deltas.size() << endl; 1220 if (++change_count >= flush_threshold) { 1221 flush_postlist_changes(); 1222 if (!transaction_active()) apply(); 1223 } 1240 inc_change_counts(); 1224 1241 1225 1242 RETURN(did); 1226 1243 } … … 1283 1300 throw; 1284 1301 } 1285 1302 1286 if (++change_count >= flush_threshold) { 1287 flush_postlist_changes(); 1288 if (!transaction_active()) apply(); 1289 } 1303 inc_change_counts(); 1290 1304 } 1291 1305 1292 1306 void … … 1451 1465 throw; 1452 1466 } 1453 1467 1454 if (++change_count >= flush_threshold) { 1455 flush_postlist_changes(); 1456 if (!transaction_active()) apply(); 1457 } 1468 inc_change_counts(); 1458 1469 } 1459 1470 1460 1471 Xapian::Document::Internal * … … 1579 1590 // If there are changes, we don't have code to iterate the 
modified value 1580 1591 // list so we need to flush (but don't commit - there may be a transaction 1581 1592 // in progress). 1582 if (change _count) value_manager.merge_changes();1593 if (changes_since_flush) value_manager.merge_changes(); 1583 1594 RETURN(ChertDatabase::open_value_list(slot)); 1584 1595 } 1585 1596 … … 1590 1601 // If there are changes, terms may have been added or removed, and so we 1591 1602 // need to flush (but don't commit - there may be a transaction in 1592 1603 // progress). 1593 if (change _count) flush_postlist_changes();1604 if (changes_since_flush) flush_postlist_changes(); 1594 1605 RETURN(ChertDatabase::open_allterms(prefix)); 1595 1606 } 1596 1607 … … 1603 1614 doclens.clear(); 1604 1615 mod_plists.clear(); 1605 1616 value_stats.clear(); 1606 change_count = 0; 1617 changes_since_flush = 0; 1618 changes_since_commit = 0; 1607 1619 } 1608 1620 1621 doccount 1622 ChertWritableDatabase::set_flush_threshold(doccount new_flush_threshold) 1623 { 1624 LOGCALL(DB, doccount, "ChertWritableDatabase::set_flush_threshold", 1625 new_flush_threshold); 1626 doccount old_flush_threshold = flush_threshold; 1627 flush_threshold = new_flush_threshold; 1628 1629 if (changes_since_flush >= flush_threshold) { 1630 flush_postlist_changes(); 1631 } 1632 1633 RETURN(old_flush_threshold); 1634 } 1635 1636 doccount 1637 ChertWritableDatabase::set_commit_threshold(doccount new_commit_threshold) 1638 { 1639 LOGCALL(DB, doccount, "ChertWritableDatabase::set_commit_threshold", 1640 new_commit_threshold); 1641 doccount old_commit_threshold = commit_threshold; 1642 commit_threshold = new_commit_threshold; 1643 1644 if (changes_since_commit >= commit_threshold) { 1645 flush_postlist_changes(); 1646 if (!transaction_active()) apply(); 1647 } 1648 1649 RETURN(old_commit_threshold); 1650 } 1651 1609 1652 void 1610 1653 ChertWritableDatabase::add_spelling(const string & word, 1611 1654 Xapian::termcount freqinc) const -
backends/chert/chert_database.h
312 312 /** The number of documents added, deleted, or replaced since the last 313 313 * flush. 314 314 */ 315 mutable Xapian::doccount change_count; 315 mutable Xapian::doccount changes_since_flush; 316 316 317 /// If change_count reaches this threshold we automatically flush. 317 /** The number of documents added, deleted, or replaced since the last 318 * commit. 319 */ 320 mutable Xapian::doccount changes_since_commit; 321 322 /** If changes_since_flush reaches this threshold we automatically 323 * flush. 324 */ 318 325 Xapian::doccount flush_threshold; 319 326 327 /** If changes_since_commit reaches this threshold we automatically 328 * commit. 329 */ 330 Xapian::doccount commit_threshold; 331 320 332 /** A pointer to the last document which was returned by 321 333 * open_document(), or NULL if there is no such valid document. This 322 334 * is used purely for comparing with a supplied document to help with … … 338 350 /// Apply changes. 339 351 void apply(); 340 352 353 /// Increment the change counts, and flush/commit if appropriate. 354 void inc_change_counts(); 355 341 356 /** Add or modify an entry in freq_deltas. 342 357 * 343 358 * @param tname The term to modify the entry for. … … 383 398 /** Cancel pending modifications to the database. */ 384 399 void cancel(); 385 400 401 /// Set the threshold at which changes are flushed automatically. 402 Xapian::doccount set_flush_threshold(Xapian::doccount); 403 404 /// Set the threshold at which changes are committed automatically. 405 Xapian::doccount set_commit_threshold(Xapian::doccount); 406 386 407 Xapian::docid add_document(const Xapian::Document & document); 387 408 Xapian::docid add_document_(Xapian::docid did, const Xapian::Document & document); 388 409 // Stop the default implementation of delete_document(term) and -
backends/brass/brass_database.cc
1037 1037 BrassWritableDatabase::BrassWritableDatabase(const string &dir, int action, 1038 1038 int block_size) 1039 1039 : BrassDatabase(dir, action, block_size), 1040 change_count(0), 1040 changes_since_flush(0), 1041 changes_since_commit(0), 1041 1042 flush_threshold(0), 1043 commit_threshold(0), 1042 1044 modify_shortcut_document(NULL), 1043 1045 modify_shortcut_docid(0) 1044 1046 { … … 1047 1049 const char *p = getenv("XAPIAN_FLUSH_THRESHOLD"); 1048 1050 if (p) 1049 1051 flush_threshold = atoi(p); 1050 if (flush_threshold == 0)1052 else 1051 1053 flush_threshold = 10000; 1054 commit_threshold = flush_threshold; 1052 1055 } 1053 1056 1054 1057 BrassWritableDatabase::~BrassWritableDatabase() … … 1062 1065 { 1063 1066 if (transaction_active()) 1064 1067 throw Xapian::InvalidOperationError("Can't commit during a transaction"); 1065 if (change_count) flush_postlist_changes(); 1068 if (changes_since_flush) 1069 flush_postlist_changes(); 1066 1070 apply(); 1067 1071 } 1068 1072 … … 1072 1076 stats.write(postlist_table); 1073 1077 inverter.flush(postlist_table); 1074 1078 1075 change _count= 0;1079 changes_since_flush = 0; 1076 1080 } 1077 1081 1078 1082 void … … 1091 1095 { 1092 1096 value_manager.set_value_stats(value_stats); 1093 1097 BrassDatabase::apply(); 1098 changes_since_commit = 0; 1094 1099 } 1095 1100 1101 void 1102 BrassWritableDatabase::inc_change_counts() 1103 { 1104 // FIXME: this should be done by checking memory usage, not the number of 1105 // changes. 
1106 // We could also look at: 1107 // * mod_plists.size() 1108 // * doclens.size() 1109 // * freq_deltas.size() 1110 // 1111 // cout << "+++ mod_plists.size() " << mod_plists.size() << 1112 // ", doclens.size() " << doclens.size() << 1113 // ", freq_deltas.size() " << freq_deltas.size() << endl; 1114 1115 ++changes_since_commit; 1116 ++changes_since_flush; 1117 1118 if (commit_threshold && changes_since_commit >= commit_threshold) { 1119 flush_postlist_changes(); 1120 if (!transaction_active()) apply(); 1121 } else if (flush_threshold && changes_since_flush >= flush_threshold) { 1122 flush_postlist_changes(); 1123 } 1124 } 1125 1096 1126 Xapian::docid 1097 1127 BrassWritableDatabase::add_document(const Xapian::Document & document) 1098 1128 { … … 1159 1189 throw; 1160 1190 } 1161 1191 1162 // FIXME: this should be done by checking memory usage, not the number of 1163 // changes. We could also look at the amount of data the inverter object 1164 // currently holds. 1165 if (++change_count >= flush_threshold) { 1166 flush_postlist_changes(); 1167 if (!transaction_active()) apply(); 1168 } 1192 inc_change_counts(); 1169 1193 1170 1194 RETURN(did); 1171 1195 } … … 1226 1250 throw; 1227 1251 } 1228 1252 1229 if (++change_count >= flush_threshold) { 1230 flush_postlist_changes(); 1231 if (!transaction_active()) apply(); 1232 } 1253 inc_change_counts(); 1233 1254 } 1234 1255 1235 1256 void … … 1391 1412 throw; 1392 1413 } 1393 1414 1394 if (++change_count >= flush_threshold) { 1395 flush_postlist_changes(); 1396 if (!transaction_active()) apply(); 1397 } 1415 inc_change_counts(); 1398 1416 } 1399 1417 1400 1418 Xapian::Document::Internal * … … 1497 1515 // If there are changes, we don't have code to iterate the modified value 1498 1516 // list so we need to flush (but don't commit - there may be a transaction 1499 1517 // in progress). 
1500 if (change_count) value_manager.merge_changes(); 1518 if (changes_since_flush) value_manager.merge_changes(); 1501 1519 RETURN(BrassDatabase::open_value_list(slot)); 1502 1520 } 1503 1521 … … 1505 1523 BrassWritableDatabase::open_allterms(const string & prefix) const 1506 1524 { 1507 1525 LOGCALL(DB, TermList *, "BrassWritableDatabase::open_allterms", NO_ARGS); 1508 if (change_count) { 1526 if (changes_since_flush) { 1509 1527 // There are changes, and terms may have been added or removed, and so 1510 1528 // we need to flush changes for terms with the specified prefix (but 1511 1529 // don't commit - there may be a transaction in progress). 1512 1530 inverter.flush_post_lists(postlist_table, prefix); 1513 1531 if (prefix.empty()) { 1514 1532 // We've flushed all the posting list changes, but the document 1515 // length and stats haven't been written, so set change_count to 1. 1533 // length and stats haven't been written, so set 1534 // changes_since_flush to 1. 1516 1535 // FIXME: Can we handle this better?
1517 change _count= 1;1536 changes_since_flush = 1; 1518 1537 } 1519 1538 } 1520 1539 RETURN(BrassDatabase::open_allterms(prefix)); … … 1528 1547 1529 1548 inverter.clear(); 1530 1549 value_stats.clear(); 1531 change_count = 0; 1550 changes_since_flush = 0; 1551 changes_since_commit = 0; 1532 1552 } 1533 1553 1554 doccount 1555 BrassWritableDatabase::set_flush_threshold(doccount new_flush_threshold) 1556 { 1557 LOGCALL(DB, doccount, "BrassWritableDatabase::set_flush_threshold", 1558 new_flush_threshold); 1559 doccount old_flush_threshold = flush_threshold; 1560 flush_threshold = new_flush_threshold; 1561 1562 if (changes_since_flush >= flush_threshold) { 1563 flush_postlist_changes(); 1564 } 1565 1566 RETURN(old_flush_threshold); 1567 } 1568 1569 doccount 1570 BrassWritableDatabase::set_commit_threshold(doccount new_commit_threshold) 1571 { 1572 LOGCALL(DB, doccount, "BrassWritableDatabase::set_commit_threshold", 1573 new_commit_threshold); 1574 doccount old_commit_threshold = commit_threshold; 1575 commit_threshold = new_commit_threshold; 1576 1577 if (changes_since_commit >= commit_threshold) { 1578 flush_postlist_changes(); 1579 if (!transaction_active()) apply(); 1580 } 1581 1582 RETURN(old_commit_threshold); 1583 } 1584 1534 1585 void 1535 1586 BrassWritableDatabase::add_spelling(const string & word, 1536 1587 Xapian::termcount freqinc) const -
backends/brass/brass_database.h
305 305 /** The number of documents added, deleted, or replaced since the last 306 306 * flush. 307 307 */ 308 mutable Xapian::doccount change_count; 308 mutable Xapian::doccount changes_since_flush; 309 309 310 /// If change_count reaches this threshold we automatically flush. 310 /** The number of documents added, deleted, or replaced since the last 311 * commit. 312 */ 313 mutable Xapian::doccount changes_since_commit; 314 315 /** If changes_since_flush reaches this threshold we automatically 316 * flush. 317 */ 311 318 Xapian::doccount flush_threshold; 312 319 320 /** If changes_since_commit reaches this threshold we automatically 321 * commit. 322 */ 323 Xapian::doccount commit_threshold; 324 313 325 /** A pointer to the last document which was returned by 314 326 * open_document(), or NULL if there is no such valid document. This 315 327 * is used purely for comparing with a supplied document to help with … … 331 343 /// Apply changes. 332 344 void apply(); 333 345 346 /// Increment the change counts, and flush/commit if appropriate. 347 void inc_change_counts(); 348 334 349 //@{ 335 350 /** Implementation of virtual methods: see Database::Internal for 336 351 * details. … … 340 355 /** Cancel pending modifications to the database. */ 341 356 void cancel(); 342 357 358 /// Set the threshold at which changes are flushed automatically. 359 Xapian::doccount set_flush_threshold(Xapian::doccount); 360 361 /// Set the threshold at which changes are committed automatically. 362 Xapian::doccount set_commit_threshold(Xapian::doccount); 363 343 364 Xapian::docid add_document(const Xapian::Document & document); 344 365 Xapian::docid add_document_(Xapian::docid did, const Xapian::Document & document); 345 366 // Stop the default implementation of delete_document(term) and