Changeset 10716
- Timestamp:
- 2008-06-23 00:47:29 (7 months ago)
- Location:
- trunk/xapian-core
- Files:
-
- 2 added
- 8 modified
-
ChangeLog (modified) (1 diff)
-
backends/chert/Makefile.mk (modified) (2 diffs)
-
backends/chert/chert_alldocsmodifiedpostlist.cc (added)
-
backends/chert/chert_alldocsmodifiedpostlist.h (added)
-
backends/chert/chert_alldocspostlist.cc (modified) (6 diffs)
-
backends/chert/chert_alldocspostlist.h (modified) (4 diffs)
-
backends/chert/chert_database.cc (modified) (4 diffs)
-
backends/chert/chert_modifiedpostlist.h (modified) (1 diff)
-
backends/chert/chert_postlist.cc (modified) (5 diffs)
-
backends/chert/chert_postlist.h (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
-
trunk/xapian-core/ChangeLog
r10715 r10716 1 Sun Jun 22 23:44:46 GMT 2008 Richard Boulton <richard@lemurconsulting.com> 2 3 * backends/chert/: Change ChertAllDocsPostList to inherit from 4 ChertPostList and use the doclen list from the posting table 5 rather than using the termlist table. This helps towards making 6 the termlist table optional, reduces the amount of data read in 7 the process of iterating through an alldocs postlist, and can 8 make a massive difference in performance: I've measured the time 9 to iterate through all the documents in a 1000000 document 10 database, and the patch speeds this operation up by a factor of 11 6. 12 13 Add `keep_reference` parameter to the ChertPostList constructor, 14 instead of not keeping a reference if the term is empty. 15 ChertAllDocsPostList uses this to keep a reference to the 16 database while using an empty term. 17 18 Add ChertAllDocsModifiedPostList class, inspired by 19 ChertModifiedPostList, (with corresponding new source files) to 20 handle alldocs postlist with modifications: this wasn't needed 21 before since the termlist is updated immediately after changes. 22 1 23 Sun Jun 22 21:37:00 GMT 2008 Richard Boulton <richard@lemurconsulting.com> 2 24 -
trunk/xapian-core/backends/chert/Makefile.mk
r10346 r10716 5 5 if BUILD_BACKEND_CHERT 6 6 noinst_HEADERS +=\ 7 backends/chert/chert_alldocsmodifiedpostlist.h\ 7 8 backends/chert/chert_alldocspostlist.h\ 8 9 backends/chert/chert_alltermslist.h\ … … 31 32 32 33 libxapian_la_SOURCES +=\ 34 backends/chert/chert_alldocsmodifiedpostlist.cc\ 33 35 backends/chert/chert_alldocspostlist.cc\ 34 36 backends/chert/chert_alltermslist.cc\ -
trunk/xapian-core/backends/chert/chert_alldocspostlist.cc
r10346 r10716 3 3 */ 4 4 /* Copyright (C) 2006,2007,2008 Olly Betts 5 * Copyright (C) 2008 Lemur Consulting Ltd 5 6 * 6 7 * This program is free software; you can redistribute it and/or modify … … 23 24 #include <string> 24 25 26 #include "chert_alldocspostlist.h" 25 27 #include "chert_database.h" 26 #include "chert_alldocspostlist.h"27 28 28 29 #include "utils.h" … … 30 31 using namespace std; 31 32 33 ChertAllDocsPostList::ChertAllDocsPostList(Xapian::Internal::RefCntPtr<const ChertDatabase> db_, 34 Xapian::doccount doccount_) 35 : ChertPostList(db_, string(), true), 36 doccount(doccount_) 37 { 38 DEBUGCALL(DB, void, "ChertAllDocsPostList::ChertAllDocsPostList", db_.get() << ", " << doccount_); 39 } 40 32 41 Xapian::doccount 33 42 ChertAllDocsPostList::get_termfreq() const 34 43 { 35 return doccount; 36 } 37 38 Xapian::docid 39 ChertAllDocsPostList::get_docid() const 40 { 41 return current_did; 44 DEBUGCALL(DB, Xapian::doccount, "ChertAllDocsPostList::get_termfreq", ""); 45 RETURN(doccount); 42 46 } 43 47 … … 46 50 { 47 51 DEBUGCALL(DB, Xapian::doclength, "ChertAllDocsPostList::get_doclength", ""); 48 Assert(current_did);49 52 50 cursor->read_tag(); 51 52 if (cursor->current_tag.empty()) RETURN(0); 53 54 const char * pos = cursor->current_tag.data(); 55 const char * end = pos + cursor->current_tag.size(); 56 57 chert_doclen_t doclen; 58 if (!unpack_uint(&pos, end, &doclen)) { 59 const char *msg; 60 if (pos == 0) { 61 msg = "Too little data for doclen in termlist"; 62 } else { 63 msg = "Overflowed value for doclen in termlist"; 64 } 65 throw Xapian::DatabaseCorruptError(msg); 66 } 67 68 RETURN(doclen); 53 RETURN(ChertPostList::get_wdf()); 69 54 } 70 55 … … 73 58 { 74 59 DEBUGCALL(DB, Xapian::termcount, "ChertAllDocsPostList::get_wdf", ""); 75 Assert (current_did);60 AssertParanoid(!at_end()); 76 61 RETURN(1); 77 62 } 78 63 79 Pos tList *80 ChertAllDocsPostList::read_ did_from_current_key()64 PositionList * 65 ChertAllDocsPostList::read_position_list() 81 66 { 82 
DEBUGCALL(DB, PostList *, "ChertAllDocsPostList::read_did_from_current_key", 83 ""); 84 const string & key = cursor->current_key; 85 const char * pos = key.data(); 86 const char * end = pos + key.size(); 87 if (!unpack_uint_preserving_sort(&pos, end, &current_did)) { 88 const char *msg; 89 if (pos == 0) { 90 msg = "Too little data in termlist key"; 91 } else { 92 msg = "Overflowed value in termlist key"; 93 } 94 throw Xapian::DatabaseCorruptError(msg); 95 } 96 97 // Return NULL to help the compiler tail-call optimise our callers. 98 RETURN(NULL); 67 DEBUGCALL(DB, Xapian::termcount, "ChertAllDocsPostList::read_position_list", ""); 68 throw Xapian::InvalidOperationError("ChertAllDocsPostList::read_position_list() not meaningful"); 99 69 } 100 70 101 Pos tList *102 ChertAllDocsPostList:: next(Xapian::weight /*w_min*/)71 PositionList * 72 ChertAllDocsPostList::open_position_list() const 103 73 { 104 DEBUGCALL(DB, PostList *, "ChertAllDocsPostList::next", "/*w_min*/"); 105 Assert(!at_end()); 106 if (!cursor->next()) RETURN(NULL); 107 RETURN(read_did_from_current_key()); 108 } 109 110 PostList * 111 ChertAllDocsPostList::skip_to(Xapian::docid did, Xapian::weight /*w_min*/) 112 { 113 DEBUGCALL(DB, PostList *, "ChertAllDocsPostList::skip_to", 114 did << ", /*w_min*/"); 115 116 if (did <= current_did || at_end()) RETURN(NULL); 117 118 if (cursor->find_entry_ge(pack_uint_preserving_sort(did))) { 119 // The exact docid that was asked for exists. 
120 current_did = did; 121 RETURN(NULL); 122 } 123 if (cursor->after_end()) RETURN(NULL); 124 125 RETURN(read_did_from_current_key()); 126 } 127 128 bool 129 ChertAllDocsPostList::at_end() const { 130 DEBUGCALL(DB, bool, "ChertAllDocsPostList::at_end", ""); 131 RETURN(cursor->after_end()); 74 DEBUGCALL(DB, Xapian::termcount, "ChertAllDocsPostList::open_position_list", ""); 75 throw Xapian::InvalidOperationError("ChertAllDocsPostList::open_position_list() not meaningful"); 132 76 } 133 77 … … 136 80 { 137 81 string desc = "ChertAllDocsPostList(did="; 138 desc += om_tostring( current_did);82 desc += om_tostring(get_docid()); 139 83 desc += ",doccount="; 140 84 desc += om_tostring(doccount); -
trunk/xapian-core/backends/chert/chert_alldocspostlist.h
r10346 r10716 3 3 */ 4 4 /* Copyright (C) 2006,2007,2008 Olly Betts 5 * Copyright (C) 2008 Lemur Consulting Ltd 5 6 * 6 7 * This program is free software; you can redistribute it and/or modify … … 25 26 26 27 #include "leafpostlist.h" 28 #include "chert_postlist.h" 27 29 28 class ChertAllDocsPostList : public LeafPostList {30 class ChertAllDocsPostList : public ChertPostList { 29 31 /// Don't allow assignment. 30 32 void operator=(const ChertAllDocsPostList &); … … 33 35 ChertAllDocsPostList(const ChertAllDocsPostList &); 34 36 35 /// Set @a current_did from @a cursor->current_key.36 PostList * read_did_from_current_key();37 38 /// The database we're iterating over.39 Xapian::Internal::RefCntPtr<const ChertDatabase> db;40 41 37 /// The number of documents in the database. 42 38 Xapian::doccount doccount; 43 39 44 /// Cursor running over termlist table keys.45 AutoPtr<ChertCursor> cursor;46 47 /// The current document id.48 Xapian::docid current_did;49 50 40 public: 51 41 ChertAllDocsPostList(Xapian::Internal::RefCntPtr<const ChertDatabase> db_, 52 Xapian::doccount doccount_) 53 : db(db_), doccount(doccount_), cursor(db->termlist_table.cursor_get()), 54 current_did(0) 55 { 56 cursor->find_entry(""); 57 } 42 Xapian::doccount doccount_); 58 43 59 44 Xapian::doccount get_termfreq() const; 60 61 Xapian::docid get_docid() const;62 45 63 46 Xapian::doclength get_doclength() const; … … 65 48 Xapian::termcount get_wdf() const; 66 49 67 Pos tList * next(Xapian::weight w_min);50 PositionList *read_position_list(); 68 51 69 PostList * skip_to(Xapian::docid desired_did, Xapian::weight w_min); 70 71 bool at_end() const; 52 PositionList *open_position_list() const; 72 53 73 54 std::string get_description() const; -
trunk/xapian-core/backends/chert/chert_database.cc
r10675 r10716 32 32 33 33 #include "contiguousalldocspostlist.h" 34 #include "chert_alldocsmodifiedpostlist.h" 34 35 #include "chert_alldocspostlist.h" 35 36 #include "chert_alltermslist.h" … … 915 916 } 916 917 917 RETURN(new ChertPostList(ptrtothis, term ));918 RETURN(new ChertPostList(ptrtothis, term, true)); 918 919 } 919 920 … … 1804 1805 RETURN(new ContiguousAllDocsPostList(ptrtothis, doccount)); 1805 1806 } 1806 RETURN(new ChertAllDocsPostList(ptrtothis, doccount)); 1807 if (doclens.empty()) { 1808 RETURN(new ChertAllDocsPostList(ptrtothis, doccount)); 1809 } else { 1810 RETURN(new ChertAllDocsModifiedPostList(ptrtothis, doccount, doclens)); 1811 } 1807 1812 } 1808 1813 … … 1815 1820 } 1816 1821 1817 RETURN(new ChertPostList(ptrtothis, tname ));1822 RETURN(new ChertPostList(ptrtothis, tname, true)); 1818 1823 } 1819 1824 -
trunk/xapian-core/backends/chert/chert_modifiedpostlist.h
r10346 r10716 47 47 const string & tname_, 48 48 const map<Xapian::docid, pair<char, Xapian::termcount> > & mods_) 49 : ChertPostList(this_db_, tname_ ),49 : ChertPostList(this_db_, tname_, true), 50 50 mods(mods_), it(mods.begin()), poslist(0) 51 51 { } -
trunk/xapian-core/backends/chert/chert_postlist.cc
r10428 r10716 3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002,2003,2004,2005,2007,2008 Olly Betts 5 * Copyright 2007 Lemur Consulting Ltd5 * Copyright 2007,2008 Lemur Consulting Ltd 6 6 * 7 7 * This program is free software; you can redistribute it and/or … … 62 62 Xapian::Internal::RefCntPtr<const ChertDatabase> db) const { 63 63 if (!doclen_pl.get()) { 64 doclen_pl.reset(new ChertPostList(db, string())); 64 // Don't keep a reference back to the database, since this 65 // would make a reference loop. 66 doclen_pl.reset(new ChertPostList(db, string(), false)); 65 67 } 66 68 if (!doclen_pl->jump_to(did)) … … 656 658 */ 657 659 ChertPostList::ChertPostList(Xapian::Internal::RefCntPtr<const ChertDatabase> this_db_, 658 const string & tname_) 659 : this_db(tname_.empty() ? NULL : this_db_), // Don't keep a reference if this is a "doclen postlist". 660 const string & tname_, 661 bool keep_reference) 662 : this_db(keep_reference ? this_db_ : NULL), 660 663 tname(tname_), 661 664 have_started(false), … … 664 667 { 665 668 DEBUGCALL(DB, void, "ChertPostList::ChertPostList", 666 this_db_.get() << ", " << tname_ );669 this_db_.get() << ", " << tname_ << ", " << keep_reference); 667 670 string key = ChertPostListTable::make_key(tname); 668 671 int found = cursor->find_entry(key); 669 672 if (!found) { 673 DEBUGLINE(DB, "postlist for term not found"); 670 674 number_of_entries = 0; 671 675 is_at_end = true; … … 685 689 &is_last_chunk); 686 690 read_wdf(&pos, end, &wdf); 691 DEBUGLINE(DB, "Initial docid " << did); 687 692 } 688 693 -
trunk/xapian-core/backends/chert/chert_postlist.h
r10346 r10716 224 224 /// Default constructor. 225 225 ChertPostList(Xapian::Internal::RefCntPtr<const ChertDatabase> this_db_, 226 const string & tname); 226 const string & tname, 227 bool keep_reference); 227 228 228 229 /// Destructor.
