Ticket #47: alldocpostlists.2.patch
File alldocpostlists.2.patch, 50.2 KB (added by , 18 years ago) |
---|
-
docs/quartzdesign.html
243 243 It is quite possible that the termlists and 244 244 position lists would benefit from being split into chunks in this way. 245 245 246 <h2>All document lists</h2> 247 248 It is possible to use the Xapian API to obtain a list of all documents in the 249 database. This is done by creating a special posting list. This functionality 250 was added after the file structure in use by Quartz was frozen, and it is 251 unfortunately impossible to implement efficiently for Quartz. 252 253 The problem is that it is not possible to read the list of documents in sorted 254 order directly from disk - instead, the list is read into memory to be sorted. 255 For databases which do not have sparse document IDs, this should not use much 256 memory since the list is kept in memory in a range-compressed form (but does 257 require an iteration over the entirety of one of the tables of the Quartz 258 database - no skipping can be done in this case). This is unlikely to be fixed, 259 since we don't believe it can be without changing Quartz's structure. In any 260 case, it is not a priority since Quartz is due to be replaced by Flint as the 261 default backend soon. 262 246 263 <h2>Btree implementation</h2> 247 264 248 265 The tables are currently all implemented as B-trees (actually a form of -
tests/apitest.cc
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2003,2004,2006 Olly Betts 6 * Copyright 2006 Richard Boulton 6 7 * 7 8 * This program is free software; you can redistribute it and/or 8 9 * modify it under the terms of the GNU General Public License as … … 45 46 BackendManager backendmanager; 46 47 47 48 Xapian::Database 49 get_database() 50 { 51 vector<string> dbnames; 52 return backendmanager.get_database(dbnames); 53 } 54 55 Xapian::Database 48 56 get_database(const string &dbname) 49 57 { 50 58 return backendmanager.get_database(dbname); … … 101 109 RUNTESTS("inmemory", anydb); 102 110 RUNTESTS("inmemory", specchar); 103 111 RUNTESTS("inmemory", writabledb); 112 RUNTESTS("inmemory", writablelocaldb); 104 113 RUNTESTS("inmemory", localdb); 105 114 RUNTESTS("inmemory", positionaldb); 106 115 RUNTESTS("inmemory", localpositionaldb); … … 114 123 RUNTESTS("flint", anydb); 115 124 RUNTESTS("flint", specchar); 116 125 RUNTESTS("flint", writabledb); 126 RUNTESTS("flint", writablelocaldb); 117 127 RUNTESTS("flint", localdb); 118 128 RUNTESTS("flint", positionaldb); 119 129 RUNTESTS("flint", localpositionaldb); … … 129 139 RUNTESTS("quartz", anydb); 130 140 RUNTESTS("quartz", specchar); 131 141 RUNTESTS("quartz", writabledb); 142 RUNTESTS("quartz", writablelocaldb); 132 143 RUNTESTS("quartz", localdb); 133 144 RUNTESTS("quartz", positionaldb); 134 145 RUNTESTS("quartz", localpositionaldb); -
tests/apitest.h
3 3 * ----START-LICENCE---- 4 4 * Copyright 1999,2000,2001 BrightStation PLC 5 5 * Copyright 2003,2004 Olly Betts 6 * Copyright 2006 Richard Boulton 6 7 * 7 8 * This program is free software; you can redistribute it and/or 8 9 * modify it under the terms of the GNU General Public License as … … 26 27 27 28 #include <xapian.h> 28 29 30 Xapian::Database get_database(); 29 31 Xapian::Database get_database(const std::string &dbname); 30 32 Xapian::Database get_database(const std::string &dbname, 31 33 const std::string &dbname2); -
tests/api_db.cc
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2002,2003,2004,2005,2006 Olly Betts 6 * Copyright 2006 Richard Boulton 6 7 * 7 8 * This program is free software; you can redistribute it and/or 8 9 * modify it under the terms of the GNU General Public License as … … 895 896 { 896 897 Xapian::Database db(get_database("apitest_simpledata")); 897 898 898 TEST_EXCEPTION(Xapian::InvalidArgumentError, db.postlist_begin(""));899 900 899 TEST_EQUAL(db.postlist_begin("rosebud"), db.postlist_end("rosebud")); 901 900 902 901 string s = "let_us_see_if_we_can_break_it_with_a_really_really_long_term."; … … 1011 1010 return true; 1012 1011 } 1013 1012 1013 // tests all document postlists 1014 static bool test_allpostlist1() 1015 { 1016 Xapian::Database db(get_database("apitest_manydocs")); 1017 Xapian::PostingIterator i = db.postlist_begin(""); 1018 unsigned int j = 1; 1019 while (i != db.postlist_end("")) { 1020 TEST_EQUAL(*i, j); 1021 i++; 1022 j++; 1023 } 1024 TEST_EQUAL(j, 513); 1025 1026 i = db.postlist_begin(""); 1027 j = 1; 1028 while (i != db.postlist_end("")) { 1029 TEST_EQUAL(*i, j); 1030 i++; 1031 j++; 1032 if (j == 50) { 1033 j += 10; 1034 i.skip_to(j); 1035 } 1036 } 1037 TEST_EQUAL(j, 513); 1038 1039 return true; 1040 } 1041 1042 static void test_emptyterm1_helper(Xapian::Database & db) 1043 { 1044 // Don't bother with postlist_begin() because allpostlist tests cover that. 
1045 TEST_EXCEPTION(Xapian::InvalidArgumentError, db.positionlist_begin(1, "")); 1046 TEST_EQUAL(db.get_doccount(), db.get_termfreq("")); 1047 TEST_EQUAL(db.get_doccount() != 0, db.term_exists("")); 1048 TEST_EQUAL(db.get_doccount(), db.get_collection_freq("")); 1049 } 1050 1051 // tests results of passing an empty term to various methods 1052 static bool test_emptyterm1() 1053 { 1054 Xapian::Database db(get_database("apitest_manydocs")); 1055 TEST_EQUAL(db.get_doccount(), 512); 1056 test_emptyterm1_helper(db); 1057 1058 db = get_database("apitest_onedoc"); 1059 TEST_EQUAL(db.get_doccount(), 1); 1060 test_emptyterm1_helper(db); 1061 1062 db = get_database(); 1063 TEST_EQUAL(db.get_doccount(), 0); 1064 test_emptyterm1_helper(db); 1065 1066 return true; 1067 } 1068 1014 1069 // tests collection frequency 1015 1070 static bool test_collfreq1() 1016 1071 { … … 1379 1434 {"postlist4", test_postlist4}, 1380 1435 {"postlist5", test_postlist5}, 1381 1436 {"postlist6", test_postlist6}, 1437 {"allpostlist1", test_allpostlist1}, 1438 {"emptyterm1", test_emptyterm1}, 1382 1439 {"termstats", test_termstats}, 1383 1440 {"sortvalue1", test_sortvalue1}, 1384 1441 // consistency1 will run on the remote backend, but it's particularly slow -
tests/api_wrdb.cc
4 4 * Copyright 2001 Hein Ragas 5 5 * Copyright 2002 Ananova Ltd 6 6 * Copyright 2002,2003,2004,2005,2006 Olly Betts 7 * Copyright 2006 Richard Boulton 7 8 * 8 9 * This program is free software; you can redistribute it and/or 9 10 * modify it under the terms of the GNU General Public License as … … 909 910 return true; 910 911 } 911 912 913 // tests all document postlists 914 static bool test_allpostlist2() 915 { 916 Xapian::WritableDatabase db(get_writable_database("apitest_manydocs")); 917 Xapian::PostingIterator i = db.postlist_begin(""); 918 unsigned int j = 1; 919 while (i != db.postlist_end("")) { 920 TEST_EQUAL(*i, j); 921 i++; 922 j++; 923 } 924 TEST_EQUAL(j, 513); 925 926 db.delete_document(1); 927 db.delete_document(50); 928 db.delete_document(512); 929 930 i = db.postlist_begin(""); 931 j = 2; 932 while (i != db.postlist_end("")) { 933 TEST_EQUAL(*i, j); 934 i++; 935 j++; 936 if (j == 50) j++; 937 } 938 TEST_EQUAL(j, 512); 939 940 i = db.postlist_begin(""); 941 j = 2; 942 while (i != db.postlist_end("")) { 943 TEST_EQUAL(*i, j); 944 i++; 945 j++; 946 if (j == 40) { 947 j += 10; 948 i.skip_to(j); 949 j++; 950 } 951 } 952 TEST_EQUAL(j, 512); 953 954 return true; 955 } 956 957 static void test_emptyterm2_helper(Xapian::WritableDatabase & db) 958 { 959 // Don't bother with postlist_begin() because allpostlist tests cover that. 
960 TEST_EXCEPTION(Xapian::InvalidArgumentError, db.positionlist_begin(1, "")); 961 TEST_EQUAL(db.get_doccount(), db.get_termfreq("")); 962 TEST_EQUAL(db.get_doccount() != 0, db.term_exists("")); 963 TEST_EQUAL(db.get_doccount(), db.get_collection_freq("")); 964 } 965 966 // tests results of passing an empty term to various methods 967 // equivalent of emptyterm1 for a writable database 968 static bool test_emptyterm2() 969 { 970 Xapian::WritableDatabase db(get_writable_database("apitest_manydocs")); 971 TEST_EQUAL(db.get_doccount(), 512); 972 test_emptyterm2_helper(db); 973 db.delete_document(1); 974 TEST_EQUAL(db.get_doccount(), 511); 975 test_emptyterm2_helper(db); 976 db.delete_document(50); 977 TEST_EQUAL(db.get_doccount(), 510); 978 test_emptyterm2_helper(db); 979 db.delete_document(512); 980 TEST_EQUAL(db.get_doccount(), 509); 981 test_emptyterm2_helper(db); 982 983 db = get_writable_database("apitest_onedoc"); 984 TEST_EQUAL(db.get_doccount(), 1); 985 test_emptyterm2_helper(db); 986 db.delete_document(1); 987 TEST_EQUAL(db.get_doccount(), 0); 988 test_emptyterm2_helper(db); 989 990 db = get_writable_database(""); 991 TEST_EQUAL(db.get_doccount(), 0); 992 test_emptyterm2_helper(db); 993 994 return true; 995 } 996 912 997 // Check that PHRASE/NEAR becomes AND if there's no positional info in the 913 998 // database. 914 999 static bool test_phraseorneartoand1() … … 1035 1120 {"replacedoc3", test_replacedoc3}, 1036 1121 {"replacedoc4", test_replacedoc4}, 1037 1122 {"uniqueterm1", test_uniqueterm1}, 1123 {"emptyterm2", test_emptyterm2}, 1038 1124 {"phraseorneartoand1", test_phraseorneartoand1}, 1039 1125 {"longpositionlist1", test_longpositionlist1}, 1040 1126 {0, 0} 1041 1127 }; 1128 1129 /// The tests which use a writable, but local, backend 1130 test_desc writablelocaldb_tests[] = { 1131 {"allpostlist2", test_allpostlist2}, 1132 {0, 0} 1133 }; -
tests/api_wrdb.h
3 3 * ----START-LICENCE---- 4 4 * Copyright 1999,2000,2001 BrightStation PLC 5 5 * Copyright 2004 Olly Betts 6 * Copyright 2006 Richard Boulton 6 7 * 7 8 * This program is free software; you can redistribute it and/or 8 9 * modify it under the terms of the GNU General Public License as … … 27 28 #include "testsuite.h" 28 29 29 30 extern test_desc writabledb_tests[]; 31 extern test_desc writablelocaldb_tests[]; 30 32 31 33 #endif /* XAPIAN_HGUARD_API_WRDB_H */ -
include/xapian/database.h
4 4 /* Copyright 1999,2000,2001 BrightStation PLC 5 5 * Copyright 2002 Ananova Ltd 6 6 * Copyright 2002,2003,2004,2005,2006 Olly Betts 7 * Copyright 2006 Richard Boulton 7 8 * 8 9 * This program is free software; you can redistribute it and/or 9 10 * modify it under the terms of the GNU General Public License as … … 108 109 109 110 /** An iterator pointing to the start of the postlist 110 111 * for a given term. 112 * 113 * If the term name is the empty string, the iterator returned 114 * will list all the documents in the database. Such an iterator 115 * will always return a WDF value of 1, since there is no obvious 116 * meaning for this quantity in this case. 111 117 */ 112 118 PostingIterator postlist_begin(const std::string &tname) const; 113 119 -
ChangeLog
1 Wed Oct 11 19:05:38 BST 2006 Richard Boulton <richard@lemurconsulting.com> 2 3 * common/database.h,api/omdatabase.cc, 4 backends/inmemory/inmemory_database.cc, 5 backends/inmemory/inmemory_database.h, 6 backends/quartz/Makefile.am,backends/quartz/quartz_database.cc, 7 backends/quartz/quartz_alldocspostlist.h, 8 backends/quartz/quartz_alldocspostlist.cc, 9 backends/flint/Makefile.am,backends/flint/flint_database.cc, 10 backends/flint/flint_alldocspostlist.cc, 11 backends/flint/flint_alldocspostlist.h: 12 Implement posting lists which return a list of all documents in 13 the database. Such a posting list is obtained by calling 14 Xapian::Database::postlist_begin() with an empty term (ie, ""). 15 Also, all Xapian::Database methods which take a termname now 16 accept an empty term, and return appropriate values (ie, 17 get_termfreq("") and get_collection_freq("") return the number of 18 documents in the database, and term_exists("") returns true 19 unless the database is empty). 20 * docs/quartzdesign.html: Document the inefficiency of all-document 21 postlists for Quartz. 22 * tests/apitest.cc,tests/apitest.h,tests/api_db.cc, 23 tests/api_wrdb.cc,tests/api_wrdb.h: Add tests for all-document 24 postlists, and for passing an empty term to all the applicable 25 database methods. This defines the new tests allpostlist[12], and 26 emptyterm[12]. Includes defining a new test category for databases 27 which are local and writable, and adding a function to get an 28 empty database (for testing the empty term methods with such a 29 database). 30 1 31 Tue Oct 10 17:24:00 BST 2006 Olly Betts <olly@survex.com> 2 32 3 33 * NEWS: Bump release date. -
common/database.h
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2002,2003,2004,2005,2006 Olly Betts 6 * Copyright 2006 Richard Boulton 6 7 * 7 8 * This program is free software; you can redistribute it and/or 8 9 * modify it under the terms of the GNU General Public License as … … 188 189 * use. 189 190 */ 190 191 LeafPostList * open_post_list(const string & tname) const { 191 if (!t erm_exists(tname)) {192 if (!tname.empty() && !term_exists(tname)) { 192 193 DEBUGLINE(MATCH, tname + " is not in database."); 193 194 // Term doesn't exist in this database. However, we create 194 195 // a (empty) postlist for it to help make distributed searching -
api/omdatabase.cc
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2001,2002 Ananova Ltd 5 5 * Copyright 2002,2003,2004,2005,2006 Olly Betts 6 * Copyright 2006 Richard Boulton 6 7 * 7 8 * This program is free software; you can redistribute it and/or 8 9 * modify it under the terms of the GNU General Public License as … … 101 102 Database::postlist_begin(const string &tname) const 102 103 { 103 104 DEBUGAPICALL(PostingIterator, "Database::postlist_begin", tname); 104 if (tname.empty())105 throw InvalidArgumentError("Zero length terms are invalid");106 105 107 106 // Don't bother checking that the term exists first. If it does, we 108 107 // just end up doing more work, and if it doesn't, we save very little … … 248 247 Database::get_termfreq(const string & tname) const 249 248 { 250 249 DEBUGAPICALL(Xapian::doccount, "Database::get_termfreq", tname); 251 if (tname.empty()) 252 throw InvalidArgumentError("Zero length terms are invalid"); 250 if (tname.empty()) { 251 return get_doccount(); 252 } 253 253 Xapian::doccount tf = 0; 254 254 vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i; 255 255 for (i = internal.begin(); i != internal.end(); i++) { … … 262 262 Database::get_collection_freq(const string & tname) const 263 263 { 264 264 DEBUGAPICALL(Xapian::termcount, "Database::get_collection_freq", tname); 265 if (tname.empty()) 266 throw InvalidArgumentError("Zero length terms are invalid"); 265 if (tname.empty()) { 266 return get_doccount(); 267 } 267 268 268 269 Xapian::termcount cf = 0; 269 270 vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i; … … 303 304 bool 304 305 Database::term_exists(const string & tname) const 305 306 { 306 if (tname.empty()) 307 throw InvalidArgumentError("Zero length terms are invalid"); 307 if (tname.empty()) { 308 return get_doccount() != 0; 309 } 308 310 vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i; 309 311 for (i = internal.begin(); i != internal.end(); ++i) 
{ 310 312 if ((*i)->term_exists(tname)) return true; -
backends/inmemory/inmemory_database.cc
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2002,2003,2004,2005,2006 Olly Betts 6 * Copyright 2006 Richard Boulton 6 7 * 7 8 * This program is free software; you can redistribute it and/or 8 9 * modify it under the terms of the GNU General Public License as … … 260 261 return Xapian::PositionIterator(db->open_position_list(did, (*pos).tname)); 261 262 } 262 263 264 ///////////////////////////// 265 // InMemoryAllDocsPostList // 266 ///////////////////////////// 267 268 InMemoryAllDocsPostList::InMemoryAllDocsPostList(Xapian::Internal::RefCntPtr<const InMemoryDatabase> db_) 269 : did(0), db(db_) 270 { 271 } 272 273 Xapian::doccount 274 InMemoryAllDocsPostList::get_termfreq() const 275 { 276 return db->totdocs; 277 } 278 279 Xapian::docid 280 InMemoryAllDocsPostList::get_docid() const 281 { 282 Assert(did > 0); 283 Assert(did <= db->termlists.size()); 284 Assert(db->termlists[did - 1].is_valid); 285 return did; 286 } 287 288 Xapian::doclength 289 InMemoryAllDocsPostList::get_doclength() const 290 { 291 return db->get_doclength(did); 292 } 293 294 Xapian::termcount 295 InMemoryAllDocsPostList::get_wdf() const 296 { 297 return 1; 298 } 299 300 PositionList * 301 InMemoryAllDocsPostList::read_position_list() 302 { 303 throw Xapian::UnimplementedError("Can't open position list for all docs iterator"); 304 } 305 306 PositionList * 307 InMemoryAllDocsPostList::open_position_list() const 308 { 309 throw Xapian::UnimplementedError("Can't open position list for all docs iterator"); 310 } 311 312 PostList * 313 InMemoryAllDocsPostList::next(Xapian::weight /*w_min*/) 314 { 315 Assert(!at_end()); 316 do { 317 ++did; 318 } while (did <= db->termlists.size() && !db->termlists[did - 1].is_valid); 319 return NULL; 320 } 321 322 PostList * 323 InMemoryAllDocsPostList::skip_to(Xapian::docid did_, Xapian::weight /*w_min*/) 324 { 325 Assert(!at_end()); 326 if (did <= did_) { 327 did = did_; 328 while (did <= db->termlists.size() && 
!db->termlists[did - 1].is_valid) { 329 ++did; 330 } 331 } 332 return NULL; 333 } 334 335 bool 336 InMemoryAllDocsPostList::at_end() const 337 { 338 return (did > db->termlists.size()); 339 } 340 341 string 342 InMemoryAllDocsPostList::get_description() const 343 { 344 return "InMemoryAllDocsPostList" + om_tostring(did); 345 } 346 263 347 /////////////////////////// 264 348 // Actual database class // 265 349 /////////////////////////// … … 279 363 LeafPostList * 280 364 InMemoryDatabase::do_open_post_list(const string & tname) const 281 365 { 282 Assert(tname.size() != 0); 366 if (tname.empty()) { 367 if (termlists.empty()) 368 return new EmptyPostList(); 369 return new InMemoryAllDocsPostList(Xapian::Internal::RefCntPtr<const InMemoryDatabase>(this)); 370 } 283 371 map<string, InMemoryTerm>::const_iterator i = postlists.find(tname); 284 372 if (i == postlists.end() || i->second.term_freq == 0) 285 373 return new EmptyPostList(); -
backends/inmemory/inmemory_database.h
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2002,2003,2004,2005,2006 Olly Betts 6 * Copyright 2006 Richard Boulton 6 7 * 7 8 * This program is free software; you can redistribute it and/or 8 9 * modify it under the terms of the GNU General Public License as … … 159 160 string get_description() const; 160 161 }; 161 162 163 /** A PostList over all docs in an inmemory database. 164 */ 165 class InMemoryAllDocsPostList : public LeafPostList { 166 friend class InMemoryDatabase; 167 private: 168 Xapian::docid did; 169 170 Xapian::Internal::RefCntPtr<const InMemoryDatabase> db; 171 172 InMemoryAllDocsPostList(Xapian::Internal::RefCntPtr<const InMemoryDatabase> db); 173 public: 174 Xapian::doccount get_termfreq() const; 175 176 Xapian::docid get_docid() const; // Gets current docid 177 Xapian::doclength get_doclength() const; // Length of current document 178 Xapian::termcount get_wdf() const; // Within Document Frequency 179 PositionList * read_position_list(); 180 PositionList * open_position_list() const; 181 182 PostList *next(Xapian::weight w_min); // Moves to next docid 183 184 PostList *skip_to(Xapian::docid did, Xapian::weight w_min); // Moves to next docid >= specified docid 185 186 // True if we're off the end of the list 187 bool at_end() const; 188 189 string get_description() const; 190 }; 191 162 192 // Term List 163 193 class InMemoryTermList : public LeafTermList { 164 194 friend class InMemoryDatabase; … … 193 223 * This is a prototype database, mainly used for debugging and testing. 194 224 */ 195 225 class InMemoryDatabase : public Xapian::Database::Internal { 226 friend class InMemoryAllDocsPostList; 196 227 private: 197 228 map<string, InMemoryTerm> postlists; 198 229 vector<InMemoryDoc> termlists; -
backends/quartz/quartz_alldocspostlist.h
1 /* quartz_alldocspostlist.h: All document postlists in quartz databases 2 * 3 * ----START-LICENCE---- 4 * Copyright 1999,2000,2001 BrightStation PLC 5 * Copyright 2002 Ananova Ltd 6 * Copyright 2002,2003,2004,2005 Olly Betts 7 * Copyright 2006 Richard Boulton 8 * 9 * This program is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU General Public License as 11 * published by the Free Software Foundation; either version 2 of the 12 * License, or (at your option) any later version. 13 * 14 * This program is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 * GNU General Public License for more details. 18 * 19 * You should have received a copy of the GNU General Public License 20 * along with this program; if not, write to the Free Software 21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 22 * USA 23 * -----END-LICENCE----- 24 */ 25 26 #ifndef OM_HGUARD_QUARTZ_ALLDOCSPOSTLIST_H 27 #define OM_HGUARD_QUARTZ_ALLDOCSPOSTLIST_H 28 29 #include <map> 30 #include <string> 31 32 #include "leafpostlist.h" 33 #include <xapian/database.h> 34 #include <xapian/postingiterator.h> 35 #include "database.h" 36 #include "omassert.h" 37 #include "quartz_types.h" 38 #include "btree.h" 39 40 using namespace std; 41 42 class Bcursor; 43 44 class QuartzDocIdList; 45 46 class QuartzDocIdListIterator { 47 private: 48 const map<Xapian::docid, Xapian::docid> * ranges; 49 map<Xapian::docid, Xapian::docid>::const_iterator currrange; 50 Xapian::docid currdocid; 51 52 friend class QuartzDocIdList; 53 54 QuartzDocIdListIterator(const map<Xapian::docid, Xapian::docid> * ranges_); 55 QuartzDocIdListIterator(const map<Xapian::docid, Xapian::docid> * ranges_, int); 56 57 public: 58 Xapian::docid operator*() { 59 return currdocid; 60 } 61 62 friend bool operator==(const QuartzDocIdListIterator &a, 63 const 
QuartzDocIdListIterator &b); 64 65 QuartzDocIdListIterator(); 66 ~QuartzDocIdListIterator() {} 67 QuartzDocIdListIterator(const QuartzDocIdListIterator & other); 68 void operator=(const QuartzDocIdListIterator & other); 69 70 QuartzDocIdListIterator & operator++(); 71 72 Xapian::DocIDWrapper operator++(int) { 73 Xapian::docid tmp = **this; 74 operator++(); 75 return Xapian::DocIDWrapper(tmp); 76 } 77 78 Xapian::docid operator *() const { return currdocid; } 79 80 /// Allow use as an STL iterator 81 //@{ 82 typedef std::input_iterator_tag iterator_category; 83 typedef Xapian::docid value_type; 84 typedef Xapian::doccount_diff difference_type; 85 typedef Xapian::docid * pointer; 86 typedef Xapian::docid & reference; 87 //@} 88 }; 89 90 inline bool operator==(const QuartzDocIdListIterator &a, 91 const QuartzDocIdListIterator &b) 92 { 93 if (a.ranges != b.ranges) 94 return false; 95 return a.currdocid == b.currdocid; 96 } 97 98 inline bool operator!=(const QuartzDocIdListIterator &a, 99 const QuartzDocIdListIterator &b) 100 { 101 return !(a==b); 102 } 103 104 class QuartzDocIdList { 105 private: 106 /** Map from start of a range to end of a range. 107 */ 108 map<Xapian::docid, Xapian::docid> ranges; 109 110 public: 111 QuartzDocIdList() {} 112 void addDocId(Xapian::docid did); 113 114 QuartzDocIdListIterator begin() const { 115 return QuartzDocIdListIterator(&ranges); 116 } 117 118 QuartzDocIdListIterator end() const { 119 return QuartzDocIdListIterator(&ranges, 1); 120 } 121 }; 122 123 /** A postlist in a quartz database. 124 */ 125 class QuartzAllDocsPostList : public LeafPostList { 126 private: 127 /// Pointer to database. 128 Xapian::Internal::RefCntPtr<const Xapian::Database::Internal> this_db; 129 130 /// List of docids. 131 QuartzDocIdList docids; 132 133 /// Iterator through docids. 134 QuartzDocIdListIterator dociditer; 135 136 /// Number of documents in the database. 137 Xapian::doccount doccount; 138 139 /// Whether we've started yet. 
140 bool have_started; 141 142 /// Copying is not allowed. 143 QuartzAllDocsPostList(const QuartzAllDocsPostList &); 144 145 /// Assignment is not allowed. 146 void operator=(const QuartzAllDocsPostList &); 147 148 public: 149 /// Default constructor. 150 QuartzAllDocsPostList(Xapian::Internal::RefCntPtr<const Xapian::Database::Internal> this_db_, 151 const Btree * table, 152 Xapian::doccount doccount_); 153 154 /// Destructor. 155 ~QuartzAllDocsPostList(); 156 157 /** Returns length of the all documents postlist. 158 * 159 * This is also the number of documents in the database. 160 */ 161 Xapian::doccount get_termfreq() const { return doccount; } 162 163 /** Returns the number of occurrences of the term in the database. 164 * 165 * We pretend that each document has one "empty" term, so this is 166 * also the number of documents in the database. 167 */ 168 Xapian::termcount get_collection_freq() const { return doccount; } 169 170 /// Returns the current docid. 171 Xapian::docid get_docid() const { 172 Assert(have_started); 173 return *dociditer; 174 } 175 176 /// Returns the length of current document. 177 Xapian::doclength get_doclength() const { 178 Assert(have_started); 179 return this_db->get_doclength(*dociditer); 180 } 181 182 /** Returns the Within Document Frequency of the term in the current 183 * document. 184 */ 185 Xapian::termcount get_wdf() const { 186 Assert(have_started); 187 return static_cast<Xapian::termcount>(1); 188 } 189 190 /** Get the list of positions of the term in the current document. 191 */ 192 PositionList *read_position_list() { 193 throw Xapian::InvalidOperationError("Can't read position list from all docs postlist."); 194 } 195 196 /** Get the list of positions of the term in the current document. 197 */ 198 PositionList * open_position_list() const { 199 throw Xapian::InvalidOperationError("Can't read position list from all docs postlist."); 200 } 201 202 /// Move to the next document. 
203 PostList * next(Xapian::weight w_min); 204 205 /// Skip to next document with docid >= docid. 206 PostList * skip_to(Xapian::docid desired_did, Xapian::weight w_min); 207 208 /// Return true if and only if we're off the end of the list. 209 bool at_end() const { return (have_started && dociditer == docids.end()); } 210 211 /// Get a description of the postlist. 212 std::string get_description() const; 213 }; 214 215 #endif /* OM_HGUARD_QUARTZ_ALLDOCSPOSTLIST_H */ -
backends/quartz/quartz_database.cc
4 4 * Copyright 2001 Hein Ragas 5 5 * Copyright 2002 Ananova Ltd 6 6 * Copyright 2002,2003,2004,2005,2006 Olly Betts 7 * Copyright 2006 Richard Boulton 7 8 * 8 9 * This program is free software; you can redistribute it and/or 9 10 * modify it under the terms of the GNU General Public License as … … 33 34 #include <xapian/valueiterator.h> 34 35 35 36 #include "quartz_postlist.h" 37 #include "quartz_alldocspostlist.h" 36 38 #include "quartz_termlist.h" 37 39 #include "quartz_positionlist.h" 38 40 #include "quartz_utils.h" … … 598 600 QuartzDatabase::do_open_post_list(const string& tname) const 599 601 { 600 602 DEBUGCALL(DB, LeafPostList *, "QuartzDatabase::do_open_post_list", tname); 601 Assert(!tname.empty());603 Xapian::Internal::RefCntPtr<const QuartzDatabase> ptrtothis(this); 602 604 603 Xapian::Internal::RefCntPtr<const QuartzDatabase> ptrtothis(this); 604 return(new QuartzPostList(ptrtothis, 605 if (tname.empty()) { 606 RETURN(new QuartzAllDocsPostList(ptrtothis, 607 &termlist_table, 608 get_doccount())); 609 } 610 611 RETURN(new QuartzPostList(ptrtothis, 605 612 &postlist_table, 606 613 &positionlist_table, 607 614 tname)); … … 1111 1118 QuartzWritableDatabase::do_open_post_list(const string& tname) const 1112 1119 { 1113 1120 DEBUGCALL(DB, LeafPostList *, "QuartzWritableDatabase::do_open_post_list", tname); 1114 Assert(!tname.empty());1121 Xapian::Internal::RefCntPtr<const QuartzWritableDatabase> ptrtothis(this); 1115 1122 1123 if (tname.empty()) { 1124 RETURN(new QuartzAllDocsPostList(ptrtothis, 1125 &database_ro.termlist_table, 1126 get_doccount())); 1127 } 1128 1116 1129 // Need to flush iff we've got buffered changes to this term's postlist. 
1117 1130 map<string, map<docid, pair<char, termcount> > >::const_iterator j; 1118 1131 j = mod_plists.find(tname); … … 1122 1135 do_flush_const(); 1123 1136 } 1124 1137 1125 Xapian::Internal::RefCntPtr<const QuartzWritableDatabase> ptrtothis(this); 1126 return(new QuartzPostList(ptrtothis, 1138 RETURN(new QuartzPostList(ptrtothis, 1127 1139 &database_ro.postlist_table, 1128 1140 &database_ro.positionlist_table, 1129 1141 tname)); -
backends/quartz/Makefile.am
12 12 quartz_utils.h \ 13 13 quartz_log.h \ 14 14 quartz_document.h \ 15 quartz_alldocspostlist.h \ 15 16 quartz_alltermslist.h \ 16 17 quartz_metafile.h \ 17 18 btree.h \ … … 27 28 quartz_values.cc \ 28 29 quartz_log.cc \ 29 30 quartz_document.cc \ 31 quartz_alldocspostlist.cc \ 30 32 quartz_alltermslist.cc \ 31 33 quartz_metafile.cc \ 32 34 btree.cc \ -
backends/quartz/quartz_alldocspostlist.cc
1 /* quartz_alldocspostlist.cc: All-document postlists in quartz databases 2 * 3 * ----START-LICENCE---- 4 * Copyright 1999,2000,2001 BrightStation PLC 5 * Copyright 2002,2003,2004,2005 Olly Betts 6 * Copyright 2006 Richard Boulton 7 * 8 * This program is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU General Public License as 10 * published by the Free Software Foundation; either version 2 of the 11 * License, or (at your option) any later version. 12 * 13 * This program is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU General Public License for more details. 17 * 18 * You should have received a copy of the GNU General Public License 19 * along with this program; if not, write to the Free Software 20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 21 * USA 22 * -----END-LICENCE----- 23 */ 24 25 #include <config.h> 26 #include "omdebug.h" 27 #include "quartz_alldocspostlist.h" 28 #include "quartz_utils.h" 29 #include "bcursor.h" 30 #include "database.h" 31 #include <map> 32 33 QuartzDocIdListIterator::QuartzDocIdListIterator() 34 : ranges(NULL), 35 currrange(), 36 currdocid(0) 37 { 38 DEBUGCALL(DB, void, 39 "QuartzDocIdListIterator::QuartzDocIdListIterator", ""); 40 } 41 42 QuartzDocIdListIterator::QuartzDocIdListIterator(const map<Xapian::docid, Xapian::docid> * ranges_, int) 43 : ranges(ranges_), 44 currrange(ranges_->end()), 45 currdocid(0) 46 { 47 } 48 49 QuartzDocIdListIterator::QuartzDocIdListIterator(const map<Xapian::docid, Xapian::docid> * ranges_) 50 : ranges(ranges_), 51 currrange(ranges_->begin()), 52 currdocid(0) 53 { 54 DEBUGCALL(DB, void, 55 "QuartzDocIdListIterator::QuartzDocIdListIterator", "ranges"); 56 if (currrange != ranges_->end()) { 57 currdocid = currrange->first; 58 } 59 60 map<Xapian::docid, Xapian::docid>::const_iterator i; 61 for (i = 
ranges->begin(); i != ranges->end(); i++) { 62 DEBUGLINE(DB, "Docid range begin=" << i->first << ", end=" << i->second); 63 } 64 } 65 66 QuartzDocIdListIterator::QuartzDocIdListIterator(const QuartzDocIdListIterator & other) 67 : ranges(other.ranges), 68 currrange(other.currrange), 69 currdocid(other.currdocid) 70 { 71 DEBUGCALL(DB, void, 72 "QuartzDocIdListIterator::~QuartzDocIdListIterator", "other"); 73 } 74 75 void 76 QuartzDocIdListIterator::operator=(const QuartzDocIdListIterator & other) 77 { 78 DEBUGCALL(DB, void, 79 "QuartzDocIdListIterator::operator=", "other"); 80 ranges = other.ranges; 81 currrange = other.currrange; 82 currdocid = other.currdocid; 83 } 84 85 QuartzDocIdListIterator & 86 QuartzDocIdListIterator::operator++() 87 { 88 DEBUGCALL(DB, void, 89 "QuartzDocIdListIterator::operator++", ""); 90 DEBUGLINE(DB, string("Moved from ") << 91 (currrange == ranges->end() ? string("end.") : string("docid = ") + 92 om_tostring(currdocid))); 93 94 if (currrange != ranges->end()) { 95 Assert(currrange->first <= currdocid); 96 if (currdocid < currrange->second) { 97 currdocid++; 98 } else { 99 currrange++; 100 if (currrange == ranges->end()) { 101 currdocid = 0; 102 } else { 103 Assert(currrange->first > currdocid); 104 currdocid = currrange->first; 105 } 106 } 107 } 108 109 DEBUGLINE(DB, string("Moved to ") << 110 (currrange == ranges->end() ? 
string("end.") : string("docid = ") + 111 om_tostring(currdocid))); 112 113 return *this; 114 } 115 116 117 void 118 QuartzDocIdList::addDocId(Xapian::docid did) { 119 DEBUGCALL(DB, void, "QuartzDocIdList::addDocId", did); 120 121 if(ranges.size() == 0) { 122 ranges.insert(pair<Xapian::docid, Xapian::docid>(did, did)); 123 return; 124 } 125 126 if (did < ranges.begin()->first) { 127 Xapian::docid newend; 128 if (did == ranges.begin()->first - 1) { 129 newend = ranges.begin()->second; 130 ranges.erase(ranges.begin()); 131 } else { 132 newend = did; 133 } 134 ranges[did] = newend; 135 return; 136 } 137 138 map<Xapian::docid, Xapian::docid>::iterator i; 139 i = ranges.lower_bound(did); 140 if (i == ranges.end()) { 141 i--; 142 Assert(did > i->first); 143 } else if (did < i->first) { 144 i--; 145 Assert(did > i->first); 146 } 147 Assert(did >= i->first); 148 149 if (did <= i->second) { 150 // Do nothing - already in range 151 return; 152 } 153 154 if (did == i->second + 1) { 155 // Extend range 156 i->second = did; 157 map<Xapian::docid, Xapian::docid>::iterator j; 158 j = i; 159 j++; 160 if (j != ranges.end()) { 161 Assert(j->first > i->second); 162 if (j->first == i->second + 1) { 163 // Merge ranges 164 i->second = j->second; 165 ranges.erase(j); 166 } 167 } 168 } else { 169 ranges[did] = did; 170 } 171 } 172 173 174 QuartzAllDocsPostList::QuartzAllDocsPostList(Xapian::Internal::RefCntPtr<const Xapian::Database::Internal> this_db_, 175 const Btree * table, 176 Xapian::doccount doccount_) 177 : this_db(this_db_), 178 docids(), 179 dociditer(), 180 doccount(doccount_), 181 have_started(false) 182 { 183 DEBUGCALL(DB, void, "QuartzAllDocsPostList::QuartzAllDocsPostList", 184 this_db_.get() << ", " << table << ", " << doccount_); 185 186 // Move to initial NULL entry. 
187 Bcursor * cursor = table->cursor_get(); 188 cursor->find_entry(""); 189 if (!cursor->after_end()) 190 cursor->next(); 191 while (!cursor->after_end()) { 192 string key = cursor->current_key; 193 const char * keystr = key.c_str(); 194 Xapian::docid did; 195 if (!unpack_uint_last(&keystr, keystr + key.length(), &did)) { 196 throw Xapian::RangeError("Document number in value table is too large"); 197 } 198 docids.addDocId(did); 199 cursor->next(); 200 } 201 } 202 203 QuartzAllDocsPostList::~QuartzAllDocsPostList() 204 { 205 DEBUGCALL(DB, void, "QuartzAllDocsPostList::~QuartzAllDocsPostList", ""); 206 } 207 208 PostList * 209 QuartzAllDocsPostList::next(Xapian::weight w_min) 210 { 211 DEBUGCALL(DB, PostList *, "QuartzAllDocsPostList::next", w_min); 212 (void)w_min; 213 214 if (have_started) { 215 ++dociditer; 216 } else { 217 dociditer = docids.begin(); 218 have_started = true; 219 } 220 221 DEBUGLINE(DB, string("Moved to ") << 222 (dociditer == docids.end() ? string("end.") : string("docid = ") + 223 om_tostring(*dociditer))); 224 225 RETURN(NULL); 226 } 227 228 PostList * 229 QuartzAllDocsPostList::skip_to(Xapian::docid desired_did, Xapian::weight w_min) 230 { 231 DEBUGCALL(DB, PostList *, 232 "QuartzAllDocsPostList::skip_to", desired_did << ", " << w_min); 233 (void)w_min; // no warning 234 235 // Don't skip back, and don't need to do anything if already there. 236 if (!have_started) { 237 dociditer = docids.begin(); 238 } 239 if (dociditer == docids.end()) RETURN(NULL); 240 if (desired_did <= *dociditer) RETURN(NULL); 241 242 while (dociditer != docids.end() && *dociditer < desired_did) 243 { 244 ++dociditer; 245 } 246 247 DEBUGLINE(DB, string("Skipped to ") << 248 (dociditer == docids.end() ? string("end.") : string("docid = ") + 249 om_tostring(*dociditer))); 250 251 RETURN(NULL); 252 } 253 254 string 255 QuartzAllDocsPostList::get_description() const 256 { 257 return ":" + om_tostring(doccount); 258 } -
backends/flint/flint_database.cc
4 4 * Copyright 2001 Hein Ragas 5 5 * Copyright 2002 Ananova Ltd 6 6 * Copyright 2002,2003,2004,2005,2006 Olly Betts 7 * Copyright 2006 Richard Boulton 7 8 * 8 9 * This program is free software; you can redistribute it and/or 9 10 * modify it under the terms of the GNU General Public License as … … 34 35 35 36 #include "flint_modifiedpostlist.h" 36 37 #include "flint_postlist.h" 38 #include "flint_alldocspostlist.h" 37 39 #include "flint_termlist.h" 38 40 #include "flint_positionlist.h" 39 41 #include "flint_utils.h" … … 455 457 FlintDatabase::do_open_post_list(const string& tname) const 456 458 { 457 459 DEBUGCALL(DB, LeafPostList *, "FlintDatabase::do_open_post_list", tname); 458 Assert(!tname.empty());460 Xapian::Internal::RefCntPtr<const FlintDatabase> ptrtothis(this); 459 461 460 Xapian::Internal::RefCntPtr<const FlintDatabase> ptrtothis(this); 461 return(new FlintPostList(ptrtothis, 462 if (tname.empty()) { 463 RETURN(new FlintAllDocsPostList(ptrtothis, 464 &termlist_table, 465 get_doccount())); 466 } 467 468 RETURN(new FlintPostList(ptrtothis, 462 469 &postlist_table, 463 470 &positionlist_table, 464 471 tname)); … … 967 974 FlintWritableDatabase::do_open_post_list(const string& tname) const 968 975 { 969 976 DEBUGCALL(DB, LeafPostList *, "FlintWritableDatabase::do_open_post_list", tname); 970 Assert(!tname.empty());971 972 977 Xapian::Internal::RefCntPtr<const FlintWritableDatabase> ptrtothis(this); 973 978 979 if (tname.empty()) { 980 RETURN(new FlintAllDocsPostList(ptrtothis, 981 &database_ro.termlist_table, 982 get_doccount())); 983 } 984 974 985 map<string, map<docid, pair<char, termcount> > >::const_iterator j; 975 986 j = mod_plists.find(tname); 976 987 if (j != mod_plists.end()) { -
backends/flint/flint_alldocspostlist.cc
1 /* flint_alldocspostlist.cc: All-document postlists in flint databases 2 * 3 * ----START-LICENCE---- 4 * Copyright 1999,2000,2001 BrightStation PLC 5 * Copyright 2002,2003,2004,2005 Olly Betts 6 * Copyright 2006 Richard Boulton 7 * 8 * This program is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU General Public License as 10 * published by the Free Software Foundation; either version 2 of the 11 * License, or (at your option) any later version. 12 * 13 * This program is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU General Public License for more details. 17 * 18 * You should have received a copy of the GNU General Public License 19 * along with this program; if not, write to the Free Software 20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 21 * USA 22 * -----END-LICENCE----- 23 */ 24 25 #include <config.h> 26 #include "omdebug.h" 27 #include "flint_alldocspostlist.h" 28 #include "flint_utils.h" 29 #include "flint_values.h" 30 #include "flint_cursor.h" 31 #include "database.h" 32 33 /** The format of a postlist is: 34 */ 35 FlintAllDocsPostList::FlintAllDocsPostList(Xapian::Internal::RefCntPtr<const Xapian::Database::Internal> this_db_, 36 const FlintTable * table_, 37 Xapian::doccount doccount_) 38 : this_db(this_db_), 39 table(table_), 40 cursor(table->cursor_get()), 41 did(0), 42 is_at_end(false), 43 doccount(doccount_) 44 { 45 DEBUGCALL(DB, void, "FlintAllDocsPostList::FlintAllDocsPostList", 46 this_db_.get() << ", " << table_ << ", " << doccount_); 47 48 // Move to initial NULL entry. 
49 cursor->find_entry(""); 50 } 51 52 FlintAllDocsPostList::~FlintAllDocsPostList() 53 { 54 DEBUGCALL(DB, void, "FlintAllDocsPostList::~FlintAllDocsPostList", ""); 55 } 56 57 PostList * 58 FlintAllDocsPostList::next(Xapian::weight w_min) 59 { 60 DEBUGCALL(DB, PostList *, "FlintAllDocsPostList::next", w_min); 61 (void)w_min; // no warning 62 63 cursor->next(); 64 if (cursor->after_end()) { 65 is_at_end = true; 66 } else { 67 string key = cursor->current_key; 68 const char * keystr = key.c_str(); 69 if (!unpack_uint_preserving_sort(&keystr, keystr + key.length(), &did)) { 70 if (*keystr == 0) 71 throw Xapian::DatabaseCorruptError("Unexpected end of data when reading from termlist table"); 72 else 73 throw Xapian::RangeError("Document number in value table is too large"); 74 } 75 } 76 77 DEBUGLINE(DB, string("Moved to ") << 78 (is_at_end ? string("end.") : string("docid = ") + 79 om_tostring(did))); 80 81 RETURN(NULL); 82 } 83 84 PostList * 85 FlintAllDocsPostList::skip_to(Xapian::docid desired_did, Xapian::weight w_min) 86 { 87 DEBUGCALL(DB, PostList *, 88 "FlintAllDocsPostList::skip_to", desired_did << ", " << w_min); 89 (void)w_min; // no warning 90 91 // Don't skip back, and don't need to do anything if already there. 92 if (desired_did <= did) RETURN(NULL); 93 if (is_at_end) RETURN(NULL); 94 95 string desired_key = pack_uint_preserving_sort(desired_did); 96 bool exact_match = cursor->find_entry(desired_key); 97 if (!exact_match) 98 cursor->next(); 99 if (cursor->after_end()) { 100 is_at_end = true; 101 } else { 102 string key = cursor->current_key; 103 const char * keystr = key.c_str(); 104 if (!unpack_uint_preserving_sort(&keystr, keystr + key.length(), &did)) { 105 if (*keystr == 0) 106 throw Xapian::DatabaseCorruptError("Unexpected end of data when reading from termlist table"); 107 else 108 throw Xapian::RangeError("Document number in value table is too large"); 109 } 110 } 111 112 DEBUGLINE(DB, string("Skipped to ") << 113 (is_at_end ? 
string("end.") : string("docid = ") + 114 om_tostring(did))); 115 116 RETURN(NULL); 117 } 118 119 string 120 FlintAllDocsPostList::get_description() const 121 { 122 return ":" + om_tostring(doccount); 123 } -
backends/flint/flint_alldocspostlist.h
1 /* flint_alldocspostlist.h: All document postlists in flint databases 2 * 3 * ----START-LICENCE---- 4 * Copyright 1999,2000,2001 BrightStation PLC 5 * Copyright 2002 Ananova Ltd 6 * Copyright 2002,2003,2004,2005 Olly Betts 7 * Copyright 2006 Richard Boulton 8 * 9 * This program is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU General Public License as 11 * published by the Free Software Foundation; either version 2 of the 12 * License, or (at your option) any later version. 13 * 14 * This program is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 * GNU General Public License for more details. 18 * 19 * You should have received a copy of the GNU General Public License 20 * along with this program; if not, write to the Free Software 21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 22 * USA 23 * -----END-LICENCE----- 24 */ 25 26 #ifndef OM_HGUARD_FLINT_ALLDOCSPOSTLIST_H 27 #define OM_HGUARD_FLINT_ALLDOCSPOSTLIST_H 28 29 #include <map> 30 #include <string> 31 32 #include "leafpostlist.h" 33 #include "database.h" 34 #include "omassert.h" 35 #include "flint_types.h" 36 37 using namespace std; 38 39 class FlintCursor; 40 class FlintTable; 41 42 /** A postlist in a flint database. 43 */ 44 class FlintAllDocsPostList : public LeafPostList { 45 private: 46 /** The database we are searching. This pointer is held so that the 47 * database doesn't get deleted before us. 48 */ 49 Xapian::Internal::RefCntPtr<const Xapian::Database::Internal> this_db; 50 51 /// The table containing the values. 52 const FlintTable * table; 53 54 /// Cursor pointing to current values. 55 AutoPtr<FlintCursor> cursor; 56 57 /// Document id we're currently at. 58 Xapian::docid did; 59 60 /// Whether we've run off the end of the list yet. 
61 bool is_at_end; 62 63 /// Number of documents in the database. 64 Xapian::doccount doccount; 65 66 /// Copying is not allowed. 67 FlintAllDocsPostList(const FlintAllDocsPostList &); 68 69 /// Assignment is not allowed. 70 void operator=(const FlintAllDocsPostList &); 71 72 73 public: 74 /// Default constructor. 75 FlintAllDocsPostList(Xapian::Internal::RefCntPtr<const Xapian::Database::Internal> this_db_, 76 const FlintTable * table_, 77 Xapian::doccount doccount_); 78 79 /// Destructor. 80 ~FlintAllDocsPostList(); 81 82 /** Returns length of the all documents postlist. 83 * 84 * This is also the number of documents in the database. 85 */ 86 Xapian::doccount get_termfreq() const { return doccount; } 87 88 /** Returns the number of occurrences of the term in the database. 89 * 90 * We pretend that each document has one "empty" term, so this is 91 * also the number of documents in the database. 92 */ 93 Xapian::termcount get_collection_freq() const { return doccount; } 94 95 /// Returns the current docid. 96 Xapian::docid get_docid() const { Assert(did != 0); return did; } 97 98 /// Returns the length of current document. 99 Xapian::doclength get_doclength() const { 100 Assert(did != 0); 101 return this_db->get_doclength(did); 102 } 103 104 /** Returns the Within Document Frequency of the term in the current 105 * document. 106 */ 107 Xapian::termcount get_wdf() const { Assert(did != 0); return static_cast<Xapian::termcount>(1); } 108 109 /** Get the list of positions of the term in the current document. 110 */ 111 PositionList *read_position_list() { 112 throw Xapian::InvalidOperationError("Can't read position list from all docs postlist."); 113 } 114 115 /** Get the list of positions of the term in the current document. 116 */ 117 PositionList * open_position_list() const { 118 throw Xapian::InvalidOperationError("Can't read position list from all docs postlist."); 119 } 120 121 /// Move to the next document. 
122 PostList * next(Xapian::weight w_min); 123 124 /// Skip to next document with docid >= docid. 125 PostList * skip_to(Xapian::docid desired_did, Xapian::weight w_min); 126 127 /// Return true if and only if we're off the end of the list. 128 bool at_end() const { return is_at_end; } 129 130 /// Get a description of the postlist. 131 std::string get_description() const; 132 }; 133 134 #endif /* OM_HGUARD_FLINT_ALLDOCSPOSTLIST_H */ -
backends/flint/Makefile.am
12 12 flint_values.h \ 13 13 flint_utils.h \ 14 14 flint_document.h \ 15 flint_alldocspostlist.h \ 15 16 flint_alltermslist.h \ 16 17 flint_table.h \ 17 18 flint_cursor.h \ … … 29 30 flint_record.cc \ 30 31 flint_values.cc \ 31 32 flint_document.cc \ 33 flint_alldocspostlist.cc \ 32 34 flint_alltermslist.cc \ 33 35 flint_table.cc \ 34 36 flint_cursor.cc \