Ticket #47: alldocpostlists.patch
File alldocpostlists.patch, 13.3 KB (added 18 years ago)
-
tests/apitest.cc
45 45 BackendManager backendmanager; 46 46 47 47 Xapian::Database 48 get_database() 49 { 50 vector<string> dbnames; 51 return backendmanager.get_database(dbnames); 52 } 53 54 Xapian::Database 48 55 get_database(const string &dbname) 49 56 { 50 57 return backendmanager.get_database(dbname); … … 101 108 RUNTESTS("inmemory", anydb); 102 109 RUNTESTS("inmemory", specchar); 103 110 RUNTESTS("inmemory", writabledb); 111 RUNTESTS("inmemory", writablelocaldb); 104 112 RUNTESTS("inmemory", localdb); 105 113 RUNTESTS("inmemory", positionaldb); 106 114 RUNTESTS("inmemory", localpositionaldb); … … 114 122 RUNTESTS("flint", anydb); 115 123 RUNTESTS("flint", specchar); 116 124 RUNTESTS("flint", writabledb); 125 RUNTESTS("flint", writablelocaldb); 117 126 RUNTESTS("flint", localdb); 118 127 RUNTESTS("flint", positionaldb); 119 128 RUNTESTS("flint", localpositionaldb); … … 129 138 RUNTESTS("quartz", anydb); 130 139 RUNTESTS("quartz", specchar); 131 140 RUNTESTS("quartz", writabledb); 141 RUNTESTS("quartz", writablelocaldb); 132 142 RUNTESTS("quartz", localdb); 133 143 RUNTESTS("quartz", positionaldb); 134 144 RUNTESTS("quartz", localpositionaldb); -
tests/apitest.h
26 26 27 27 #include <xapian.h> 28 28 29 Xapian::Database get_database(); 29 30 Xapian::Database get_database(const std::string &dbname); 30 31 Xapian::Database get_database(const std::string &dbname, 31 32 const std::string &dbname2); -
tests/api_db.cc
1011 1011 return true; 1012 1012 } 1013 1013 1014 // tests all document postlists 1015 static bool test_allpostlist1() 1016 { 1017 Xapian::Database db(get_database("apitest_manydocs")); 1018 Xapian::PostingIterator i = db.postlist_begin(""); 1019 unsigned int j = 1; 1020 while (i != db.postlist_end("")) { 1021 TEST_EQUAL(*i, j); 1022 i++; 1023 j++; 1024 } 1025 TEST_EQUAL(j, 513); 1026 return true; 1027 } 1028 1029 static void test_emptyterm1_helper(Xapian::Database & db) 1030 { 1031 // Don't bother with postlist_begin() because allpostlist tests cover that. 1032 TEST_EXCEPTION(Xapian::InvalidArgumentError, db.positionlist_begin(1, "")); 1033 TEST_EQUAL(db.get_doccount(), db.get_termfreq("")); 1034 TEST_EQUAL(db.get_doccount() != 0, db.term_exists("")); 1035 TEST_EQUAL(db.get_doccount(), db.get_collection_freq("")); 1036 } 1037 1038 // tests results of passing an empty term to various methods 1039 static bool test_emptyterm1() 1040 { 1041 Xapian::Database db(get_database("apitest_manydocs")); 1042 TEST_EQUAL(db.get_doccount(), 512); 1043 test_emptyterm1_helper(db); 1044 1045 db = get_database("apitest_onedoc"); 1046 TEST_EQUAL(db.get_doccount(), 1); 1047 test_emptyterm1_helper(db); 1048 1049 db = get_database(); 1050 TEST_EQUAL(db.get_doccount(), 0); 1051 test_emptyterm1_helper(db); 1052 1053 return true; 1054 } 1055 1014 1056 // tests collection frequency 1015 1057 static bool test_collfreq1() 1016 1058 { … … 1379 1421 {"postlist4", test_postlist4}, 1380 1422 {"postlist5", test_postlist5}, 1381 1423 {"postlist6", test_postlist6}, 1424 {"allpostlist1", test_allpostlist1}, 1425 {"emptyterm1", test_emptyterm1}, 1382 1426 {"termstats", test_termstats}, 1383 1427 {"sortvalue1", test_sortvalue1}, 1384 1428 // consistency1 will run on the remote backend, but it's particularly slow -
tests/api_wrdb.cc
909 909 return true; 910 910 } 911 911 912 // tests all document postlists 913 static bool test_allpostlist2() 914 { 915 Xapian::WritableDatabase db(get_writable_database("apitest_manydocs")); 916 Xapian::PostingIterator i = db.postlist_begin(""); 917 unsigned int j = 1; 918 while (i != db.postlist_end("")) { 919 TEST_EQUAL(*i, j); 920 i++; 921 j++; 922 } 923 TEST_EQUAL(j, 513); 924 925 db.delete_document(1); 926 db.delete_document(50); 927 db.delete_document(512); 928 929 i = db.postlist_begin(""); 930 j = 2; 931 while (i != db.postlist_end("")) { 932 TEST_EQUAL(*i, j); 933 i++; 934 j++; 935 if (j == 50) j++; 936 } 937 TEST_EQUAL(j, 512); 938 939 return true; 940 } 941 942 static void test_emptyterm2_helper(Xapian::WritableDatabase & db) 943 { 944 // Don't bother with postlist_begin() because allpostlist tests cover that. 945 TEST_EXCEPTION(Xapian::InvalidArgumentError, db.positionlist_begin(1, "")); 946 TEST_EQUAL(db.get_doccount(), db.get_termfreq("")); 947 TEST_EQUAL(db.get_doccount() != 0, db.term_exists("")); 948 TEST_EQUAL(db.get_doccount(), db.get_collection_freq("")); 949 } 950 951 // tests results of passing an empty term to various methods 952 // equivalent of emptyterm1 for a writable database 953 static bool test_emptyterm2() 954 { 955 Xapian::WritableDatabase db(get_writable_database("apitest_manydocs")); 956 TEST_EQUAL(db.get_doccount(), 512); 957 test_emptyterm2_helper(db); 958 db.delete_document(1); 959 TEST_EQUAL(db.get_doccount(), 511); 960 test_emptyterm2_helper(db); 961 db.delete_document(50); 962 TEST_EQUAL(db.get_doccount(), 510); 963 test_emptyterm2_helper(db); 964 db.delete_document(512); 965 TEST_EQUAL(db.get_doccount(), 509); 966 test_emptyterm2_helper(db); 967 968 db = get_writable_database("apitest_onedoc"); 969 TEST_EQUAL(db.get_doccount(), 1); 970 test_emptyterm2_helper(db); 971 db.delete_document(1); 972 TEST_EQUAL(db.get_doccount(), 0); 973 test_emptyterm2_helper(db); 974 975 db = get_writable_database(""); 976 
TEST_EQUAL(db.get_doccount(), 0); 977 test_emptyterm2_helper(db); 978 979 return true; 980 } 981 912 982 // Check that PHRASE/NEAR becomes AND if there's no positional info in the 913 983 // database. 914 984 static bool test_phraseorneartoand1() … … 1035 1105 {"replacedoc3", test_replacedoc3}, 1036 1106 {"replacedoc4", test_replacedoc4}, 1037 1107 {"uniqueterm1", test_uniqueterm1}, 1108 {"emptyterm2", test_emptyterm2}, 1038 1109 {"phraseorneartoand1", test_phraseorneartoand1}, 1039 1110 {"longpositionlist1", test_longpositionlist1}, 1040 1111 {0, 0} 1041 1112 }; 1113 1114 /// The tests which use a writable, but local, backend 1115 test_desc writablelocaldb_tests[] = { 1116 {"allpostlist2", test_allpostlist2}, 1117 {0, 0} 1118 }; -
tests/api_wrdb.h
27 27 #include "testsuite.h" 28 28 29 29 extern test_desc writabledb_tests[]; 30 extern test_desc writablelocaldb_tests[]; 30 31 31 32 #endif /* XAPIAN_HGUARD_API_WRDB_H */ -
common/database.h
188 188 * use. 189 189 */ 190 190 LeafPostList * open_post_list(const string & tname) const { 191 if (!term_exists(tname)) { 191 if (!tname.empty() && !term_exists(tname)) { 192 192 DEBUGLINE(MATCH, tname + " is not in database."); 193 193 // Term doesn't exist in this database. However, we create 194 194 // a (empty) postlist for it to help make distributed searching -
api/omdatabase.cc
101 101 Database::postlist_begin(const string &tname) const 102 102 { 103 103 DEBUGAPICALL(PostingIterator, "Database::postlist_begin", tname); 104 if (tname.empty()) 105 throw InvalidArgumentError("Zero length terms are invalid"); 106 104 107 105 // Don't bother checking that the term exists first. If it does, we 108 106 // just end up doing more work, and if it doesn't, we save very little … … 248 246 Database::get_termfreq(const string & tname) const 249 247 { 250 248 DEBUGAPICALL(Xapian::doccount, "Database::get_termfreq", tname); 251 if (tname.empty()) 252 throw InvalidArgumentError("Zero length terms are invalid"); 249 if (tname.empty()) { 250 return get_doccount(); 251 } 253 252 Xapian::doccount tf = 0; 254 253 vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i; 255 254 for (i = internal.begin(); i != internal.end(); i++) { … … 262 261 Database::get_collection_freq(const string & tname) const 263 262 { 264 263 DEBUGAPICALL(Xapian::termcount, "Database::get_collection_freq", tname); 265 if (tname.empty()) 266 throw InvalidArgumentError("Zero length terms are invalid"); 264 if (tname.empty()) { 265 return get_doccount(); 266 } 267 267 268 268 Xapian::termcount cf = 0; 269 269 vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i; … … 303 303 bool 304 304 Database::term_exists(const string & tname) const 305 305 { 306 if (tname.empty()) 307 throw InvalidArgumentError("Zero length terms are invalid"); 306 if (tname.empty()) { 307 return get_doccount() != 0; 308 } 308 309 vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i; 309 310 for (i = internal.begin(); i != internal.end(); ++i) { 310 311 if ((*i)->term_exists(tname)) return true; -
backends/inmemory/inmemory_database.cc
260 260 return Xapian::PositionIterator(db->open_position_list(did, (*pos).tname)); 261 261 } 262 262 263 ///////////////////////////// 264 // InMemoryAllDocsPostList // 265 ///////////////////////////// 266 267 InMemoryAllDocsPostList::InMemoryAllDocsPostList(Xapian::Internal::RefCntPtr<const InMemoryDatabase> db_) 268 : did(0), db(db_) 269 { 270 } 271 272 Xapian::doccount 273 InMemoryAllDocsPostList::get_termfreq() const 274 { 275 return db->totdocs; 276 } 277 278 Xapian::docid 279 InMemoryAllDocsPostList::get_docid() const 280 { 281 Assert(did > 0); 282 Assert(did <= db->termlists.size()); 283 Assert(db->termlists[did - 1].is_valid); 284 return did; 285 } 286 287 Xapian::doclength 288 InMemoryAllDocsPostList::get_doclength() const 289 { 290 return db->get_doclength(did); 291 } 292 293 Xapian::termcount 294 InMemoryAllDocsPostList::get_wdf() const 295 { 296 return 1; 297 } 298 299 PositionList * 300 InMemoryAllDocsPostList::read_position_list() 301 { 302 throw Xapian::UnimplementedError("Can't open position list for all docs iterator"); 303 } 304 305 PositionList * 306 InMemoryAllDocsPostList::open_position_list() const 307 { 308 throw Xapian::UnimplementedError("Can't open position list for all docs iterator"); 309 } 310 311 PostList * 312 InMemoryAllDocsPostList::next(Xapian::weight /*w_min*/) 313 { 314 Assert(!at_end()); 315 do { 316 ++did; 317 } while (did <= db->termlists.size() && !db->termlists[did - 1].is_valid); 318 return NULL; 319 } 320 321 PostList * 322 InMemoryAllDocsPostList::skip_to(Xapian::docid did_, Xapian::weight /*w_min*/) 323 { 324 Assert(!at_end()); 325 if (did <= did_) { 326 did = did_; 327 while (did <= db->termlists.size() && !db->termlists[did - 1].is_valid) { 328 ++did; 329 } 330 } 331 return NULL; 332 } 333 334 bool 335 InMemoryAllDocsPostList::at_end() const 336 { 337 return (did > db->termlists.size()); 338 } 339 340 string 341 InMemoryAllDocsPostList::get_description() const 342 { 343 return "InMemoryAllDocsPostList" + 
om_tostring(did); 344 } 345 263 346 /////////////////////////// 264 347 // Actual database class // 265 348 /////////////////////////// … … 279 362 LeafPostList * 280 363 InMemoryDatabase::do_open_post_list(const string & tname) const 281 364 { 282 Assert(tname.size() != 0); 365 if (tname.empty()) { 366 if (termlists.empty()) 367 return new EmptyPostList(); 368 return new InMemoryAllDocsPostList(Xapian::Internal::RefCntPtr<const InMemoryDatabase>(this)); 369 } 283 370 map<string, InMemoryTerm>::const_iterator i = postlists.find(tname); 284 371 if (i == postlists.end() || i->second.term_freq == 0) 285 372 return new EmptyPostList(); -
backends/inmemory/inmemory_database.h
159 159 string get_description() const; 160 160 }; 161 161 162 /** A PostList over all docs in an inmemory database. 163 */ 164 class InMemoryAllDocsPostList : public LeafPostList { 165 friend class InMemoryDatabase; 166 private: 167 Xapian::docid did; 168 169 Xapian::Internal::RefCntPtr<const InMemoryDatabase> db; 170 171 InMemoryAllDocsPostList(Xapian::Internal::RefCntPtr<const InMemoryDatabase> db); 172 public: 173 Xapian::doccount get_termfreq() const; 174 175 Xapian::docid get_docid() const; // Gets current docid 176 Xapian::doclength get_doclength() const; // Length of current document 177 Xapian::termcount get_wdf() const; // Within Document Frequency 178 PositionList * read_position_list(); 179 PositionList * open_position_list() const; 180 181 PostList *next(Xapian::weight w_min); // Moves to next docid 182 183 PostList *skip_to(Xapian::docid did, Xapian::weight w_min); // Moves to next docid >= specified docid 184 185 // True if we're off the end of the list 186 bool at_end() const; 187 188 string get_description() const; 189 }; 190 162 191 // Term List 163 192 class InMemoryTermList : public LeafTermList { 164 193 friend class InMemoryDatabase; … … 193 222 * This is a prototype database, mainly used for debugging and testing. 194 223 */ 195 224 class InMemoryDatabase : public Xapian::Database::Internal { 225 friend class InMemoryAllDocsPostList; 196 226 private: 197 227 map<string, InMemoryTerm> postlists; 198 228 vector<InMemoryDoc> termlists;