Ticket #153: xapian-allprefixedterms-wildcard.patch
File xapian-allprefixedterms-wildcard.patch, 24.3 KB (added by , 17 years ago) |
---|
-
xapian-core/queryparser/queryparser.lemony
295 295 Term::as_wildcarded_query(State * state_) const 296 296 { 297 297 Database db = state_->get_database(); 298 TermIterator t = db.allterms_begin();299 298 string root = prefix; 300 299 root += name; 301 t.skip_to(root);300 TermIterator t = db.allterms_begin(root); 302 301 QpQuery q; 303 while (t != db.allterms_end( ) && (*t).substr(0, root.size()) == root) {302 while (t != db.allterms_end(root)) { 304 303 add_to_query(q, Query::OP_OR, QpQuery(*t, 1, pos)); 305 304 ++t; 306 305 } … … 316 315 Term::as_partial_query(State * state_) const 317 316 { 318 317 Database db = state_->get_database(); 319 TermIterator t = db.allterms_begin();320 318 string root = prefix; 321 319 root += name; 322 t.skip_to(root);320 TermIterator t = db.allterms_begin(root); 323 321 QpQuery q; 324 while (t != db.allterms_end( ) && (*t).substr(0, root.size()) == root) {322 while (t != db.allterms_end(root)) { 325 323 add_to_query(q, Query::OP_OR, QpQuery(*t, 1, pos)); 326 324 ++t; 327 325 } -
xapian-core/tests/api_db.cc
707 707 return true; 708 708 } 709 709 710 // test allterms iterators with prefixes 711 static bool test_allterms6() 712 { 713 Xapian::Database db; 714 db.add_database(get_database("apitest_allterms")); 715 db.add_database(get_database("apitest_allterms2")); 716 717 Xapian::TermIterator ati = db.allterms_begin("three"); 718 TEST(ati != db.allterms_end("three")); 719 TEST_EQUAL(*ati, "three"); 720 ati.skip_to("three"); 721 TEST(ati != db.allterms_end("three")); 722 TEST_EQUAL(*ati, "three"); 723 ati++; 724 TEST(ati == db.allterms_end("three")); 725 726 ati = db.allterms_begin("thre"); 727 TEST(ati != db.allterms_end("thre")); 728 TEST_EQUAL(*ati, "three"); 729 ati.skip_to("three"); 730 TEST(ati != db.allterms_end("thre")); 731 TEST_EQUAL(*ati, "three"); 732 ati++; 733 TEST(ati == db.allterms_end("thre")); 734 735 ati = db.allterms_begin("f"); 736 TEST(ati != db.allterms_end("f")); 737 TEST_EQUAL(*ati, "five"); 738 TEST(ati != db.allterms_end("f")); 739 ati.skip_to("three"); 740 TEST(ati == db.allterms_end("f")); 741 742 ati = db.allterms_begin("f"); 743 TEST(ati != db.allterms_end("f")); 744 TEST_EQUAL(*ati, "five"); 745 ati++; 746 TEST(ati != db.allterms_end("f")); 747 TEST_EQUAL(*ati, "four"); 748 ati++; 749 TEST(ati == db.allterms_end("f")); 750 751 return true; 752 } 753 710 754 // test that searching for a term with a special characters in it works 711 755 static bool test_specialterms1() 712 756 { … … 1632 1676 {"allterms3", test_allterms3}, 1633 1677 {"allterms4", test_allterms4}, 1634 1678 {"allterms5", test_allterms5}, 1679 {"allterms6", test_allterms6}, 1635 1680 {"specialterms2", test_specialterms2}, 1636 1681 {0, 0} 1637 1682 }; -
xapian-core/include/xapian/database.h
163 163 return TermIterator(NULL); 164 164 } 165 165 166 /** An iterator which runs across all terms with a given prefix. 167 * 168 * This is functionally similar to getting an iterator with 169 * allterms_begin() and then calling skip_to(prefix) on that iterator 170 * to move to the start of the prefix, but is more convenient (because 171 * it detects the end of the prefixed terms), and may be more 172 * efficient than simply calling skip_to() after opening the iterator, 173 * particularly for network databases. 174 * 175 * @param prefix The prefix to restrict the returned terms to. 176 */ 177 TermIterator allterms_begin(const std::string & prefix) const; 178 179 /** Corresponding end iterator to allterms_begin(prefix). 180 */ 181 TermIterator allterms_end(const std::string &) const { 182 return TermIterator(NULL); 183 } 184 166 185 /// Get the number of documents in the database. 167 186 Xapian::doccount get_doccount() const; 168 187 -
xapian-core/net/remoteserver.cc
237 237 } 238 238 239 239 void 240 RemoteServer::msg_allterms(const string & )240 RemoteServer::msg_allterms(const string &message) 241 241 { 242 const Xapian::TermIterator end = db->allterms_end(); 243 for (Xapian::TermIterator t = db->allterms_begin(); t != end; ++t) { 242 const char *p = message.data(); 243 const char *p_end = p + message.size(); 244 string prefix(p, p_end - p); 245 246 const Xapian::TermIterator end = db->allterms_end(prefix); 247 for (Xapian::TermIterator t = db->allterms_begin(prefix); t != end; ++t) { 244 248 string item = encode_length(t.get_termfreq()); 245 249 item += *t; 246 250 send_message(REPLY_ALLTERMS, item); -
xapian-core/common/remote-database.h
150 150 TermList * open_term_list(Xapian::docid did) const; 151 151 152 152 /// Iterate all terms. 153 TermList * open_allterms( ) const;153 TermList * open_allterms(const string & prefix) const; 154 154 155 155 bool has_positions() const; 156 156 -
xapian-core/common/remoteprotocol.h
32 32 // 27: Support for postlists (always passes the whole list across) 33 33 // 28: Pass document length in reply to MSG_TERMLIST 34 34 // 29: Serialisation of Xapian::Error includes error_string. 35 #define XAPIAN_REMOTE_PROTOCOL_VERSION 29 35 // 30: Pass the prefix parameter for MSG_ALLTERMS, and use it. 36 #define XAPIAN_REMOTE_PROTOCOL_VERSION 30 36 37 37 38 /// Message types (client -> server). 38 39 enum message_type { -
xapian-core/common/database.h
197 197 * 198 198 * This is a list of all the terms in the database 199 199 * 200 * @param prefix The prefix to restrict the terms to. 200 201 * @return A pointer to the newly created allterms list. 201 202 * This object must be deleted by the caller after 202 203 * use. 203 204 */ 204 virtual TermList * open_allterms( ) const = 0;205 virtual TermList * open_allterms(const string & prefix) const = 0; 205 206 206 207 /** Open a position list for the given term in the given document. 207 208 * -
xapian-core/api/omdatabase.cc
151 151 TermIterator 152 152 Database::allterms_begin() const 153 153 { 154 return allterms_begin(""); 155 } 156 157 TermIterator 158 Database::allterms_begin(const std::string & prefix) const 159 { 154 160 DEBUGAPICALL(TermIterator, "Database::allterms_begin", ""); 155 161 if (internal.empty()) RETURN(TermIterator(NULL)); 156 162 157 163 if (internal.size() == 1) 158 RETURN(TermIterator(internal[0]->open_allterms( )));164 RETURN(TermIterator(internal[0]->open_allterms(prefix))); 159 165 160 166 vector<TermList *> lists; 161 167 162 168 vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i; 163 169 for (i = internal.begin(); i != internal.end(); ++i) { 164 lists.push_back((*i)->open_allterms( ));170 lists.push_back((*i)->open_allterms(prefix)); 165 171 } 166 172 167 173 RETURN(TermIterator(new MultiAllTermsList(lists))); -
xapian-core/backends/inmemory/inmemory_alltermslist.cc
25 25 #include "inmemory_alltermslist.h" 26 26 27 27 InMemoryAllTermsList::InMemoryAllTermsList(const std::map<string, InMemoryTerm> *tmap_, 28 Xapian::Internal::RefCntPtr<const InMemoryDatabase> database_) 29 : tmap(tmap_), it(tmap->begin()), database(database_), started(false) 28 Xapian::Internal::RefCntPtr<const InMemoryDatabase> database_, 29 const string & prefix_) 30 : tmap(tmap_), it(tmap->begin()), database(database_), started(false), prefix(prefix_) 30 31 { 31 while (it != tmap->end() && it->second.term_freq == 0) ++it; 32 while (it != tmap->end() && 33 (it->second.term_freq == 0 || 34 it->first.substr(0, prefix.size()) != prefix)) 35 ++it; 32 36 } 33 37 34 38 InMemoryAllTermsList::~InMemoryAllTermsList() … … 73 77 // FIXME: might skip backwards - is this a problem? 74 78 it = tmap->lower_bound(tname); 75 79 while (it != tmap->end() && it->second.term_freq == 0) ++it; 80 if (it != tmap->end() && it->first.substr(0, prefix.size()) != prefix) 81 it = tmap->end(); 76 82 return NULL; 77 83 } 78 84 … … 86 92 ++it; 87 93 while (it != tmap->end() && it->second.term_freq == 0) ++it; 88 94 } 95 if (it != tmap->end() && it->first.substr(0, prefix.size()) != prefix) 96 it = tmap->end(); 89 97 return NULL; 90 98 } 91 99 -
xapian-core/backends/inmemory/inmemory_alltermslist.h
44 44 Xapian::Internal::RefCntPtr<const InMemoryDatabase> database; 45 45 46 46 bool started; 47 48 string prefix; 47 49 public: 48 50 /// Standard constructor for base class. 49 51 InMemoryAllTermsList(const std::map<string, InMemoryTerm> *tmap_, 50 Xapian::Internal::RefCntPtr<const InMemoryDatabase> database_); 52 Xapian::Internal::RefCntPtr<const InMemoryDatabase> database_, 53 const string & prefix); 51 54 52 55 /// Standard destructor for base class. 53 56 ~InMemoryAllTermsList(); -
xapian-core/backends/inmemory/inmemory_database.cc
707 707 } 708 708 709 709 TermList * 710 InMemoryDatabase::open_allterms( ) const710 InMemoryDatabase::open_allterms(const string & prefix) const 711 711 { 712 712 return new InMemoryAllTermsList(&postlists, 713 Xapian::Internal::RefCntPtr<const InMemoryDatabase>(this)); 713 Xapian::Internal::RefCntPtr<const InMemoryDatabase>(this), 714 prefix); 714 715 } -
xapian-core/backends/inmemory/inmemory_database.h
306 306 Xapian::Document::Internal * open_document(Xapian::docid did, bool lazy = false) const; 307 307 PositionList * open_position_list(Xapian::docid did, 308 308 const string & tname) const; 309 TermList * open_allterms( ) const;309 TermList * open_allterms(const string & prefix) const; 310 310 }; 311 311 312 312 #endif /* OM_HGUARD_INMEMORY_DATABASE_H */ -
xapian-core/backends/quartz/quartz_database.h
220 220 Xapian::Document::Internal * open_document(Xapian::docid did, bool lazy = false) const; 221 221 PositionList * open_position_list(Xapian::docid did, 222 222 const string & tname) const; 223 TermList * open_allterms( ) const;223 TermList * open_allterms(const string & prefix) const; 224 224 //@} 225 225 }; 226 226 … … 317 317 Xapian::Document::Internal * open_document(Xapian::docid did, bool lazy = false) const; 318 318 PositionList * open_position_list(Xapian::docid did, 319 319 const string & tname) const; 320 TermList * open_allterms( ) const;320 TermList * open_allterms(const string & prefix) const; 321 321 //@} 322 322 }; 323 323 -
xapian-core/backends/quartz/quartz_alltermslist.cc
29 29 30 30 QuartzAllTermsList::QuartzAllTermsList(Xapian::Internal::RefCntPtr<const Xapian::Database::Internal> database_, 31 31 AutoPtr<Bcursor> pl_cursor_, 32 quartz_tablesize_t size_) 32 quartz_tablesize_t size_, 33 const string & prefix_) 33 34 : database(database_), pl_cursor(pl_cursor_), size(size_), 34 started(false) 35 started(false), prefix(prefix_) 35 36 { 36 37 DEBUGCALL(DB, void, "QuartzAllTermsList", "[database_], [pl_cursor_]"); 37 /* Seek to the first term */ 38 pl_cursor->find_entry(string()); 38 if (prefix.empty()) { 39 /* Seek to the first term */ 40 pl_cursor->find_entry(string()); 39 41 40 if (pl_cursor->current_key.empty()) { 41 pl_cursor->next(); 42 if (pl_cursor->current_key.empty()) { 43 pl_cursor->next(); 44 } 45 } else { 46 // Seek to the first key before one with the desired prefix. 47 if (!pl_cursor->find_entry(pack_string_preserving_sort(prefix))) { 48 // Found a key which is before the prefix - advance to the 49 // first one following the prefix. 50 pl_cursor->next(); 51 } 42 52 } 43 53 44 54 is_at_end = pl_cursor->after_end(); … … 50 60 } 51 61 } 52 62 63 if (current_term.substr(0, prefix.size()) != prefix) 64 is_at_end = true; 65 53 66 have_stats = false; 54 67 } 55 68 … … 128 141 is_at_end = true; 129 142 } else { 130 143 next(); 144 // next() checks the prefix, so we don't need to do that here. 131 145 } 132 146 } else { 133 147 // This assertion isn't true if key contains zero bytes. 134 148 // Assert(key == pl_cursor->current_key); 135 149 current_term = tname; 150 151 // Check that we haven't gone past the prefix. 
152 if (current_term.substr(0, prefix.size()) != prefix) 153 is_at_end = true; 136 154 } 137 155 RETURN(NULL); 138 156 } … … 156 174 if (!unpack_string_preserving_sort(&start, end, current_term)) { 157 175 throw Xapian::DatabaseCorruptError("Failed to read the key field from a Bcursor's key"); 158 176 } 177 if (current_term.substr(0, prefix.size()) != prefix) { 178 is_at_end = true; 179 break; 180 } 159 181 // Check if this is the first chunk of a postlist, skip otherwise 160 182 if (start == end) break; 161 183 } -
xapian-core/backends/quartz/quartz_alltermslist.h
55 55 /// Cached termname 56 56 string current_term; 57 57 58 /// The prefix to restrict the terms to. 59 string prefix; 60 58 61 /// Cached statistics 59 62 mutable bool have_stats; 60 63 mutable Xapian::termcount termfreq; … … 65 68 /// Standard constructor for base class. 66 69 QuartzAllTermsList(Xapian::Internal::RefCntPtr<const Xapian::Database::Internal> database_, 67 70 AutoPtr<Bcursor> pl_cursor_, 68 quartz_tablesize_t size_); 71 quartz_tablesize_t size_, 72 const string & prefix_); 69 73 70 74 /// Standard destructor for base class. 71 75 ~QuartzAllTermsList(); -
xapian-core/backends/quartz/quartz_database.cc
663 663 } 664 664 665 665 TermList * 666 QuartzDatabase::open_allterms( ) const666 QuartzDatabase::open_allterms(const string & prefix) const 667 667 { 668 668 DEBUGCALL(DB, TermList *, "QuartzDatabase::open_allterms", ""); 669 669 AutoPtr<Bcursor> pl_cursor(postlist_table.cursor_get()); 670 670 RETURN(new QuartzAllTermsList(Xapian::Internal::RefCntPtr<const QuartzDatabase>(this), 671 pl_cursor, postlist_table.get_entry_count() ));671 pl_cursor, postlist_table.get_entry_count(), prefix)); 672 672 } 673 673 674 674 size_t QuartzWritableDatabase::flush_threshold = 0; … … 1195 1195 } 1196 1196 1197 1197 TermList * 1198 QuartzWritableDatabase::open_allterms( ) const1198 QuartzWritableDatabase::open_allterms(const string & prefix) const 1199 1199 { 1200 1200 DEBUGCALL(DB, TermList *, "QuartzWritableDatabase::open_allterms", ""); 1201 1201 if (transaction_active()) … … 1205 1205 QuartzPostListTable *t = &database_ro.postlist_table; 1206 1206 AutoPtr<Bcursor> pl_cursor(t->cursor_get()); 1207 1207 RETURN(new QuartzAllTermsList(Xapian::Internal::RefCntPtr<const QuartzWritableDatabase>(this), 1208 pl_cursor, t->get_entry_count() ));1208 pl_cursor, t->get_entry_count(), prefix)); 1209 1209 } 1210 1210 1211 1211 void -
xapian-core/backends/remote/remote-database.cc
152 152 } 153 153 154 154 TermList * 155 RemoteDatabase::open_allterms( ) const {155 RemoteDatabase::open_allterms(const string & prefix) const { 156 156 // Ensure that avlength and doccount are up-to-date. 157 157 if (!cached_stats_valid) update_stats(); 158 158 159 send_message(MSG_ALLTERMS, "");159 send_message(MSG_ALLTERMS, prefix); 160 160 161 161 AutoPtr<NetworkTermList> tlist; 162 162 tlist = new NetworkTermList(0.0, doccount, -
xapian-core/backends/flint/flint_database.h
213 213 Xapian::Document::Internal * open_document(Xapian::docid did, bool lazy = false) const; 214 214 PositionList * open_position_list(Xapian::docid did, 215 215 const string & tname) const; 216 TermList * open_allterms( ) const;216 TermList * open_allterms(const string & prefix) const; 217 217 //@} 218 218 }; 219 219 … … 310 310 Xapian::Document::Internal * open_document(Xapian::docid did, bool lazy = false) const; 311 311 PositionList * open_position_list(Xapian::docid did, 312 312 const string & tname) const; 313 TermList * open_allterms( ) const;313 TermList * open_allterms(const string & prefix) const; 314 314 //@} 315 315 }; 316 316 -
xapian-core/backends/flint/flint_alltermslist.cc
103 103 throw Xapian::DatabaseCorruptError("PostList table key has unexpected format"); 104 104 } 105 105 106 if (current_term.substr(0, prefix.size()) != prefix) { 107 // We've reached the end of the prefixed terms. 108 cursor->to_end(); 109 current_term = ""; 110 break; 111 } 112 106 113 // If this key is for the first chunk of a postlist, we're done. Otherwise we need 107 114 // to skip past continuation chunks until we find the first chunk of the next postlist. 108 115 if (p == pend) break; … … 114 121 FlintAllTermsList::skip_to(const string &tname) 115 122 { 116 123 DEBUGCALL(DB, TermList *, "FlintAllTermsList::skip_to", tname); 124 if (at_end()) abort(); 117 125 Assert(!at_end()); 118 126 // Set termfreq to 0 to indicate no value has been read for the current term. 119 127 termfreq = 0; -
xapian-core/backends/flint/flint_cursor.h
199 199 */ 200 200 bool find_entry(const string &key); 201 201 202 /** Set the cursor to be off the end of the table. 203 */ 204 void to_end() { is_after_end = true; } 205 202 206 /** Determine whether cursor is off the end of table. 203 207 * 204 208 * @return true if the cursor has been moved off the end of the -
xapian-core/backends/flint/flint_alltermslist.h
48 48 /// The termname at the current position. 49 49 string current_term; 50 50 51 /// The prefix to restrict the terms to. 52 string prefix; 53 51 54 /** The term frequency of the term at the current position. 52 55 * 53 56 * If this value is zero, then we haven't read the term frequency or … … 64 67 65 68 public: 66 69 FlintAllTermsList(Xapian::Internal::RefCntPtr<const Xapian::Database::Internal> database_, 67 const FlintPostListTable * pltab) : database(database_), termfreq(0) { 70 const FlintPostListTable * pltab, 71 const string & prefix_) 72 : database(database_), prefix(prefix_), termfreq(0) { 68 73 // The number of entries in the postlist table will be the number of 69 74 // terms, probably plus some extra entries for chunked posting lists, 70 75 // plus 1 for the metainfo key (unless the table is completely empty). … … 77 82 approx_size = pltab->get_entry_count(); 78 83 if (approx_size) --approx_size; 79 84 80 // Seek to the metainfo key, so the first next will advance us to the81 // first real key.82 85 cursor = pltab->cursor_get(); 83 cursor->find_entry(string("", 1)); 86 if (prefix.empty()) { 87 // Seek to the metainfo key, so the first next() will advance us to the 88 // first real key. 89 cursor->find_entry(string("", 1)); 90 } else { 91 // Seek to the first key before one with the desired prefix. 92 if (cursor->find_entry(pack_string_preserving_sort(prefix))) { 93 // Found a key which is exactly the prefix - move back, so that 94 // next() moves to it. 95 cursor->prev(); 96 } 97 } 84 98 } 85 99 86 100 /// Destructor. -
xapian-core/backends/flint/flint_database.cc
516 516 } 517 517 518 518 TermList * 519 FlintDatabase::open_allterms( ) const519 FlintDatabase::open_allterms(const string & prefix) const 520 520 { 521 521 DEBUGCALL(DB, TermList *, "FlintDatabase::open_allterms", ""); 522 522 RETURN(new FlintAllTermsList(Xapian::Internal::RefCntPtr<const FlintDatabase>(this), 523 &postlist_table ));523 &postlist_table, prefix)); 524 524 } 525 525 526 526 size_t FlintWritableDatabase::flush_threshold = 0; … … 1053 1053 } 1054 1054 1055 1055 TermList * 1056 FlintWritableDatabase::open_allterms( ) const1056 FlintWritableDatabase::open_allterms(const string & prefix) const 1057 1057 { 1058 1058 DEBUGCALL(DB, TermList *, "FlintWritableDatabase::open_allterms", ""); 1059 1059 if (transaction_active()) … … 1062 1062 // need to flush. 1063 1063 if (changes_made) do_flush_const(); 1064 1064 RETURN(new FlintAllTermsList(Xapian::Internal::RefCntPtr<const FlintWritableDatabase>(this), 1065 &database_ro.postlist_table ));1065 &database_ro.postlist_table, prefix)); 1066 1066 } 1067 1067 1068 1068 void -
xapian-bindings/python/extra.i
536 536 Query.__iter__ = _query_gen_iter 537 537 538 538 # Modify Database to add an "__iter__()" method and an "allterms()" method. 539 def _database_gen_allterms_iter(self ):539 def _database_gen_allterms_iter(self, prefix=None): 540 540 """Get an iterator over all the terms in the database. 541 541 542 542 The iterator will return TermListItem objects, but these will not support … … 545 545 Access to term frequency information is only available until the iterator 546 546 has moved on. 547 547 548 If prefix is supplied, only terms which start with that prefix will be 549 returned. 550 548 551 """ 549 return TermIter(self.allterms_begin(), self.allterms_end(), 550 has_termfreq=TermIter.LAZY) 552 if prefix is None: 553 return TermIter(self.allterms_begin(), self.allterms_end(), 554 has_termfreq=TermIter.LAZY) 555 else: 556 return TermIter(self.allterms_begin(prefix), self.allterms_end(prefix), 557 has_termfreq=TermIter.LAZY) 551 558 Database.__iter__ = _database_gen_allterms_iter 552 559 Database.allterms = _database_gen_allterms_iter 553 560 -
xapian-bindings/xapian.i
657 657 PositionIterator positionlist_end(docid did, const std::string& tname) const; 658 658 TermIterator allterms_begin() const; 659 659 TermIterator allterms_end() const; 660 TermIterator allterms_begin(const std::string &prefix) const; 661 TermIterator allterms_end(const std::string &prefix) const; 660 662 661 663 doccount get_doccount() const; 662 664 docid get_lastdocid() const;