Ticket #394: phrase-settling-pond-update-20120911.patch
File phrase-settling-pond-update-20120911.patch, 24.6 KB (added by , 12 years ago) |
---|
-
xapian-core/api/leafpostlist.cc
diff --git a/xapian-core/api/leafpostlist.cc b/xapian-core/api/leafpostlist.cc index 0bbeedb..c5e969b 100644
a b LeafPostList::count_matching_subqs() const 102 102 { 103 103 return 1; 104 104 } 105 106 std::string 107 LeafPostList::get_termname() const 108 { 109 return term; 110 } -
xapian-core/api/leafpostlist.h
diff --git a/xapian-core/api/leafpostlist.h b/xapian-core/api/leafpostlist.h index bf107ca..34c05fa 100644
a b class LeafPostList : public PostList { 86 86 TermFreqs get_termfreq_est_using_stats( 87 87 const Xapian::Weight::Internal & stats) const; 88 88 89 virtual std::string get_termname() const; 90 89 91 Xapian::termcount count_matching_subqs() const; 90 92 }; 91 93 -
xapian-core/api/postlist.cc
diff --git a/xapian-core/api/postlist.cc b/xapian-core/api/postlist.cc index 2684913..886c29f 100644
a b PostList::count_matching_subqs() const 78 78 return 0; 79 79 } 80 80 81 std::string 82 PostList::get_termname() const 83 { 84 return std::string(); 85 } 86 81 87 } -
xapian-core/api/postlist.h
diff --git a/xapian-core/api/postlist.h b/xapian-core/api/postlist.h index 0c7ca1f..8fd67e6 100644
a b class Xapian::PostingIterator::Internal : public Xapian::Internal::intrusive_bas 194 194 /// Count the number of leaf subqueries which match at the current position. 195 195 virtual Xapian::termcount count_matching_subqs() const; 196 196 197 /// If this is a term, return the name, otherwise return empty string. 198 virtual std::string get_termname() const; 199 197 200 /// Return a string description of this object. 198 201 virtual std::string get_description() const = 0; 199 202 }; -
xapian-core/api/queryinternal.cc
diff --git a/xapian-core/api/queryinternal.cc b/xapian-core/api/queryinternal.cc index 9359e48..0bc5daa 100644
a b class AndContext : public Context { 248 248 Xapian::termcount window_) 249 249 : op_(op__), begin(begin_), end(end_), window(window_) { } 250 250 251 PostList * postlist(PostList * pl, const vector<PostList*>& pls) const; 251 PostList * postlist(PostList * pl, const vector<PostList*>& pls, 252 QueryOptimiser * qopt) const; 252 253 }; 253 254 254 255 list<PosFilter> pos_filters; … … class AndContext : public Context { 264 265 }; 265 266 266 267 PostList * 267 AndContext::PosFilter::postlist(PostList * pl, const vector<PostList*>& pls) const 268 AndContext::PosFilter::postlist(PostList * pl, const vector<PostList*>& pls, 269 QueryOptimiser * qopt) const 268 270 try { 269 271 vector<PostList *>::const_iterator terms_begin = pls.begin() + begin; 270 272 vector<PostList *>::const_iterator terms_end = pls.begin() + end; … … try { 273 275 pl = new NearPostList(pl, window, terms_begin, terms_end); 274 276 } else if (window == end - begin) { 275 277 AssertEq(op_, Xapian::Query::OP_PHRASE); 276 pl = new ExactPhrasePostList(pl, terms_begin, terms_end); 278 if (qopt->top_and) { 279 vector<PostList *>::const_iterator j; 280 for (j = terms_begin; j != terms_end; ++j) { 281 const string & term = (*j)->get_termname(); 282 if (term.empty()) { 283 // FIXME: Currently all the subqueries must be terms. 284 qopt->pool_terms.clear(); 285 goto cannot_pool; 286 } 287 qopt->pool_terms.push_back(term); 288 } 289 // We can currently only handle hoisting out one phrase check. 290 // FIXME: Gather a list of checks, not a list of the terms in one 291 // check. 292 qopt->top_and = false; 293 } else { 294 cannot_pool: 295 pl = new ExactPhrasePostList(pl, terms_begin, terms_end); 296 } 277 297 } else { 278 298 AssertEq(op_, Xapian::Query::OP_PHRASE); 279 299 pl = new PhrasePostList(pl, window, terms_begin, terms_end); … … AndContext::postlist(QueryOptimiser* qopt) 308 328 list<PosFilter>::const_iterator i; 309 329 for (i = pos_filters.begin(); i != pos_filters.end(); ++i) { 310 330 const PosFilter & filter = *i; 311 pl.reset(filter.postlist(pl.release(), pls ));331 pl.reset(filter.postlist(pl.release(), pls, qopt)); 312 332 } 313 333 314 334 // Empty pls so our destructor doesn't delete them all! … … Query::Internal::postlist_sub_or_like(OrContext& ctx, 491 511 QueryOptimiser * qopt, 492 512 double factor) const 493 513 { 514 bool top_and = qopt->top_and; 515 qopt->top_and = false; 494 516 ctx.add_postlist(postlist(qopt, factor)); 517 qopt->top_and = top_and; 495 518 } 496 519 497 520 void … … Query::Internal::postlist_sub_xor(XorContext& ctx, 499 522 QueryOptimiser * qopt, 500 523 double factor) const 501 524 { 525 bool top_and = qopt->top_and; 526 qopt->top_and = false; 502 527 ctx.add_postlist(postlist(qopt, factor)); 528 qopt->top_and = top_and; 503 529 } 504 530 505 531 namespace Internal { … … QueryAndNot::postlist(QueryOptimiser * qopt, double factor) const 1147 1173 LOGCALL(QUERY, PostingIterator::Internal *, "QueryAndNot::postlist", qopt | factor); 1148 1174 // FIXME: Combine and-like side with and-like stuff above. 1149 1175 AutoPtr<PostList> l(subqueries[0].internal->postlist(qopt, factor)); 1176 bool top_and = qopt->top_and; 1177 qopt->top_and = false; 1150 1178 OrContext ctx(subqueries.size() - 1); 1151 1179 do_or_like(ctx, qopt, 0.0, 0, 1); 1152 1180 AutoPtr<PostList> r(ctx.postlist(qopt)); 1181 qopt->top_and = top_and; 1153 1182 RETURN(new AndNotPostList(l.release(), r.release(), 1154 1183 qopt->matcher, qopt->db_size)); 1155 1184 } … … QueryAndMaybe::postlist(QueryOptimiser * qopt, double factor) const 1180 1209 LOGCALL(QUERY, PostingIterator::Internal *, "QueryAndMaybe::postlist", qopt | factor); 1181 1210 // FIXME: Combine and-like side with and-like stuff above. 1182 1211 AutoPtr<PostList> l(subqueries[0].internal->postlist(qopt, factor)); 1212 bool top_and = qopt->top_and; 1213 qopt->top_and = false; 1183 1214 OrContext ctx(subqueries.size() - 1); 1184 1215 do_or_like(ctx, qopt, factor, 0, 1); 1185 1216 AutoPtr<PostList> r(ctx.postlist(qopt)); 1217 qopt->top_and = top_and; 1186 1218 RETURN(new AndMaybePostList(l.release(), r.release(), 1187 1219 qopt->matcher, qopt->db_size)); 1188 1220 } -
xapian-core/common/submatch.h
diff --git a/xapian-core/common/submatch.h b/xapian-core/common/submatch.h index c90eee0..bdd16f0 100644
a b class SubMatch : public Xapian::Internal::intrusive_base { 76 76 virtual PostList * get_postlist_and_term_info(MultiMatch *matcher, 77 77 std::map<std::string, 78 78 Xapian::MSet::Internal::TermFreqAndWeight> *termfreqandwts, 79 Xapian::termcount * total_subqs_ptr) 79 Xapian::termcount * total_subqs_ptr, 80 std::vector<std::string> & pool_terms) 80 81 = 0; 81 82 }; 82 83 -
xapian-core/matcher/Makefile.mk
diff --git a/xapian-core/matcher/Makefile.mk b/xapian-core/matcher/Makefile.mk index 0bae22c..85dc522 100644
a b noinst_HEADERS +=\ 4 4 matcher/branchpostlist.h\ 5 5 matcher/collapser.h\ 6 6 matcher/const_database_wrapper.h\ 7 matcher/exactphrasecheck.h\ 7 8 matcher/exactphrasepostlist.h\ 8 9 matcher/externalpostlist.h\ 9 10 matcher/extraweightpostlist.h\ … … lib_src +=\ 42 43 matcher/branchpostlist.cc\ 43 44 matcher/collapser.cc\ 44 45 matcher/const_database_wrapper.cc\ 46 matcher/exactphrasecheck.cc\ 45 47 matcher/exactphrasepostlist.cc\ 46 48 matcher/externalpostlist.cc\ 47 49 matcher/localsubmatch.cc\ -
new file xapian-core/matcher/exactphrasecheck.cc
diff --git a/xapian-core/matcher/exactphrasecheck.cc b/xapian-core/matcher/exactphrasecheck.cc new file mode 100644 index 0000000..c6eade9
- + 1 /** @file exactphrasecheck.cc 2 * @brief Check if terms form a particular exact phrase. 3 */ 4 /* Copyright (C) 2006,2007,2009,2012 Olly Betts 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 19 */ 20 21 // FIXME: this could probably share code with ExactPhrasePostList. 22 23 #include <config.h> 24 25 #include "exactphrasecheck.h" 26 27 #include "debuglog.h" 28 #include "omassert.h" 29 #include "backends/positionlist.h" 30 31 #include <algorithm> 32 #include <vector> 33 34 using namespace std; 35 36 class TermCompare { 37 const Xapian::Database & db; 38 vector<string> & terms; 39 40 public: 41 TermCompare(const Xapian::Database & db_, 42 vector<string> & terms_) 43 : db(db_), terms(terms_) { } 44 45 bool operator()(unsigned a, unsigned b) const { 46 return db.get_collection_freq(terms[a]) < db.get_collection_freq(terms[b]); 47 } 48 }; 49 50 ExactPhraseCheck::ExactPhraseCheck(const Xapian::Database & db_, 51 const vector<string> &terms_) 52 : db(db_), terms(terms_) 53 { 54 if (terms.empty()) { 55 poslists = NULL; 56 order = NULL; 57 return; 58 } 59 60 AssertRel(terms.size(),>,1); 61 size_t n = terms_.size(); 62 poslists = new PositionList*[n]; 63 try { 64 order = new unsigned[n]; 65 } catch (...) { 66 delete [] poslists; 67 throw; 68 } 69 for (size_t i = 0; i < n; ++i) { 70 poslists[i] = NULL; 71 order[i] = unsigned(i); 72 } 73 74 // We often don't need to read all the position lists, so rather than using 75 // the shortest position lists first, we approximate by using the terms 76 // with the lowest collection freq first. Overall this should give a 77 // similar order. 78 sort(order, order + terms.size(), TermCompare(db, terms)); 79 } 80 81 ExactPhraseCheck::~ExactPhraseCheck() 82 { 83 delete [] poslists; 84 delete [] order; 85 } 86 87 bool 88 ExactPhraseCheck::start_position_list(unsigned i, Xapian::docid did) 89 { 90 AssertRel(i,<,terms.size()); 91 unsigned index = order[i]; 92 // FIXME: nasty hacking around with internals and ref counts - we should 93 // just add a new Database::Internal method to do what we want. 94 Xapian::PositionIterator p = db.positionlist_begin(did, terms[index]); 95 PositionList * tmp = p.internal; 96 if (!tmp) 97 return false; 98 ++tmp->_refs; 99 p.internal = poslists[i]; 100 poslists[i] = tmp; 101 poslists[i]->index = index; 102 return true; 103 } 104 105 bool 106 ExactPhraseCheck::operator()(Xapian::docid did) 107 { 108 LOGCALL(MATCH, bool, "ExactPhraseCheck::operator()", did); 109 110 if (terms.size() <= 1) RETURN(true); 111 112 // We often don't need to read all the position lists, so rather than using 113 114 AssertRel(terms.size(),>,1); 115 116 bool result = false; 117 // If the first term we check only occurs too close to the start of the 118 // document, we only need to read one term's positions. E.g. search for 119 // "ripe mango" when the only occurrence of 'mango' in the current document 120 // is at position 0. 121 if (!start_position_list(0, did)) 122 goto done; 123 poslists[0]->skip_to(poslists[0]->index); 124 if (poslists[0]->at_end()) goto done; 125 126 // If we get here, we'll need to read the positionlists for at least two 127 // terms, so check the true positionlist length for the two terms with the 128 // lowest wdf and if necessary swap them so the true shorter one is first. 129 if (!start_position_list(1, did)) 130 goto done; 131 if (poslists[0]->get_size() < poslists[1]->get_size()) { 132 poslists[1]->skip_to(poslists[1]->index); 133 if (poslists[1]->at_end()) goto done; 134 swap(poslists[0], poslists[1]); 135 } 136 137 { 138 unsigned read_hwm = 1; 139 Xapian::termpos idx0 = poslists[0]->index; 140 do { 141 Xapian::termpos base = poslists[0]->get_position() - idx0; 142 unsigned i = 1; 143 while (true) { 144 if (i > read_hwm) { 145 read_hwm = i; 146 if (!start_position_list(i, did)) 147 goto done; 148 // FIXME: consider comparing with poslist[0] and swapping 149 // if less common. Should we allow for the number of positions 150 // we've read from poslist[0] already? 151 } 152 Xapian::termpos required = base + poslists[i]->index; 153 poslists[i]->skip_to(required); 154 if (poslists[i]->at_end()) goto done; 155 if (poslists[i]->get_position() != required) break; 156 if (++i == terms.size()) { 157 result = true; 158 goto done; 159 } 160 } 161 poslists[0]->next(); 162 } while (!poslists[0]->at_end()); 163 } 164 done: 165 for (size_t i = 0; i < terms.size(); ++i) { 166 delete poslists[i]; 167 poslists[i] = NULL; 168 } 169 RETURN(result); 170 } -
new file xapian-core/matcher/exactphrasecheck.h
diff --git a/xapian-core/matcher/exactphrasecheck.h b/xapian-core/matcher/exactphrasecheck.h new file mode 100644 index 0000000..52b9e9e
- + 1 /** @file exactphrasecheck.cc 2 * @brief Check if terms form a particular exact phrase. 3 */ 4 /* Copyright (C) 2006,2012 Olly Betts 5 * Copyright (C) 2009 Lemur Consulting Ltd 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 20 */ 21 22 #ifndef XAPIAN_INCLUDED_EXACTPHRASEPOSTLIST_H 23 #define XAPIAN_INCLUDED_EXACTPHRASEPOSTLIST_H 24 25 #include "xapian/database.h" 26 27 #include <string> 28 #include <vector> 29 30 typedef Xapian::PositionIterator::Internal PositionList; 31 32 /** Check for an exact phrase using positional information. 33 * 34 * Tests if the terms occur somewhere in the document in the order given 35 * and at adjacent term positions. 36 */ 37 class ExactPhraseCheck { 38 Xapian::Database db; 39 40 std::vector<std::string> terms; 41 42 PositionList ** poslists; 43 44 unsigned * order; 45 46 /// Start reading from the i-th position list. 47 bool start_position_list(unsigned i, Xapian::docid did); 48 49 public: 50 ExactPhraseCheck(const Xapian::Database & db_, 51 const std::vector<std::string> &terms_); 52 53 ~ExactPhraseCheck(); 54 55 /// Test if the specified document contains the terms as an exact phrase. 56 bool operator()(Xapian::docid did); 57 }; 58 59 #endif -
xapian-core/matcher/localsubmatch.cc
diff --git a/xapian-core/matcher/localsubmatch.cc b/xapian-core/matcher/localsubmatch.cc index 10c648f..763fdcc 100644
a b LocalSubMatch::start_match(Xapian::doccount first, 68 68 PostList * 69 69 LocalSubMatch::get_postlist_and_term_info(MultiMatch * matcher, 70 70 map<string, Xapian::MSet::Internal::TermFreqAndWeight> * termfreqandwts, 71 Xapian::termcount * total_subqs_ptr) 71 Xapian::termcount * total_subqs_ptr, 72 std::vector<std::string> & pool_terms) 72 73 { 73 74 LOGCALL(MATCH, PostList *, "LocalSubMatch::get_postlist_and_term_info", matcher | termfreqandwts | total_subqs_ptr); 74 75 (void)matcher; … … LocalSubMatch::get_postlist_and_term_info(MultiMatch * matcher, 82 83 83 84 PostList * pl; 84 85 { 85 QueryOptimiser opt(*db, *this, matcher );86 QueryOptimiser opt(*db, *this, matcher, pool_terms); 86 87 pl = query.internal->postlist(&opt, 1.0); 87 88 *total_subqs_ptr = opt.get_total_subqs(); 88 89 } -
xapian-core/matcher/localsubmatch.h
diff --git a/xapian-core/matcher/localsubmatch.h b/xapian-core/matcher/localsubmatch.h index 8e92416..ea50a2c 100644
a b class LocalSubMatch : public SubMatch { 89 89 PostList * get_postlist_and_term_info(MultiMatch *matcher, 90 90 std::map<std::string, 91 91 Xapian::MSet::Internal::TermFreqAndWeight> *termfreqandwts, 92 Xapian::termcount * total_subqs_ptr); 92 Xapian::termcount * total_subqs_ptr, 93 std::vector<std::string> & pool_terms); 93 94 94 95 /** Convert a postlist into a synonym postlist. 95 96 */ -
xapian-core/matcher/multimatch.cc
diff --git a/xapian-core/matcher/multimatch.cc b/xapian-core/matcher/multimatch.cc index 654908a..a221ce8 100644
a b 46 46 #include "valuestreamdocument.h" 47 47 #include "weight/weightinternal.h" 48 48 49 #include "exactphrasecheck.h" 50 49 51 #include <xapian/errorhandler.h> 50 52 #include <xapian/matchspy.h> 51 53 #include <xapian/version.h> // For XAPIAN_HAS_REMOTE_BACKEND … … MultiMatch::get_mset(Xapian::doccount first, Xapian::doccount maxitems, 355 357 map<string, Xapian::MSet::Internal::TermFreqAndWeight> * termfreqandwts_ptr; 356 358 termfreqandwts_ptr = &termfreqandwts; 357 359 360 vector<string> pool_terms; 358 361 Xapian::termcount total_subqs = 0; 359 362 // Keep a count of matches which we know exist, but we won't see. This 360 363 // occurs when a submatch is remote, and returns a lower bound on the … … MultiMatch::get_mset(Xapian::doccount first, Xapian::doccount maxitems, 364 367 for (size_t i = 0; i != leaves.size(); ++i) { 365 368 PostList *pl; 366 369 try { 370 if (!is_remote[i]) pool_terms.clear(); 367 371 pl = leaves[i]->get_postlist_and_term_info(this, 368 372 termfreqandwts_ptr, 369 &total_subqs); 373 &total_subqs, 374 pool_terms); 370 375 if (termfreqandwts_ptr && !termfreqandwts.empty()) 371 376 termfreqandwts_ptr = NULL; 372 377 if (is_remote[i]) { … … MultiMatch::get_mset(Xapian::doccount first, Xapian::doccount maxitems, 525 530 // Is the mset a valid heap? 526 531 bool is_heap = false; 527 532 533 size_t SETTLING_POND_SIZE = 0; 534 if (!pool_terms.empty()) { 535 const char * sps = getenv("POND_SIZE"); 536 SETTLING_POND_SIZE = sps ? atoi(sps) : 100000; 537 } 538 ExactPhraseCheck phrase_check(db, pool_terms); 539 // FIXME: a min/max heap is probably a better choice here (notably more 540 // compact) but the STL doesn't provide one so we'd have to find an 541 // implementation or write one. 542 multimap<double, Xapian::Internal::MSetItem> settling_pond; 528 543 while (true) { 529 544 bool pushback; 530 545 … … MultiMatch::get_mset(Xapian::doccount first, Xapian::doccount maxitems, 646 661 new_item.wt = wt; 647 662 } 648 663 664 if (SETTLING_POND_SIZE) { 665 if (items.size() >= max_msize) { 666 // Settling pond handling... 667 multimap<double, Xapian::Internal::MSetItem>::iterator it; 668 it = settling_pond.upper_bound(-min_weight); 669 settling_pond.erase(it, settling_pond.end()); 670 671 settling_pond.insert(make_pair(-new_item.wt, new_item)); 672 if (settling_pond.size() < SETTLING_POND_SIZE) { 673 continue; 674 } 675 676 // Take the last item off the heap, which will have a reasonably 677 // high weight in general. 678 it = settling_pond.begin(); 679 swap(new_item, it->second); 680 settling_pond.erase(it); 681 } 682 if (!phrase_check(new_item.did)) continue; 683 } 684 649 685 pushback = true; 650 686 651 687 // Perform collapsing on key if requested. … … new_greatest_weight: 808 844 } 809 845 } 810 846 847 multimap<double, Xapian::Internal::MSetItem>::iterator it; 848 for (it = settling_pond.begin(); it != settling_pond.end(); ++it) { 849 const Xapian::Internal::MSetItem & new_item = it->second; 850 if (new_item.wt < min_weight) break; 851 if (!phrase_check(new_item.did)) continue; 852 853 { 854 ++docs_matched; 855 if (items.size() >= max_msize) { 856 items.push_back(new_item); 857 if (!is_heap) { 858 is_heap = true; 859 make_heap(items.begin(), items.end(), mcmp); 860 } else { 861 push_heap<vector<Xapian::Internal::MSetItem>::iterator, 862 MSetCmp>(items.begin(), items.end(), mcmp); 863 } 864 pop_heap<vector<Xapian::Internal::MSetItem>::iterator, 865 MSetCmp>(items.begin(), items.end(), mcmp); 866 items.pop_back(); 867 868 min_item = items.front(); 869 if (sort_by == REL || sort_by == REL_VAL) { 870 if (docs_matched >= check_at_least) { 871 if (sort_by == REL) { 872 // We're done if this is a forward boolean match 873 // with only one database (bodgetastic, FIXME 874 // better if we can!) 875 if (rare(max_possible == 0 && sort_forward)) { 876 // In the multi database case, MergePostList 877 // currently processes each database 878 // sequentially (which actually may well be 879 // more efficient) so the docids in general 880 // won't arrive in order. 881 // FIXME: is this still good here: 882 // if (leaves.size() == 1) break; 883 } 884 } 885 if (min_item.wt > min_weight) { 886 LOGLINE(MATCH, "Setting min_weight to " << 887 min_item.wt << " from " << min_weight); 888 min_weight = min_item.wt; 889 } 890 } 891 } 892 } else { 893 items.push_back(new_item); 894 is_heap = false; 895 if (sort_by == REL && items.size() == max_msize) { 896 if (docs_matched >= check_at_least) { 897 // We're done if this is a forward boolean match 898 // with only one database (bodgetastic, FIXME 899 // better if we can!) 900 if (rare(max_possible == 0 && sort_forward)) { 901 // In the multi database case, MergePostList 902 // currently processes each database 903 // sequentially (which actually may well be 904 // more efficient) so the docids in general 905 // won't arrive in order. 906 // FIXME: if (leaves.size() == 1) break; 907 } 908 } 909 } 910 } 911 } 912 913 // Keep a track of the greatest weight we've seen. 914 if (new_item.wt > greatest_wt) { 915 greatest_wt = new_item.wt; 916 #ifdef XAPIAN_HAS_REMOTE_BACKEND 917 const unsigned int multiplier = db.internal.size(); 918 unsigned int db_num = (new_item.did - 1) % multiplier; 919 if (is_remote[db_num]) { 920 // Note that the greatest weighted document came from a remote 921 // database, and which one. 922 greatest_wt_subqs_db_num = db_num; 923 } else 924 #endif 925 { 926 greatest_wt_subqs_matched = pl->count_matching_subqs(); 927 #ifdef XAPIAN_HAS_REMOTE_BACKEND 928 greatest_wt_subqs_db_num = UINT_MAX; 929 #endif 930 } 931 if (percent_cutoff) { 932 double w = new_item.wt * percent_cutoff_factor; 933 if (w > min_weight) { 934 min_weight = w; 935 if (!is_heap) { 936 is_heap = true; 937 make_heap<vector<Xapian::Internal::MSetItem>::iterator, 938 MSetCmp>(items.begin(), items.end(), mcmp); 939 } 940 while (!items.empty() && items.front().wt < min_weight) { 941 pop_heap<vector<Xapian::Internal::MSetItem>::iterator, 942 MSetCmp>(items.begin(), items.end(), mcmp); 943 Assert(items.back().wt < min_weight); 944 items.pop_back(); 945 } 946 #ifdef XAPIAN_ASSERTIONS_PARANOID 947 vector<Xapian::Internal::MSetItem>::const_iterator i; 948 for (i = items.begin(); i != items.end(); ++i) { 949 Assert(i->wt >= min_weight); 950 } 951 #endif 952 } 953 } 954 } 955 } 956 957 811 958 // done with posting list tree 812 959 pl.reset(NULL); 813 960 -
xapian-core/matcher/queryoptimiser.h
diff --git a/xapian-core/matcher/queryoptimiser.h b/xapian-core/matcher/queryoptimiser.h index 7147b76..cfa6409 100644
a b class QueryOptimiser { 49 49 Xapian::termcount total_subqs; 50 50 51 51 public: 52 std::vector<std::string> & pool_terms; 53 54 bool top_and; 55 52 56 const Xapian::Database::Internal & db; 53 57 54 58 Xapian::doccount db_size; … … class QueryOptimiser { 57 61 58 62 QueryOptimiser(const Xapian::Database::Internal & db_, 59 63 LocalSubMatch & localsubmatch_, 60 MultiMatch * matcher_) 64 MultiMatch * matcher_, 65 std::vector<std::string> & pool_terms_) 61 66 : localsubmatch(localsubmatch_), total_subqs(0), 62 db(db_), db_size(db.get_doccount()), matcher(matcher_) { } 67 pool_terms(pool_terms_), top_and(true), db(db_), 68 db_size(db.get_doccount()), matcher(matcher_) { } 63 69 64 70 void inc_total_subqs() { ++total_subqs; } 65 71 -
xapian-core/matcher/remotesubmatch.cc
diff --git a/xapian-core/matcher/remotesubmatch.cc b/xapian-core/matcher/remotesubmatch.cc index ff5184e..4e6efa4 100644
a b RemoteSubMatch::start_match(Xapian::doccount first, 62 62 PostList * 63 63 RemoteSubMatch::get_postlist_and_term_info(MultiMatch *, 64 64 map<string, Xapian::MSet::Internal::TermFreqAndWeight> * termfreqandwts, 65 Xapian::termcount * total_subqs_ptr) 65 Xapian::termcount * total_subqs_ptr, 66 std::vector<std::string> &) 66 67 { 67 68 LOGCALL(MATCH, PostList *, "RemoteSubMatch::get_postlist_and_term_info", Literal("[matcher]") | termfreqandwts | total_subqs_ptr); 68 69 Xapian::MSet mset; -
xapian-core/matcher/remotesubmatch.h
diff --git a/xapian-core/matcher/remotesubmatch.h b/xapian-core/matcher/remotesubmatch.h index 1198d8a..7d29e16 100644
a b class RemoteSubMatch : public SubMatch { 72 72 PostList * get_postlist_and_term_info(MultiMatch *matcher, 73 73 std::map<std::string, 74 74 Xapian::MSet::Internal::TermFreqAndWeight> *termfreqandwts, 75 Xapian::termcount * total_subqs_ptr); 75 Xapian::termcount * total_subqs_ptr, 76 std::vector<std::string> & pool_terms); 76 77 77 78 /// Get percentage factor - only valid after get_postlist_and_term_info(). 78 79 double get_percent_factor() const { return percent_factor; }