Ticket #50: opsynonym_changes_12590_12591.patch
File opsynonym_changes_12590_12591.patch, 121.8 KB (added by , 16 years ago) |
---|
-
xapian-maintainer-tools/win32msvc/win32_matcher.mak
34 34 $(INTDIR)\queryoptimiser.obj\ 35 35 $(INTDIR)\rset.obj\ 36 36 $(INTDIR)\selectpostlist.obj\ 37 $(INTDIR)\synonympostlist.obj\ 37 38 $(INTDIR)\valuerangepostlist.obj\ 38 39 $(INTDIR)\valuegepostlist.obj\ 39 40 $(INTDIR)\xorpostlist.obj\ … … 60 61 $(INTDIR)\queryoptimiser.cc\ 61 62 $(INTDIR)\rset.cc\ 62 63 $(INTDIR)\selectpostlist.cc\ 64 $(INTDIR)\synonympostlist.cc\ 63 65 $(INTDIR)\valuerangepostlist.cc\ 64 66 $(INTDIR)\valuegepostlist.cc\ 65 67 $(INTDIR)\xorpostlist.cc\ -
xapian-core/queryparser/queryparser.lemony
2 2 /* queryparser.lemony: build a Xapian::Query object from a user query string. 3 3 * 4 4 * Copyright (C) 2004,2005,2006,2007,2008 Olly Betts 5 * Copyright (C) 2007,2008,2009 Lemur Consulting Ltd 5 6 * 6 7 * This program is free software; you can redistribute it and/or 7 8 * modify it under the terms of the GNU General Public License as … … 287 288 end = db.synonyms_end(term); 288 289 } 289 290 while (syn != end) { 290 q = Query(Query::OP_ OR, q, Query(*syn, 1, pos));291 q = Query(Query::OP_SYNONYM, q, Query(*syn, 1, pos)); 291 292 ++syn; 292 293 } 293 294 } … … 353 354 } 354 355 } 355 356 delete this; 356 return new Query(Query::OP_ OR, subqs.begin(), subqs.end());357 return new Query(Query::OP_SYNONYM, subqs.begin(), subqs.end()); 357 358 } 358 359 359 360 Query * 360 361 Term::as_partial_query(State * state_) const 361 362 { 362 363 Database db = state_->get_database(); 363 vector<Query> subqs; 364 vector<Query> subqs_partial; // A synonym of all the partial terms. 365 vector<Query> subqs_full; // A synonym of all the full terms. 364 366 list<string>::const_iterator piter; 365 367 for (piter = prefixes.begin(); piter != prefixes.end(); ++piter) { 366 368 string root = *piter; 367 369 root += name; 368 370 TermIterator t = db.allterms_begin(root); 369 371 while (t != db.allterms_end(root)) { 370 subqs .push_back(Query(*t, 1, pos));372 subqs_partial.push_back(Query(*t, 1, pos)); 371 373 ++t; 372 374 } 373 375 // Add the term, as it would normally be handled, as an alternative. 
374 subqs .push_back(Query(make_term(*piter), 1, pos));376 subqs_full.push_back(Query(make_term(*piter), 1, pos)); 375 377 } 376 378 delete this; 377 return new Query(Query::OP_OR, subqs.begin(), subqs.end()); 379 return new Query(Query::OP_OR, 380 Query(Query::OP_SYNONYM, 381 subqs_partial.begin(), subqs_partial.end()), 382 Query(Query::OP_SYNONYM, 383 subqs_full.begin(), subqs_full.end())); 378 384 } 379 385 380 386 inline bool … … 1176 1182 subqs2.push_back(Query(*syn, 1, pos)); 1177 1183 ++syn; 1178 1184 } 1179 Query q_synonym_terms(Query::OP_ OR, subqs2.begin(), subqs2.end());1185 Query q_synonym_terms(Query::OP_SYNONYM, subqs2.begin(), subqs2.end()); 1180 1186 subqs2.clear(); 1181 subqs.push_back(Query(Query::OP_ OR,1187 subqs.push_back(Query(Query::OP_SYNONYM, 1182 1188 q_original_terms, q_synonym_terms)); 1183 1189 } 1184 1190 } else { -
xapian-core/matcher/extraweightpostlist.cc
1 /* extraweightpostlist.cc: non-inline method(s) of ExtraWeightPostList 2 * 3 * Copyright 2009 Lemur Consulting Ltd 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation; either version 2 of the 8 * License, or (at your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, write to the Free Software 17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 18 * USA 19 */ 20 21 #include <config.h> 22 23 #include "extraweightpostlist.h" 24 #include "omassert.h" 25 26 TermFreqs 27 ExtraWeightPostList::get_termfreq_est_using_stats( 28 const Xapian::Weight::Internal &) const 29 { 30 // Should never get called: trips Assert(false), then falls back to returning a default-constructed TermFreqs. 31 Assert(false); 32 return TermFreqs(); 33 } -
xapian-core/matcher/extraweightpostlist.h
Property changes on: xapian-core/matcher/extraweightpostlist.cc ___________________________________________________________________ Added: svn:eol-style + native
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2001 Ananova Ltd 5 5 * Copyright 2003,2004,2007,2009 Olly Betts 6 * Copyright 2009 Lemur Consulting Ltd 6 7 * 7 8 * This program is free software; you can redistribute it and/or 8 9 * modify it under the terms of the GNU General Public License as … … 48 49 return pl->get_termfreq_est(); 49 50 } 50 51 52 TermFreqs get_termfreq_est_using_stats( 53 const Xapian::Weight::Internal & stats) const; 54 51 55 Xapian::docid get_docid() const { return pl->get_docid(); } 52 56 53 57 Xapian::weight get_weight() const { -
xapian-core/matcher/Makefile.mk
18 18 matcher/queryoptimiser.h\ 19 19 matcher/remotesubmatch.h\ 20 20 matcher/selectpostlist.h\ 21 matcher/synonympostlist.h\ 21 22 matcher/valuegepostlist.h\ 22 23 matcher/valuerangepostlist.h\ 23 24 matcher/xorpostlist.h … … 43 44 matcher/emptysubmatch.cc\ 44 45 matcher/exactphrasepostlist.cc\ 45 46 matcher/externalpostlist.cc\ 47 matcher/extraweightpostlist.cc\ 46 48 matcher/localmatch.cc\ 47 49 matcher/mergepostlist.cc\ 48 50 matcher/msetcmp.cc\ … … 54 56 matcher/queryoptimiser.cc\ 55 57 matcher/rset.cc\ 56 58 matcher/selectpostlist.cc\ 59 matcher/synonympostlist.cc\ 57 60 matcher/valuegepostlist.cc\ 58 61 matcher/valuerangepostlist.cc\ 59 62 matcher/xorpostlist.cc -
xapian-core/matcher/andpostlist.h
2 2 * 3 3 * Copyright 2002 Ananova Ltd 4 4 * Copyright 2003,2004,2009 Olly Betts 5 * Copyright 2009 Lemur Consulting Ltd 5 6 * 6 7 * This program is free software; you can redistribute it and/or 7 8 * modify it under the terms of the GNU General Public License as … … 43 44 Xapian::doccount get_termfreq_min() const; 44 45 Xapian::doccount get_termfreq_est() const; 45 46 47 TermFreqs get_termfreq_est_using_stats( 48 const Xapian::Weight::Internal & stats) const; 49 46 50 Xapian::docid get_docid() const; 47 51 Xapian::weight get_weight() const; 48 52 Xapian::weight get_maxweight() const; … … 70 74 MultiMatch *matcher_, 71 75 Xapian::doccount dbsize_, 72 76 bool replacement = false); 77 78 /** get_wdf() for AND postlists returns the sum of the wdfs of the sub 79 * postlists - this is desirable when the AND is part of a synonym. 80 */ 81 Xapian::termcount get_wdf() const; 73 82 }; 74 83 75 84 #endif /* OM_HGUARD_ANDPOSTLIST_H */ -
xapian-core/matcher/multimatch.cc
794 794 795 795 LOGVALUE(MATCH, denom); 796 796 LOGVALUE(MATCH, percent_scale); 797 Assert(percent_scale <= denom); 798 denom *= greatest_wt; 799 Assert(denom > 0); 800 percent_scale /= denom; 797 AssertRel(percent_scale,<=,denom); 798 if (denom == 0) { 799 // This happens if the top-level operator is OP_SYNONYM. 800 percent_scale = 1.0 / greatest_wt; 801 } else { 802 denom *= greatest_wt; 803 AssertRel(denom,>,0); 804 percent_scale /= denom; 805 } 801 806 } else { 802 807 // If all the terms match, the 2 sums of weights cancel 803 808 percent_scale = 1.0 / greatest_wt; -
xapian-core/matcher/localmatch.cc
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009 Olly Betts 6 * Copyright 2007 Lemur Consulting Ltd6 * Copyright 2007,2008,2009 Lemur Consulting Ltd 7 7 * 8 8 * This program is free software; you can redistribute it and/or 9 9 * modify it under the terms of the GNU General Public License as … … 31 31 #include "omdebug.h" 32 32 #include "omqueryinternal.h" 33 33 #include "queryoptimiser.h" 34 #include "synonympostlist.h" 34 35 #include "weightinternal.h" 35 36 36 37 #include <cfloat> … … 111 112 } 112 113 113 114 PostList * 115 LocalSubMatch::make_synonym_postlist(PostList * or_pl, MultiMatch * matcher, 116 double factor) 117 { 118 DEBUGCALL(MATCH, PostList *, "LocalSubMatch::make_synonym_postlist", 119 "[or_pl], [matcher], " << factor); 120 LOGVALUE(MATCH, or_pl->get_termfreq_est()); 121 AutoPtr<SynonymPostList> res(new SynonymPostList(or_pl, matcher)); 122 AutoPtr<Xapian::Weight> wt(wt_factory->clone_()); 123 124 TermFreqs freqs(or_pl->get_termfreq_est_using_stats(*stats)); 125 wt->init_(*stats, qlen, factor, freqs.termfreq, freqs.reltermfreq); 126 127 res->set_weight(wt.release()); 128 RETURN(res.release()); 129 } 130 131 PostList * 114 132 LocalSubMatch::postlist_from_op_leaf_query(const Xapian::Query::Internal *query, 115 133 double factor) 116 134 { -
xapian-core/matcher/localmatch.h
2 2 * @brief SubMatch class for a local database. 3 3 */ 4 4 /* Copyright (C) 2006,2007,2009 Olly Betts 5 * Copyright (C) 2007 Lemur Consulting Ltd 5 6 * 6 7 * This program is free software; you can redistribute it and/or modify 7 8 * it under the terms of the GNU General Public License as published by … … 82 83 PostList * get_postlist_and_term_info(MultiMatch *matcher, 83 84 std::map<string, Xapian::MSet::Internal::TermFreqAndWeight> *termfreqandwts); 84 85 86 /** Convert a postlist into a synonym postlist. 87 */ 88 PostList * make_synonym_postlist(PostList * or_pl, MultiMatch * matcher, 89 double factor); 90 85 91 /** Convert an OP_LEAF query to a PostList. 86 92 * 87 93 * This is called by QueryOptimiser when it reaches an OP_LEAF query. -
xapian-core/matcher/msetpostlist.h
2 2 * @brief PostList returning entries from an MSet 3 3 */ 4 4 /* Copyright (C) 2006,2007,2008,2009 Olly Betts 5 * Copyright (C) 2009 Lemur Consulting Ltd 5 6 * 6 7 * This program is free software; you can redistribute it and/or modify 7 8 * it under the terms of the GNU General Public License as published by … … 62 63 63 64 Xapian::doccount get_termfreq_max() const; 64 65 66 TermFreqs get_termfreq_est_using_stats( 67 const Xapian::Weight::Internal & stats) const; 68 65 69 Xapian::weight get_maxweight() const; 66 70 67 71 Xapian::docid get_docid() const; -
xapian-core/matcher/xorpostlist.h
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2003,2004,2009 Olly Betts 6 * Copyright 2009 Lemur Consulting Ltd 6 7 * 7 8 * This program is free software; you can redistribute it and/or 8 9 * modify it under the terms of the GNU General Public License as … … 47 48 Xapian::doccount get_termfreq_max() const; 48 49 Xapian::doccount get_termfreq_min() const; 49 50 Xapian::doccount get_termfreq_est() const; 51 TermFreqs get_termfreq_est_using_stats( 52 const Xapian::Weight::Internal & stats) const; 50 53 51 54 Xapian::docid get_docid() const; 52 55 Xapian::weight get_weight() const; … … 69 72 PostList * right_, 70 73 MultiMatch * matcher_, 71 74 Xapian::doccount dbsize_); 75 76 /** get_wdf() for XOR postlists returns the wdf of the sub postlist 77 * which is at the current document. 78 */ 79 Xapian::termcount get_wdf() const; 72 80 }; 73 81 74 82 #endif /* OM_HGUARD_XORPOSTLIST_H */ -
xapian-core/matcher/synonympostlist.h
1 /** @file synonympostlist.h 2 * @brief Combine subqueries, weighting as if they are synonyms 3 */ 4 /* Copyright 2007,2009 Lemur Consulting Ltd 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 19 */ 20 21 #ifndef XAPIAN_INCLUDED_SYNONYMPOSTLIST_H 22 #define XAPIAN_INCLUDED_SYNONYMPOSTLIST_H 23 24 #include "multimatch.h" 25 #include "postlist.h" 26 27 /** A postlist comprising several postlists SYNONYMed together. 28 * 29 * This postlist returns all postings in the OR of the sub postlists, but 30 * returns weights as if they represented a single term. The term frequency 31 * portion of the weight is approximated. 32 */ 33 class SynonymPostList : public PostList { 34 /** The subtree, which starts as an OR of all the sub-postlists being 35 * joined with Synonym, but may decay into something else. 36 */ 37 PostList * subtree; 38 39 /** The object which is using this postlist to perform a match. 40 * 41 * This object needs to be notified when the tree changes such that the 42 * maximum weights need to be recalculated. 43 */ 44 MultiMatch * matcher; 45 46 /// Weighting object used for calculating the synonym weights. 47 const Xapian::Weight * wt; 48 49 /// Flag indicating whether the weighting object needs the doclength. 
50 bool want_doclength; 51 52 /// Flag indicating whether the weighting object needs the wdf. 53 bool want_wdf; 54 55 /// Flag indicating if we've called recalc_maxweight on the subtree yet. 56 bool have_calculated_subtree_maxweights; 57 58 public: 59 SynonymPostList(PostList * subtree_, MultiMatch * matcher_) 60 : subtree(subtree_), matcher(matcher_), wt(NULL), 61 want_doclength(false), want_wdf(false), 62 have_calculated_subtree_maxweights(false) { } 63 64 ~SynonymPostList(); 65 66 /** Set the weight object to be used for the synonym postlist. 67 * 68 * Ownership of the weight object passes to the synonym postlist - the 69 * caller must not delete it after use. 70 */ 71 void set_weight(const Xapian::Weight * wt_); 72 73 PostList *next(Xapian::weight w_min); 74 PostList *skip_to(Xapian::docid did, Xapian::weight w_min); 75 76 Xapian::weight get_weight() const; 77 Xapian::weight get_maxweight() const; 78 Xapian::weight recalc_maxweight(); 79 80 // The following methods just call through to the subtree. 81 Xapian::termcount get_wdf() const; 82 Xapian::doccount get_termfreq_min() const; 83 Xapian::doccount get_termfreq_est() const; 84 Xapian::doccount get_termfreq_max() const; 85 TermFreqs get_termfreq_est_using_stats( 86 const Xapian::Weight::Internal & stats) const; 87 Xapian::docid get_docid() const; 88 Xapian::termcount get_doclength() const; 89 bool at_end() const; 90 91 std::string get_description() const; 92 }; 93 94 #endif /* XAPIAN_INCLUDED_SYNONYMPOSTLIST_H */ -
xapian-core/matcher/phrasepostlist.cc
Property changes on: xapian-core/matcher/synonympostlist.h ___________________________________________________________________ Added: svn:eol-style + native
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2002,2003,2004,2007,2008,2009 Olly Betts 6 * Copyright 2009 Lemur Consulting Ltd 6 7 * 7 8 * This program is free software; you can redistribute it and/or 8 9 * modify it under the terms of the GNU General Public License as … … 149 150 return std::max(wdf, 1u); 150 151 } 151 152 153 TermFreqs 154 NearPostList::get_termfreq_est_using_stats( 155 const Xapian::Weight::Internal & stats) const 156 { 157 LOGCALL(MATCH, TermFreqs, 158 "NearPostList::get_termfreq_est_using_stats", stats); 159 // No idea how to estimate this - FIXME 160 TermFreqs result(source->get_termfreq_est_using_stats(stats)); 161 result.termfreq /= 2; 162 result.reltermfreq /= 2; 163 RETURN(result); 164 } 165 152 166 std::string 153 167 NearPostList::get_description() const 154 168 { … … 264 278 return std::max(wdf / 2, 1u); 265 279 } 266 280 281 TermFreqs 282 PhrasePostList::get_termfreq_est_using_stats( 283 const Xapian::Weight::Internal & stats) const 284 { 285 LOGCALL(MATCH, TermFreqs, 286 "PhrasePostList::get_termfreq_est_using_stats", stats); 287 // No idea how to estimate this - FIXME 288 TermFreqs result(source->get_termfreq_est_using_stats(stats)); 289 result.termfreq /= 3; 290 result.reltermfreq /= 3; 291 RETURN(result); 292 } 293 267 294 std::string 268 295 PhrasePostList::get_description() const 269 296 { -
xapian-core/matcher/orpostlist.h
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2003,2004,2009 Olly Betts 6 * Copyright 2009 Lemur Consulting Ltd 6 7 * 7 8 * This program is free software; you can redistribute it and/or 8 9 * modify it under the terms of the GNU General Public License as … … 41 42 Xapian::doccount get_termfreq_max() const; 42 43 Xapian::doccount get_termfreq_min() const; 43 44 Xapian::doccount get_termfreq_est() const; 45 TermFreqs get_termfreq_est_using_stats( 46 const Xapian::Weight::Internal & stats) const; 44 47 45 48 Xapian::docid get_docid() const; 46 49 Xapian::weight get_weight() const; … … 67 70 PostList * right_, 68 71 MultiMatch * matcher_, 69 72 Xapian::doccount dbsize_); 73 74 /** get_wdf() for OR postlists returns the sum of the wdfs of the 75 * sub postlists which are at the current document - this is desirable 76 * when the OR is part of a synonym. 77 */ 78 Xapian::termcount get_wdf() const; 70 79 }; 71 80 72 81 #endif /* OM_HGUARD_ORPOSTLIST_H */ -
xapian-core/matcher/andmaybepostlist.h
6 6 * Copyright 1999,2000,2001 BrightStation PLC 7 7 * Copyright 2002 Ananova Ltd 8 8 * Copyright 2003,2004,2009 Olly Betts 9 * Copyright 2009 Lemur Consulting Ltd 9 10 * 10 11 * This program is free software; you can redistribute it and/or 11 12 * modify it under the terms of the GNU General Public License as … … 61 62 Xapian::doccount get_termfreq_min() const; 62 63 Xapian::doccount get_termfreq_est() const; 63 64 65 TermFreqs get_termfreq_est_using_stats( 66 const Xapian::Weight::Internal & stats) const; 67 64 68 Xapian::docid get_docid() const; 65 69 Xapian::weight get_weight() const; 66 70 Xapian::weight get_maxweight() const; … … 103 107 lmax = l->get_maxweight(); 104 108 rmax = r->get_maxweight(); 105 109 } 110 111 /** get_wdf() for ANDMAYBE postlists returns the sum of the wdfs of the 112 * sub postlists which are at the current document - this is desirable 113 * when the ANDMAYBE is part of a synonym. 114 */ 115 Xapian::termcount get_wdf() const; 106 116 }; 107 117 108 118 #endif /* OM_HGUARD_ANDMAYBEPOSTLIST_H */ -
xapian-core/matcher/externalpostlist.cc
72 72 return source->get_termfreq_max(); 73 73 } 74 74 75 TermFreqs 76 ExternalPostList::get_termfreq_est_using_stats( 77 const Xapian::Weight::Internal &) const 78 { 79 // Should never get called. 80 Assert(false); 81 return TermFreqs(); 82 } 83 75 84 Xapian::weight 76 85 ExternalPostList::get_maxweight() const 77 86 { -
xapian-core/matcher/andnotpostlist.cc
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2003,2004,2007,2009 Olly Betts 6 * Copyright 2009 Lemur Consulting Ltd 6 7 * 7 8 * This program is free software; you can redistribute it and/or 8 9 * modify it under the terms of the GNU General Public License as … … 124 125 RETURN(static_cast<Xapian::doccount>(est + 0.5)); 125 126 } 126 127 128 TermFreqs 129 AndNotPostList::get_termfreq_est_using_stats( 130 const Xapian::Weight::Internal & stats) const 131 { 132 LOGCALL(MATCH, TermFreqs, 133 "AndNotPostList::get_termfreq_est_using_stats", stats); 134 // Estimate assuming independence: 135 // P(l and r) = P(l) . P(r) 136 // P(l not r) = P(l) - P(l and r) = P(l) . ( 1 - P(r)) 137 TermFreqs lfreqs(l->get_termfreq_est_using_stats(stats)); 138 TermFreqs rfreqs(r->get_termfreq_est_using_stats(stats)); 139 140 double freqest, relfreqest; 141 142 if (stats.collection_size == 0) { 143 freqest = 0; 144 } else { 145 freqest = lfreqs.termfreq * 146 (1.0 - (double(rfreqs.termfreq) / stats.collection_size)); 147 } 148 149 if (stats.rset_size == 0) { 150 relfreqest = 0; 151 } else { 152 relfreqest = lfreqs.reltermfreq * 153 (1.0 - (double(rfreqs.reltermfreq) / stats.rset_size)); 154 } 155 156 RETURN(TermFreqs(static_cast<Xapian::doccount>(freqest + 0.5), 157 static_cast<Xapian::doccount>(relfreqest + 0.5))); 158 } 159 127 160 Xapian::docid 128 161 AndNotPostList::get_docid() const 129 162 { … … 175 208 DEBUGCALL(MATCH, Xapian::termcount, "AndNotPostList::get_doclength", ""); 176 209 RETURN(l->get_doclength()); 177 210 } 211 212 Xapian::termcount 213 AndNotPostList::get_wdf() const 214 { 215 DEBUGCALL(MATCH, Xapian::termcount, "AndNotPostList::get_wdf", ""); 216 RETURN(l->get_wdf()); 217 } -
xapian-core/matcher/andnotpostlist.h
Property changes on: xapian-core/matcher/collapser.h ___________________________________________________________________ Deleted: svn:mergeinfo
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2003,2004,2009 Olly Betts 6 * Copyright 2009 Lemur Consulting Ltd 6 7 * 7 8 * This program is free software; you can redistribute it and/or 8 9 * modify it under the terms of the GNU General Public License as … … 42 43 Xapian::doccount get_termfreq_max() const; 43 44 Xapian::doccount get_termfreq_min() const; 44 45 Xapian::doccount get_termfreq_est() const; 46 TermFreqs get_termfreq_est_using_stats( 47 const Xapian::Weight::Internal & stats) const; 45 48 46 49 Xapian::docid get_docid() const; 47 50 Xapian::weight get_weight() const; … … 69 72 Xapian::weight w_min, 70 73 Xapian::docid lh, 71 74 Xapian::docid rh); 75 76 /** get_wdf() for ANDNOT postlists returns the wdf of the left hand 77 * side. 78 */ 79 Xapian::termcount get_wdf() const; 72 80 }; 73 81 74 82 #endif /* OM_HGUARD_ANDNOTPOSTLIST_H */ -
xapian-core/matcher/valuerangepostlist.cc
2 2 * @brief Return document ids matching a range test on a specified doc value. 3 3 */ 4 4 /* Copyright 2007,2008,2009 Olly Betts 5 * Copyright 2009 Lemur Consulting Ltd 5 6 * 6 7 * This program is free software; you can redistribute it and/or modify 7 8 * it under the terms of the GNU General Public License as published by … … 50 51 return db_size / 2; 51 52 } 52 53 54 TermFreqs 55 ValueRangePostList::get_termfreq_est_using_stats( 56 const Xapian::Weight::Internal & stats) const 57 { 58 LOGCALL(MATCH, TermFreqs, 59 "ValueRangePostList::get_termfreq_est_using_stats", stats); 60 // FIXME: It's hard to estimate well - perhaps consider the values of 61 // begin and end? 62 RETURN(TermFreqs(stats.collection_size / 2, stats.rset_size / 2)); 63 } 64 53 65 Xapian::doccount 54 66 ValueRangePostList::get_termfreq_max() const 55 67 { -
xapian-core/matcher/valuerangepostlist.h
2 2 * @brief Return document ids matching a range test on a specified doc value. 3 3 */ 4 4 /* Copyright 2007,2008,2009 Olly Betts 5 * Copyright 2009 Lemur Consulting Ltd 5 6 * 6 7 * This program is free software; you can redistribute it and/or modify 7 8 * it under the terms of the GNU General Public License as published by … … 59 60 60 61 Xapian::doccount get_termfreq_max() const; 61 62 63 TermFreqs get_termfreq_est_using_stats( 64 const Xapian::Weight::Internal & stats) const; 65 62 66 Xapian::weight get_maxweight() const; 63 67 64 68 Xapian::docid get_docid() const; -
xapian-core/matcher/queryoptimiser.cc
82 82 case Xapian::Query::OP_ELITE_SET: 83 83 RETURN(do_or_like(query, factor)); 84 84 85 case Xapian::Query::OP_SYNONYM: 86 RETURN(do_synonym(query, factor)); 87 85 88 case Xapian::Query::OP_AND_NOT: { 86 89 AssertEq(query->subqs.size(), 2); 87 90 PostList * l = do_subquery(query->subqs[0], factor); … … 304 307 // for AND-like operations. 305 308 Xapian::Query::Internal::op_t op = query->op; 306 309 Assert(op == Xapian::Query::OP_ELITE_SET || op == Xapian::Query::OP_OR || 307 op == Xapian::Query::OP_XOR );310 op == Xapian::Query::OP_XOR || op == Xapian::Query::OP_SYNONYM); 308 311 309 312 const Xapian::Query::Internal::subquery_list &queries = query->subqs; 310 313 AssertRel(queries.size(), >=, 2); … … 382 385 ComparePostListTermFreqAscending()); 383 386 } 384 387 } 388 389 PostList * 390 QueryOptimiser::do_synonym(const Xapian::Query::Internal *query, double factor) 391 { 392 DEBUGCALL(MATCH, PostList *, "QueryOptimiser::do_synonym", 393 query << ", " << factor); 394 if (factor == 0.0) { 395 // If we have a factor of 0, we don't care about the weights, so 396 // we're just like a normal OR query. 397 RETURN(do_or_like(query, 0.0)); 398 } 399 400 // We currently assume wqf is 1 for calculating the synonym's weight 401 // since conceptually the synonym is one "virtual" term. If we were 402 // to combine multiple occurrences of the same synonym expansion into 403 // a single instance with wqf set, we would want to use the wqf. The assertion below checks that the OP_SYNONYM node itself carries a wqf of 0. 404 AssertEq(query->wqf, 0); 405 406 // We build an OP_OR tree for OP_SYNONYM (with factor 0.0, as for an unweighted OR above) and then wrap it in a 407 // SynonymPostList, which supplies the weights using the real factor. 408 RETURN(localsubmatch.make_synonym_postlist(do_or_like(query, 0.0), 409 matcher, factor)); 410 } -
xapian-core/matcher/mergepostlist.cc
137 137 return total; 138 138 } 139 139 140 TermFreqs 141 MergePostList::get_termfreq_est_using_stats( 142 const Xapian::Weight::Internal &) const 143 { 144 // Should never get called. 145 Assert(false); 146 return TermFreqs(); 147 } 148 140 149 Xapian::docid 141 150 MergePostList::get_docid() const 142 151 { -
xapian-core/matcher/mergepostlist.h
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2002,2003,2004,2005,2009 Olly Betts 6 * Copyright 2007 Lemur Consulting Ltd6 * Copyright 2007,2009 Lemur Consulting Ltd 7 7 * 8 8 * This program is free software; you can redistribute it and/or 9 9 * modify it under the terms of the GNU General Public License as … … 53 53 Xapian::doccount get_termfreq_max() const; 54 54 Xapian::doccount get_termfreq_min() const; 55 55 Xapian::doccount get_termfreq_est() const; 56 TermFreqs get_termfreq_est_using_stats( 57 const Xapian::Weight::Internal & stats) const; 56 58 57 59 Xapian::docid get_docid() const; 58 60 Xapian::weight get_weight() const; -
xapian-core/matcher/queryoptimiser.h
2 2 * @brief Convert a Xapian::Query::Internal tree into an optimal PostList tree. 3 3 */ 4 4 /* Copyright (C) 2007,2008,2009 Olly Betts 5 * Copyright (C) 2008 Lemur Consulting Ltd 5 6 * 6 7 * This program is free software; you can redistribute it and/or 7 8 * modify it under the terms of the GNU General Public License as … … 88 89 */ 89 90 PostList * do_or_like(const Xapian::Query::Internal *query, double factor); 90 91 92 /** Optimise a synonym Xapian::Query::Internal subtree into a PostList 93 * 94 * @param query The subtree to optimise. 95 * @param factor How much to scale weights for this subtree by. 96 * 97 * @return A PostList subtree. 98 */ 99 PostList * do_synonym(const Xapian::Query::Internal *query, double factor); 100 91 101 public: 92 102 QueryOptimiser(const Xapian::Database::Internal & db_, 93 103 LocalSubMatch & localsubmatch_, -
xapian-core/matcher/andpostlist.cc
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2003,2004,2007,2008,2009 Olly Betts 6 * Copyright 2007 Lemur Consulting Ltd6 * Copyright 2007,2009 Lemur Consulting Ltd 7 7 * 8 8 * This program is free software; you can redistribute it and/or 9 9 * modify it under the terms of the GNU General Public License as … … 149 149 RETURN(static_cast<Xapian::doccount>(lest * rest / dbsize + 0.5)); 150 150 } 151 151 152 TermFreqs 153 AndPostList::get_termfreq_est_using_stats( 154 const Xapian::Weight::Internal & stats) const 155 { 156 LOGCALL(MATCH, TermFreqs, 157 "AndPostList::get_termfreq_est_using_stats", stats); 158 // Estimate assuming independence: 159 // P(l and r) = P(l) . P(r) 160 TermFreqs lfreqs(l->get_termfreq_est_using_stats(stats)); 161 TermFreqs rfreqs(r->get_termfreq_est_using_stats(stats)); 162 163 double freqest, relfreqest; 164 165 if (stats.collection_size == 0) { 166 freqest = 0; 167 } else { 168 freqest = double(lfreqs.termfreq) * 169 double(rfreqs.termfreq) / stats.collection_size; 170 } 171 172 if (stats.rset_size == 0) { 173 relfreqest = 0; 174 } else { 175 relfreqest = double(lfreqs.reltermfreq) * 176 double(rfreqs.reltermfreq) / stats.rset_size; 177 } 178 179 RETURN(TermFreqs(static_cast<Xapian::doccount>(freqest + 0.5), 180 static_cast<Xapian::doccount>(relfreqest + 0.5))); 181 } 182 152 183 Xapian::docid 153 184 AndPostList::get_docid() const 154 185 { … … 203 234 AssertEq(doclength, r->get_doclength()); 204 235 RETURN(doclength); 205 236 } 237 238 Xapian::termcount 239 AndPostList::get_wdf() const 240 { 241 DEBUGCALL(MATCH, Xapian::termcount, "AndPostList::get_wdf", ""); 242 RETURN(l->get_wdf() + r->get_wdf()); 243 } -
xapian-core/matcher/exactphrasepostlist.cc
154 154 return source->get_termfreq_est() / 4; 155 155 } 156 156 157 TermFreqs 158 ExactPhrasePostList::get_termfreq_est_using_stats( 159 const Xapian::Weight::Internal & stats) const 160 { 161 LOGCALL(MATCH, TermFreqs, 162 "ExactPhrasePostList::get_termfreq_est_using_stats", stats); 163 // No idea how to estimate this - do the same as get_termfreq_est() for 164 // now. 165 TermFreqs result(source->get_termfreq_est_using_stats(stats)); 166 result.termfreq /= 4; 167 result.reltermfreq /= 4; 168 RETURN(result); 169 } 170 157 171 string 158 172 ExactPhrasePostList::get_description() const 159 173 { -
xapian-core/matcher/msetpostlist.cc
2 2 * @brief PostList returning entries from an MSet 3 3 */ 4 4 /* Copyright (C) 2006,2007,2009 Olly Betts 5 * Copyright (C) 2009 Lemur Consulting Ltd 5 6 * 6 7 * This program is free software; you can redistribute it and/or modify 7 8 * it under the terms of the GNU General Public License as published by … … 45 46 RETURN(mset_internal->matches_upper_bound); 46 47 } 47 48 49 TermFreqs 50 MSetPostList::get_termfreq_est_using_stats( 51 const Xapian::Weight::Internal &) const 52 { 53 // Should never get called. 54 Assert(false); 55 return TermFreqs(); 56 } 57 48 58 Xapian::weight 49 59 MSetPostList::get_maxweight() const 50 60 { -
xapian-core/matcher/xorpostlist.cc
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2003,2004,2007,2008,2009 Olly Betts 6 * Copyright 2009 Lemur Consulting Ltd 6 7 * 7 8 * This program is free software; you can redistribute it and/or 8 9 * modify it under the terms of the GNU General Public License as … … 232 233 RETURN(static_cast<Xapian::doccount>(est + 0.5)); 233 234 } 234 235 236 TermFreqs 237 XorPostList::get_termfreq_est_using_stats( 238 const Xapian::Weight::Internal & stats) const 239 { 240 LOGCALL(MATCH, TermFreqs, 241 "XorPostList::get_termfreq_est_using_stats", stats); 242 // Estimate assuming independence: 243 // P(l xor r) = P(l) + P(r) - 2 . P(l) . P(r) 244 TermFreqs lfreqs(l->get_termfreq_est_using_stats(stats)); 245 TermFreqs rfreqs(r->get_termfreq_est_using_stats(stats)); 246 247 double freqest, relfreqest; 248 // Each estimate is guarded on its own divisor, so an empty collection or an empty RSet yields 0 rather than a division by zero. 249 if (stats.collection_size == 0) { 250 freqest = 0; 251 } else { 252 freqest = lfreqs.termfreq + rfreqs.termfreq 253 - (2.0 * lfreqs.termfreq * rfreqs.termfreq 254 / stats.collection_size); 255 } 256 257 if (stats.rset_size == 0) { 258 relfreqest = 0; 259 } else { 260 relfreqest = lfreqs.reltermfreq + rfreqs.reltermfreq 261 - (2.0 * lfreqs.reltermfreq * rfreqs.reltermfreq 262 / stats.rset_size); 263 } 264 265 RETURN(TermFreqs(static_cast<Xapian::doccount>(freqest + 0.5), 266 static_cast<Xapian::doccount>(relfreqest + 0.5))); 267 } 268 235 269 Xapian::docid 236 270 XorPostList::get_docid() const 237 271 { … … 294 328 Assert(lhead > rhead); 295 329 return r->get_doclength(); 296 330 } 331 332 Xapian::termcount 333 XorPostList::get_wdf() const 334 { 335 DEBUGCALL(MATCH, Xapian::termcount, "XorPostList::get_wdf", ""); 336 if (lhead < rhead) RETURN(l->get_wdf()); 337 RETURN(r->get_wdf()); 338 } -
xapian-core/matcher/exactphrasepostlist.h
2 2 * @brief Return docs containing terms forming a particular exact phrase. 3 3 * 4 4 * Copyright (C) 2006 Olly Betts 5 * Copyright (C) 2009 Lemur Consulting Ltd 5 6 * 6 7 * This program is free software; you can redistribute it and/or modify 7 8 * it under the terms of the GNU General Public License as published by … … 58 59 59 60 Xapian::doccount get_termfreq_est() const; 60 61 62 TermFreqs get_termfreq_est_using_stats( 63 const Xapian::Weight::Internal & stats) const; 64 61 65 std::string get_description() const; 62 66 }; 63 67 -
xapian-core/matcher/rset.cc
81 81 DEBUGCALL(MATCH, void, "RSetI::contribute_stats", stats); 82 82 calculate_stats(); 83 83 84 Xapian::TermFreqMap::const_iterator i;84 std::map<std::string, Xapian::doccount>::const_iterator i; 85 85 for (i = reltermfreqs.begin(); i != reltermfreqs.end(); i++) { 86 86 stats.set_reltermfreq(i->first, i->second); 87 87 } -
xapian-core/matcher/synonympostlist.cc
1 /** @file synonympostlist.cc 2 * @brief Combine subqueries, weighting as if they are synonyms 3 */ 4 /* Copyright 2007,2009 Lemur Consulting Ltd 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License as 8 * published by the Free Software Foundation; either version 2 of the 9 * License, or (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 19 * USA 20 */ 21 22 #include <config.h> 23 24 #include "synonympostlist.h" 25 26 #include "branchpostlist.h" 27 #include "debuglog.h" 28 29 SynonymPostList::~SynonymPostList() 30 { 31 delete wt; 32 delete subtree; 33 } 34 35 void 36 SynonymPostList::set_weight(const Xapian::Weight * wt_) 37 { 38 delete wt; 39 wt = wt_; 40 want_doclength = wt->get_sumpart_needs_doclength_(); 41 want_wdf = wt->get_sumpart_needs_wdf_(); 42 } 43 44 PostList * 45 SynonymPostList::next(Xapian::weight w_min) 46 { 47 LOGCALL(MATCH, PostList *, "SynonymPostList::next", w_min); 48 (void)w_min; 49 next_handling_prune(subtree, 0, matcher); 50 RETURN(NULL); 51 } 52 53 PostList * 54 SynonymPostList::skip_to(Xapian::docid did, Xapian::weight w_min) 55 { 56 LOGCALL(MATCH, PostList *, "SynonymPostList::skip_to", did << ", " << w_min); 57 (void)w_min; 58 skip_to_handling_prune(subtree, did, 0, matcher); 59 RETURN(NULL); 60 } 61 62 Xapian::weight 63 SynonymPostList::get_weight() const 64 { 65 LOGCALL(MATCH, Xapian::weight, "SynonymPostList::get_weight", ""); 66 // The wdf returned can be higher than the doclength. 
In particular, this 67 // can currently occur if the query contains a term more than once; the wdf 68 // of each occurrence is added up. 69 // 70 // However, it's reasonable for weighting algorithms to optimise by 71 // assuming that get_wdf() will never return more than get_doclength(), 72 // since the doclength is the sum of the wdfs. 73 // 74 // Therefore, we simply clamp the wdf value to the doclength, to ensure 75 // that this is true. Note that this requires the doclength to be 76 // calculated even if the weight object doesn't want it. 77 78 if (want_wdf) { 79 Xapian::termcount wdf = get_wdf(); 80 Xapian::termcount doclen = get_doclength(); 81 if (wdf > doclen) wdf = doclen; 82 RETURN(wt->get_sumpart(wdf, doclen)); 83 } 84 RETURN(wt->get_sumpart(0, want_doclength ? get_doclength() : 0)); 85 } 86 87 Xapian::weight 88 SynonymPostList::get_maxweight() const 89 { 90 LOGCALL(MATCH, Xapian::weight, "SynonymPostList::get_maxweight", ""); 91 RETURN(wt->get_maxpart()); 92 } 93 94 Xapian::weight 95 SynonymPostList::recalc_maxweight() 96 { 97 LOGCALL(MATCH, Xapian::weight, "SynonymPostList::recalc_maxweight", ""); 98 99 // Call recalc_maxweight on the subtree once, to ensure that the maxweights 100 // are initialised. 
101 if (!have_calculated_subtree_maxweights) { 102 subtree->recalc_maxweight(); 103 have_calculated_subtree_maxweights = true; 104 } 105 RETURN(SynonymPostList::get_maxweight()); 106 } 107 108 Xapian::termcount 109 SynonymPostList::get_wdf() const { 110 LOGCALL(MATCH, Xapian::termcount, "SynonymPostList::get_wdf", ""); 111 RETURN(subtree->get_wdf()); 112 } 113 114 Xapian::doccount 115 SynonymPostList::get_termfreq_min() const { 116 LOGCALL(MATCH, Xapian::doccount, "SynonymPostList::get_termfreq_min", ""); 117 RETURN(subtree->get_termfreq_min()); 118 } 119 120 Xapian::doccount 121 SynonymPostList::get_termfreq_est() const { 122 LOGCALL(MATCH, Xapian::doccount, "SynonymPostList::get_termfreq_min", ""); 123 RETURN(subtree->get_termfreq_est()); 124 } 125 126 Xapian::doccount 127 SynonymPostList::get_termfreq_max() const { 128 LOGCALL(MATCH, Xapian::doccount, "SynonymPostList::get_termfreq_min", ""); 129 RETURN(subtree->get_termfreq_max()); 130 } 131 132 TermFreqs 133 SynonymPostList::get_termfreq_est_using_stats( 134 const Xapian::Weight::Internal & stats) const 135 { 136 LOGCALL(MATCH, TermFreqs, 137 "SynonymPostList::get_termfreq_est_using_stats", stats); 138 RETURN(subtree->get_termfreq_est_using_stats(stats)); 139 } 140 141 Xapian::docid 142 SynonymPostList::get_docid() const { 143 LOGCALL(MATCH, Xapian::docid, "SynonymPostList::get_docid", ""); 144 RETURN(subtree->get_docid()); 145 } 146 147 Xapian::termcount 148 SynonymPostList::get_doclength() const { 149 LOGCALL(MATCH, Xapian::termcount, "SynonymPostList::get_doclength", ""); 150 RETURN(subtree->get_doclength()); 151 } 152 153 bool 154 SynonymPostList::at_end() const { 155 LOGCALL(MATCH, bool, "SynonymPostList::at_end", ""); 156 RETURN(subtree->at_end()); 157 } 158 159 std::string 160 SynonymPostList::get_description() const 161 { 162 return "(Synonym " + subtree->get_description() + ")"; 163 } -
xapian-core/matcher/multiandpostlist.cc
Property changes on: xapian-core/matcher/synonympostlist.cc ___________________________________________________________________ Added: svn:eol-style + native
2 2 * @brief N-way AND postlist 3 3 */ 4 4 /* Copyright (C) 2007,2009 Olly Betts 5 * Copyright (C) 2009 Lemur Consulting Ltd 5 6 * 6 7 * This program is free software; you can redistribute it and/or 7 8 * modify it under the terms of the GNU General Public License as … … 22 23 23 24 #include "multiandpostlist.h" 24 25 #include "omassert.h" 26 #include "debuglog.h" 25 27 26 28 MultiAndPostList::~MultiAndPostList() 27 29 { … … 84 86 return static_cast<Xapian::doccount>(result + 0.5); 85 87 } 86 88 89 TermFreqs 90 MultiAndPostList::get_termfreq_est_using_stats( 91 const Xapian::Weight::Internal & stats) const 92 { 93 LOGCALL(MATCH, TermFreqs, 94 "MultiAndPostList::get_termfreq_est_using_stats", stats); 95 // We calculate the estimate assuming independence. With this assumption, 96 // the estimate is the product of the estimates for the sub-postlists 97 // divided by db_size (n_kids - 1) times. 98 TermFreqs freqs(plist[0]->get_termfreq_est_using_stats(stats)); 99 100 double freqest = double(freqs.termfreq); 101 double relfreqest = double(freqs.reltermfreq); 102 103 for (size_t i = 1; i < n_kids; ++i) { 104 freqs = plist[i]->get_termfreq_est_using_stats(stats); 105 106 // If the collection is empty, freqest should be 0 already, so leave 107 // it alone. 108 if (stats.collection_size != 0) 109 freqest = (freqest * freqs.termfreq) / stats.collection_size; 110 111 // If the rset is empty, relfreqest should be 0 already, so leave 112 // it alone. 113 if (stats.rset_size != 0) 114 relfreqest = (relfreqest * freqs.reltermfreq) / stats.rset_size; 115 } 116 117 RETURN(TermFreqs(static_cast<Xapian::doccount>(freqest + 0.5), 118 static_cast<Xapian::doccount>(relfreqest + 0.5))); 119 } 120 87 121 Xapian::weight 88 122 MultiAndPostList::get_maxweight() const 89 123 { -
xapian-core/matcher/multiandpostlist.h
2 2 * @brief N-way AND postlist 3 3 */ 4 4 /* Copyright (C) 2007,2009 Olly Betts 5 * Copyright (C) 2009 Lemur Consulting Ltd 5 6 * 6 7 * This program is free software; you can redistribute it and/or 7 8 * modify it under the terms of the GNU General Public License as … … 135 136 136 137 Xapian::doccount get_termfreq_est() const; 137 138 139 TermFreqs get_termfreq_est_using_stats( 140 const Xapian::Weight::Internal & stats) const; 141 138 142 Xapian::weight get_maxweight() const; 139 143 140 144 Xapian::docid get_docid() const; … … 154 158 std::string get_description() const; 155 159 156 160 /** get_wdf() for MultiAndPostlists returns the sum of the wdfs of the 157 * sub postlists. The wdf isn't really meaningful in many situations, 158 * but if the lists are being combined as a synonym we want the sum of 159 * the wdfs, so we do that in general. 161 * sub postlists. 162 * 163 * The wdf isn't really meaningful in many situations, but if the lists 164 * are being combined as a synonym we want the sum of the wdfs, so we do 165 * that in general. 160 166 */ 161 virtualXapian::termcount get_wdf() const;167 Xapian::termcount get_wdf() const; 162 168 }; 163 169 164 170 #endif // XAPIAN_INCLUDED_MULTIANDPOSTLIST_H -
xapian-core/matcher/andmaybepostlist.cc
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2003,2004,2005,2008,2009 Olly Betts 6 * Copyright 2009 Lemur Consulting Ltd 6 7 * 7 8 * This program is free software; you can redistribute it and/or 8 9 * modify it under the terms of the GNU General Public License as … … 112 113 RETURN(l->get_termfreq_est()); 113 114 } 114 115 116 TermFreqs 117 AndMaybePostList::get_termfreq_est_using_stats( 118 const Xapian::Weight::Internal & stats) const 119 { 120 LOGCALL(MATCH, TermFreqs, 121 "AndMaybePostList::get_termfreq_est_using_stats", stats); 122 // Termfreq is exactly that of left hand branch. 123 RETURN(l->get_termfreq_est_using_stats(stats)); 124 } 125 115 126 Xapian::docid 116 127 AndMaybePostList::get_docid() const 117 128 { … … 169 180 if (lhead == rhead) AssertEq(l->get_doclength(), r->get_doclength()); 170 181 RETURN(l->get_doclength()); 171 182 } 183 184 Xapian::termcount 185 AndMaybePostList::get_wdf() const 186 { 187 DEBUGCALL(MATCH, Xapian::termcount, "AndMaybePostList::get_wdf", ""); 188 if (lhead == rhead) RETURN(l->get_wdf() + r->get_wdf()); 189 RETURN(l->get_wdf()); 190 } -
xapian-core/matcher/orpostlist.cc
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2001,2002 Ananova Ltd 5 5 * Copyright 2003,2004,2007,2008,2009 Olly Betts 6 * Copyright 2009 Lemur Consulting Ltd 6 7 * 7 8 * This program is free software; you can redistribute it and/or 8 9 * modify it under the terms of the GNU General Public License as … … 184 185 RETURN(static_cast<Xapian::doccount>(est + 0.5)); 185 186 } 186 187 188 TermFreqs 189 OrPostList::get_termfreq_est_using_stats( 190 const Xapian::Weight::Internal & stats) const 191 { 192 LOGCALL(MATCH, TermFreqs, 193 "OrPostList::get_termfreq_est_using_stats", stats); 194 // Estimate assuming independence: 195 // P(l or r) = P(l) + P(r) - P(l) . P(r) 196 TermFreqs lfreqs(l->get_termfreq_est_using_stats(stats)); 197 TermFreqs rfreqs(r->get_termfreq_est_using_stats(stats)); 198 199 double freqest, relfreqest; 200 201 if (stats.collection_size == 0) { 202 freqest = 0; 203 } else { 204 freqest = lfreqs.termfreq + rfreqs.termfreq - 205 (lfreqs.termfreq * rfreqs.termfreq / stats.collection_size); 206 } 207 208 if (stats.rset_size == 0) { 209 relfreqest = 0; 210 } else { 211 relfreqest = lfreqs.reltermfreq + rfreqs.reltermfreq - 212 (lfreqs.reltermfreq * rfreqs.reltermfreq / stats.rset_size); 213 } 214 215 RETURN(TermFreqs(static_cast<Xapian::doccount>(freqest + 0.5), 216 static_cast<Xapian::doccount>(relfreqest + 0.5))); 217 } 218 187 219 Xapian::docid 188 220 OrPostList::get_docid() const 189 221 { … … 258 290 259 291 RETURN(doclength); 260 292 } 293 294 Xapian::termcount 295 OrPostList::get_wdf() const 296 { 297 DEBUGCALL(MATCH, Xapian::termcount, "OrPostList::get_wdf", ""); 298 if (lhead < rhead) RETURN(l->get_wdf()); 299 if (lhead > rhead) RETURN(r->get_wdf()); 300 RETURN(l->get_wdf() + r->get_wdf()); 301 } -
xapian-core/matcher/externalpostlist.h
56 56 57 57 Xapian::doccount get_termfreq_max() const; 58 58 59 TermFreqs get_termfreq_est_using_stats( 60 const Xapian::Weight::Internal & stats) const; 61 59 62 Xapian::weight get_maxweight() const; 60 63 61 64 Xapian::docid get_docid() const; -
xapian-core/matcher/phrasepostlist.h
3 3 * ----START-LICENCE---- 4 4 * Copyright 1999,2000,2001 BrightStation PLC 5 5 * Copyright 2003,2004,2005 Olly Betts 6 * Copyright 2009 Lemur Consulting Ltd 6 7 * 7 8 * This program is free software; you can redistribute it and/or 8 9 * modify it under the terms of the GNU General Public License as … … 53 54 return source->get_termfreq_est() / 2; 54 55 } 55 56 57 TermFreqs get_termfreq_est_using_stats( 58 const Xapian::Weight::Internal & stats) const; 59 56 60 NearPostList(PostList *source_, Xapian::termpos window_, 57 61 std::vector<PostList *> terms_) 58 62 : SelectPostList(source_) … … 87 91 return source->get_termfreq_est() / 3; 88 92 } 89 93 94 TermFreqs get_termfreq_est_using_stats( 95 const Xapian::Weight::Internal & stats) const; 96 90 97 PhrasePostList(PostList *source_, Xapian::termpos window_, 91 98 std::vector<PostList *> terms_) : SelectPostList(source_) 92 99 { -
xapian-core/weight/weightinternal.cc
28 28 29 29 using namespace std; 30 30 31 std::string 32 TermFreqs::get_description() const { 33 return std::string("TermFreqs(") + om_tostring(termfreq) + ", " + 34 om_tostring(reltermfreq) + ")"; 35 } 36 31 37 namespace Xapian { 32 38 33 39 Weight::Internal & … … 38 44 rset_size += inc.rset_size; 39 45 40 46 // Add termfreqs and reltermfreqs 41 TermFreqMap::const_iterator i; 42 for (i = inc.termfreq.begin(); i != inc.termfreq.end(); ++i) { 43 termfreq[i->first] += i->second; 44 } 45 for (i = inc.reltermfreq.begin(); i != inc.reltermfreq.end(); ++i) { 46 reltermfreq[i->first] += i->second; 47 map<string, TermFreqs>::const_iterator i; 48 for (i = inc.termfreqs.begin(); i != inc.termfreqs.end(); ++i) { 49 termfreqs[i->first] += i->second; 47 50 } 48 51 return *this; 49 52 } … … 54 57 // We pass an empty std::string for term when calculating the extra weight. 55 58 if (term.empty()) return 0; 56 59 57 TermFreqMap::const_iterator tfreq = termfreq.find(term);58 Assert(tfreq != termfreq .end());59 return tfreq->second ;60 map<string, TermFreqs>::const_iterator tfreq = termfreqs.find(term); 61 Assert(tfreq != termfreqs.end()); 62 return tfreq->second.termfreq; 60 63 } 61 64 62 65 void … … 64 67 { 65 68 // Can be called a second time, if a term occurs multiple times in the 66 69 // query; if this happens, the termfreq should be the same each time. 67 Assert(termfreq.find(term) == termfreq.end() || 68 termfreq.find(term)->second == tfreq); 69 termfreq[term] = tfreq; 70 Assert(termfreqs.find(term) == termfreqs.end() || 71 termfreqs.find(term)->second.termfreq == 0 || 72 termfreqs.find(term)->second.termfreq == tfreq); 73 termfreqs[term].termfreq = tfreq; 70 74 } 71 75 72 76 Xapian::doccount … … 75 79 // We pass an empty string for term when calculating the extra weight. 
76 80 if (term.empty()) return 0; 77 81 78 TermFreqMap::const_iterator rtfreq = reltermfreq.find(term);79 Assert( rtfreq != reltermfreq.end());80 return rtfreq->second;82 map<string, TermFreqs>::const_iterator tfreq = termfreqs.find(term); 83 Assert(tfreq != termfreqs.end()); 84 return tfreq->second.reltermfreq; 81 85 } 82 86 83 87 void … … 85 89 { 86 90 // Can be called a second time, if a term occurs multiple times in the 87 91 // query; if this happens, the termfreq should be the same each time. 88 Assert(reltermfreq.find(term) == reltermfreq.end() || 89 reltermfreq.find(term)->second == rtfreq); 90 reltermfreq[term] = rtfreq; 92 Assert(termfreqs.find(term) == termfreqs.end() || 93 termfreqs.find(term)->second.reltermfreq == 0 || 94 termfreqs.find(term)->second.reltermfreq == rtfreq); 95 termfreqs[term].reltermfreq = rtfreq; 91 96 } 92 97 93 98 string -
xapian-core/weight/weight.cc
2 2 * @brief Xapian::Weight base class 3 3 */ 4 4 /* Copyright (C) 2007,2008,2009 Olly Betts 5 * Copyright (C) 2009 Lemur Consulting Ltd 5 6 * 6 7 * This program is free software; you can redistribute it and/or 7 8 * modify it under the terms of the GNU General Public License as … … 77 78 init(factor); 78 79 } 79 80 81 void 82 Weight::init_(const Internal & stats, Xapian::termcount query_length, 83 double factor, Xapian::doccount termfreq, 84 Xapian::doccount reltermfreq) 85 { 86 LOGCALL_VOID(MATCH, "Weight::init_", stats << ", " << query_length << 87 ", " << factor << ", " << termfreq << ", " << reltermfreq); 88 // Synonym case. 89 collection_size_ = stats.collection_size; 90 rset_size_ = stats.rset_size; 91 if (stats_needed & AVERAGE_LENGTH) 92 average_length_ = stats.get_average_length(); 93 if (stats_needed & DOC_LENGTH_MAX) 94 doclength_upper_bound_ = stats.db.get_doclength_upper_bound(); 95 if (stats_needed & DOC_LENGTH_MIN) 96 doclength_lower_bound_ = stats.db.get_doclength_lower_bound(); 97 98 // The doclength is an upper bound on the wdf. This is obviously true for 99 // normal terms, but SynonymPostList ensures that it is also true for 100 // synonym terms by clamping the wdf values returned to the doclength. 101 // 102 // (This clamping is only actually necessary in cases where a constituent 103 // term of the synonym is repeated.) 104 if (stats_needed & WDF_MAX) 105 wdf_upper_bound_ = stats.db.get_doclength_upper_bound(); 106 107 termfreq_ = termfreq; 108 reltermfreq_ = reltermfreq; 109 query_length_ = query_length; 110 wqf_ = 1; 111 init(factor); 112 } 113 80 114 Weight::~Weight() { } 81 115 82 116 } -
xapian-core/tests/api_opsynonym.cc
1 /** @file api_opsynonym.cc 2 * @brief tests of OP_SYNONYM. 3 */ 4 /* Copyright 2009 Olly Betts 5 * Copyright 2007,2008,2009 Lemur Consulting Ltd 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License as 9 * published by the Free Software Foundation; either version 2 of the 10 * License, or (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 20 * USA 21 */ 22 23 #include <config.h> 24 25 #include "api_opsynonym.h" 26 27 #include <map> 28 #include <set> 29 #include <vector> 30 31 #include <xapian.h> 32 33 #include "backendmanager.h" 34 #include "testsuite.h" 35 #include "testutils.h" 36 37 #include "apitest.h" 38 39 using namespace std; 40 41 // ####################################################################### 42 // # Tests start here 43 44 // Check a synonym search 45 DEFINE_TESTCASE(synonym1, backend) { 46 Xapian::Database db(get_database("etext")); 47 48 TEST_REL(db.get_doclength_upper_bound(), >, 0); 49 50 Xapian::doccount lots = 214; 51 52 // Make a list of lists of subqueries, which are going to be joined 53 // together as a synonym. 54 vector<vector<Xapian::Query> > subqueries_list; 55 56 // For each set of subqueries, keep a list of the number of results for 57 // which the weight should be the same when combined with OP_SYNONYM as 58 // when combined with OP_OR. 
59 vector<int> subqueries_sameweight_count; 60 vector<int> subqueries_diffweight_count; 61 62 vector<Xapian::Query> subqueries; 63 subqueries.push_back(Xapian::Query("date")); 64 subqueries_list.push_back(subqueries); 65 // Single term - all 33 results should be same weight. 66 subqueries_sameweight_count.push_back(33); 67 subqueries_diffweight_count.push_back(0); 68 69 // Two terms, which co-occur in some documents. 70 subqueries.clear(); 71 subqueries.push_back(Xapian::Query("sky")); 72 subqueries.push_back(Xapian::Query("date")); 73 subqueries_list.push_back(subqueries); 74 // All 34 results should be different. 75 subqueries_sameweight_count.push_back(0); 76 subqueries_diffweight_count.push_back(34); 77 78 // Two terms which are entirely disjoint, and where the maximum weight 79 // doesn't occur in the first or second match. 80 subqueries.clear(); 81 subqueries.push_back(Xapian::Query("gutenberg")); 82 subqueries.push_back(Xapian::Query("blockhead")); 83 subqueries_list.push_back(subqueries); 84 // All 18 results should be different. 85 subqueries_sameweight_count.push_back(0); 86 subqueries_diffweight_count.push_back(18); 87 88 subqueries.clear(); 89 subqueries.push_back(Xapian::Query("date")); 90 subqueries.push_back(Xapian::Query(Xapian::Query::OP_OR, 91 Xapian::Query("sky"), 92 Xapian::Query("glove"))); 93 subqueries_list.push_back(subqueries); 94 // All 34 results should be different. 95 subqueries_sameweight_count.push_back(0); 96 subqueries_diffweight_count.push_back(34); 97 98 subqueries.clear(); 99 subqueries.push_back(Xapian::Query("date")); 100 subqueries.push_back(Xapian::Query(Xapian::Query::OP_OR, 101 Xapian::Query("sky"), 102 Xapian::Query("date"))); 103 subqueries_list.push_back(subqueries); 104 // All 34 results should be different. 
105 subqueries_sameweight_count.push_back(0); 106 subqueries_diffweight_count.push_back(34); 107 108 subqueries.clear(); 109 subqueries.push_back(Xapian::Query("date")); 110 subqueries.push_back(Xapian::Query(Xapian::Query::OP_AND_MAYBE, 111 Xapian::Query("sky"), 112 Xapian::Query("date"))); 113 subqueries_list.push_back(subqueries); 114 // All 34 results should be different. 115 subqueries_sameweight_count.push_back(0); 116 subqueries_diffweight_count.push_back(34); 117 118 subqueries.clear(); 119 subqueries.push_back(Xapian::Query("date")); 120 subqueries.push_back(Xapian::Query(Xapian::Query::OP_AND_NOT, 121 Xapian::Query("sky"), 122 Xapian::Query("date"))); 123 subqueries_list.push_back(subqueries); 124 // All 34 results should be different. 125 subqueries_sameweight_count.push_back(0); 126 subqueries_diffweight_count.push_back(34); 127 128 subqueries.clear(); 129 subqueries.push_back(Xapian::Query("date")); 130 subqueries.push_back(Xapian::Query(Xapian::Query::OP_AND, 131 Xapian::Query("sky"), 132 Xapian::Query("date"))); 133 subqueries_list.push_back(subqueries); 134 // The AND only matches 1 document, so the estimated termfreq for the whole 135 // synonym works out as 33 (due to rounding), which is the same as the 136 // termfreq for "date". Therefore most of the weights are the same as just 137 // for the pure "date" search, and the only document which gets a different 138 // weight is the one also matched by "sky" (because it has a wdf boost). 139 subqueries_sameweight_count.push_back(32); 140 subqueries_diffweight_count.push_back(1); 141 142 subqueries.clear(); 143 subqueries.push_back(Xapian::Query("date")); 144 subqueries.push_back(Xapian::Query(Xapian::Query::OP_XOR, 145 Xapian::Query("sky"), 146 Xapian::Query("date"))); 147 subqueries_list.push_back(subqueries); 148 // All 34 results should be different. 
149 subqueries_sameweight_count.push_back(0); 150 subqueries_diffweight_count.push_back(34); 151 152 subqueries.clear(); 153 subqueries.push_back(Xapian::Query("date")); 154 subqueries.push_back(Xapian::Query(Xapian::Query::OP_SYNONYM, 155 Xapian::Query("sky"), 156 Xapian::Query("date"))); 157 subqueries_list.push_back(subqueries); 158 // When the top-level operator is OR, the synonym part has an estimated 159 // termfreq of 35. When the top-level operator is SYNONYM, the whole query 160 // has an estimated termfreq of 35, and is in fact the same as the synonmyn 161 // part in the OR query, except that the wqf of "date" is 2. We're 162 // currently not using the wqfs of components of synonyms, so this 163 // difference has no effect on the weightings. Therefore, for the 1 164 // document which does not contain "data", we get the same result with 165 // SYNONYM as with OR. 166 subqueries_sameweight_count.push_back(1); 167 subqueries_diffweight_count.push_back(33); 168 169 subqueries.clear(); 170 subqueries.push_back(Xapian::Query("sky")); 171 subqueries.push_back(Xapian::Query("date")); 172 subqueries.push_back(Xapian::Query("stein")); 173 subqueries.push_back(Xapian::Query("ally")); 174 subqueries_list.push_back(subqueries); 175 // All 35 results should be different. 176 subqueries_sameweight_count.push_back(0); 177 subqueries_diffweight_count.push_back(35); 178 179 subqueries.clear(); 180 subqueries.push_back(Xapian::Query("attitud")); 181 subqueries.push_back(Xapian::Query(Xapian::Query::OP_PHRASE, 182 Xapian::Query("german"), 183 Xapian::Query("adventur"))); 184 subqueries_list.push_back(subqueries); 185 // The estimated term frequency for the synoynm is 2 (because the estimate 186 // for the phrase is 0), which is the same as the term frequency of 187 // "attitud". Thus, the synonym gets the same weight as "attitud", so 188 // documents with only "attitud" (but not the phrase) in them get the same 189 // wdf, and have the same total weight. 
There turns out to be exactly one 190 // such document. 191 subqueries_sameweight_count.push_back(1); 192 subqueries_diffweight_count.push_back(3); 193 194 for (vector<vector<Xapian::Query> >::size_type subqgroup = 0; 195 subqgroup != subqueries_list.size(); ++subqgroup) 196 { 197 vector<Xapian::Query> * qlist = &(subqueries_list[subqgroup]); 198 // Run two queries, one joining the subqueries with OR and one joining 199 // them with SYNONYM. 200 Xapian::Enquire enquire(db); 201 202 // Do the search with OR 203 Xapian::Query orquery(Xapian::Query::OP_OR, qlist->begin(), qlist->end()); 204 enquire.set_query(orquery); 205 Xapian::MSet ormset = enquire.get_mset(0, lots); 206 207 // Do the search with synonym, getting all the results. 208 Xapian::Query synquery(Xapian::Query::OP_SYNONYM, qlist->begin(), qlist->end()); 209 enquire.set_query(synquery); 210 Xapian::MSet synmset = enquire.get_mset(0, lots); 211 212 tout << "Comparing " << orquery << " with " << synquery << '\n'; 213 214 // Check that the queries return some results. 215 TEST_NOT_EQUAL(synmset.size(), 0); 216 // Check that the queries return the same number of results. 217 TEST_EQUAL(synmset.size(), ormset.size()); 218 map<Xapian::docid, Xapian::weight> values_or; 219 map<Xapian::docid, Xapian::weight> values_synonym; 220 for (Xapian::doccount i = 0; i < synmset.size(); ++i) { 221 values_or[*ormset[i]] = ormset[i].get_weight(); 222 values_synonym[*synmset[i]] = synmset[i].get_weight(); 223 } 224 TEST_EQUAL(values_or.size(), values_synonym.size()); 225 226 /* Check that the most of the weights for items in the "or" mset are 227 * different from those in the "synonym" mset. */ 228 int same_weight = 0; 229 int different_weight = 0; 230 for (map<Xapian::docid, Xapian::weight>::const_iterator 231 j = values_or.begin(); j != values_or.end(); ++j) { 232 Xapian::docid did = j->first; 233 // Check that all the results in the or tree make it to the synonym 234 // tree. 
235 TEST(values_synonym.find(did) != values_synonym.end()); 236 if (values_or[did] == values_synonym[did]) { 237 ++same_weight; 238 } else { 239 ++different_weight; 240 } 241 } 242 243 int expected_same = subqueries_sameweight_count[subqgroup]; 244 int expected_diff = subqueries_diffweight_count[subqgroup]; 245 246 TEST_EQUAL(different_weight, expected_diff); 247 TEST_EQUAL(same_weight, expected_same); 248 249 // Do the search with synonym, but just get the top result. 250 // (Regression test - the OR subquery in the synonym postlist tree used 251 // to shortcut incorrectly, and return the wrong result here). 252 Xapian::MSet mset_top = enquire.get_mset(0, 1); 253 TEST_EQUAL(mset_top.size(), 1); 254 TEST(mset_range_is_same(mset_top, 0, synmset, 0, 1)); 255 } 256 return true; 257 } 258 259 // Regression test - test a synonym search with a MultiAndPostlist. 260 DEFINE_TESTCASE(synonym2, backend) { 261 Xapian::Query query; 262 vector<Xapian::Query> subqueries; 263 subqueries.push_back(Xapian::Query("file")); 264 subqueries.push_back(Xapian::Query("the")); 265 subqueries.push_back(Xapian::Query("next")); 266 subqueries.push_back(Xapian::Query("reader")); 267 query = Xapian::Query(Xapian::Query::OP_AND, subqueries.begin(), subqueries.end()); 268 subqueries.clear(); 269 subqueries.push_back(query); 270 subqueries.push_back(Xapian::Query("gutenberg")); 271 query = Xapian::Query(Xapian::Query::OP_SYNONYM, subqueries.begin(), subqueries.end()); 272 273 tout << query << '\n'; 274 275 Xapian::Database db(get_database("etext")); 276 Xapian::Enquire enquire(db); 277 enquire.set_query(query); 278 Xapian::MSet mset = enquire.get_mset(0, 10); 279 tout << mset << '\n'; 280 281 // Regression test that OP_SCALE_WEIGHT works with OP_SYNONYM 282 double maxposs = mset.get_max_possible(); 283 query = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 10.0); 284 enquire.set_query(query); 285 mset = enquire.get_mset(0, 10); 286 double maxposs2 = mset.get_max_possible(); 287 288 
TEST_EQUAL_DOUBLE(maxposs * 10.0, maxposs2); 289 290 return true; 291 } 292 293 static void 294 check_msets_contain_same_docs(const Xapian::MSet & mset1, 295 const Xapian::MSet & mset2) 296 { 297 TEST_EQUAL(mset1.size(), mset2.size()); 298 299 set<Xapian::docid> docids; 300 for (Xapian::doccount i = 0; i < mset1.size(); ++i) { 301 docids.insert(*mset1[i]); 302 } 303 304 // Check that all the results in mset1 are in mset2. 305 for (Xapian::doccount j = 0; j < mset2.size(); ++j) { 306 // Check that we can erase each entry from mset2 element. Since mset1 307 // and mset2 are the same size this means we can be sure that there 308 // were no repeated docids in either (it would be a bug if there were). 309 TEST(docids.erase(*mset2[j])); 310 } 311 } 312 313 // Test a synonym search which has had its weight scaled to 0. 314 DEFINE_TESTCASE(synonym3, backend) { 315 Xapian::Query query = Xapian::Query(Xapian::Query::OP_SYNONYM, 316 Xapian::Query("sky"), 317 Xapian::Query("date")); 318 319 Xapian::Database db(get_database("etext")); 320 Xapian::Enquire enquire(db); 321 enquire.set_query(query); 322 Xapian::MSet mset_orig = enquire.get_mset(0, db.get_doccount()); 323 324 tout << query << '\n'; 325 tout << mset_orig << '\n'; 326 327 // Test that OP_SCALE_WEIGHT with a factor of 0.0 works with OP_SYNONYM 328 // (this has a special codepath to avoid doing the synonym calculation). 329 query = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 0.0); 330 enquire.set_query(query); 331 Xapian::MSet mset_zero = enquire.get_mset(0, db.get_doccount()); 332 333 tout << query << '\n'; 334 tout << mset_zero << '\n'; 335 336 // Check that the queries return some results. 337 TEST_NOT_EQUAL(mset_zero.size(), 0); 338 // Check that the queries return the same document IDs, and the zero 339 // one has zero weight. 
340 check_msets_contain_same_docs(mset_orig, mset_zero); 341 for (Xapian::doccount i = 0; i < mset_orig.size(); ++i) { 342 TEST_NOT_EQUAL(mset_orig[i].get_weight(), 0.0); 343 TEST_EQUAL(mset_zero[i].get_weight(), 0.0); 344 } 345 346 return true; 347 } 348 349 // Test synonym searches combined with various operators. 350 DEFINE_TESTCASE(synonym4, backend) { 351 Xapian::Database db(get_database("etext")); 352 Xapian::Enquire enquire(db); 353 Xapian::Query syn_query = Xapian::Query(Xapian::Query::OP_SYNONYM, 354 Xapian::Query("gutenberg"), 355 Xapian::Query("blockhead")); 356 Xapian::Query or_query = Xapian::Query(Xapian::Query::OP_OR, 357 Xapian::Query("gutenberg"), 358 Xapian::Query("blockhead")); 359 Xapian::Query date_query = Xapian::Query("date"); 360 361 // Check some queries. 362 static const Xapian::Query::op operators[] = { 363 Xapian::Query::OP_AND_MAYBE, 364 Xapian::Query::OP_AND_NOT, 365 Xapian::Query::OP_AND, 366 Xapian::Query::OP_XOR, 367 Xapian::Query::OP_OR, 368 Xapian::Query::OP_SYNONYM 369 }; 370 const Xapian::Query::op * end; 371 end = operators + sizeof(operators) / sizeof(operators[0]); 372 for (const Xapian::Query::op * i = operators; i != end; ++i) { 373 tout.str(string()); 374 Xapian::Query query1(*i, syn_query, date_query); 375 Xapian::Query query2(*i, or_query, date_query); 376 377 enquire.set_query(query1); 378 tout << "query1:" << query1 << '\n'; 379 Xapian::MSet mset1 = enquire.get_mset(0, db.get_doccount()); 380 tout << "mset1:" << mset1 << '\n'; 381 enquire.set_query(query2); 382 tout << "query2:" << query2 << '\n'; 383 Xapian::MSet mset2 = enquire.get_mset(0, db.get_doccount()); 384 tout << "mset2:" << mset2 << '\n'; 385 386 TEST_NOT_EQUAL(mset1.size(), 0); 387 check_msets_contain_same_docs(mset1, mset2); 388 } 389 390 return true; 391 } -
xapian-core/tests/queryparsertest.cc
Property changes on: xapian-core/tests/api_opsynonym.cc ___________________________________________________________________ Added: svn:eol-style + native
1 1 /* queryparsertest.cc: Tests of Xapian::QueryParser 2 2 * 3 3 * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009 Olly Betts 4 * Copyright (C) 2007,2009 Lemur Consulting Ltd 4 5 * 5 6 * This program is free software; you can redistribute it and/or 6 7 * modify it under the terms of the GNU General Public License as … … 789 790 Xapian::Query qobj = qp.parse_query("ab*", Xapian::QueryParser::FLAG_WILDCARD); 790 791 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(abc:(pos=1))"); 791 792 qobj = qp.parse_query("muscle*", Xapian::QueryParser::FLAG_WILDCARD); 792 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscle:(pos=1) ORmusclebound:(pos=1)))");793 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscle:(pos=1) SYNONYM musclebound:(pos=1)))"); 793 794 qobj = qp.parse_query("meat*", Xapian::QueryParser::FLAG_WILDCARD); 794 795 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query()"); 795 796 qobj = qp.parse_query("musc*", Xapian::QueryParser::FLAG_WILDCARD); 796 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscat:(pos=1) OR muscle:(pos=1) OR musclebound:(pos=1) ORmuscular:(pos=1)))");797 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscat:(pos=1) SYNONYM muscle:(pos=1) SYNONYM musclebound:(pos=1) SYNONYM muscular:(pos=1)))"); 797 798 qobj = qp.parse_query("mutt*", Xapian::QueryParser::FLAG_WILDCARD); 798 799 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(mutton:(pos=1))"); 799 800 // Regression test (we weren't lowercasing terms before checking if they … … 886 887 qp.add_prefix("author", "A"); 887 888 Xapian::Query qobj; 888 889 qobj = qp.parse_query("author:h*", Xapian::QueryParser::FLAG_WILDCARD); 889 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((Aheinlein:(pos=1) ORAhuxley:(pos=1)))");890 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((Aheinlein:(pos=1) SYNONYM Ahuxley:(pos=1)))"); 890 891 qobj = qp.parse_query("author:h* test", 
Xapian::QueryParser::FLAG_WILDCARD); 891 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( Aheinlein:(pos=1) OR Ahuxley:(pos=1) OR test:(pos=2)))");892 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((Aheinlein:(pos=1) SYNONYM Ahuxley:(pos=1)) OR test:(pos=2)))"); 892 893 return true; 893 894 #endif 894 895 } … … 918 919 doc.add_term("XTcowl"); 919 920 doc.add_term("XTcox"); 920 921 doc.add_term("ZXTcow"); 922 doc.add_term("XONEpartial"); 923 doc.add_term("XONEpartial2"); 924 doc.add_term("XTWOpartial3"); 925 doc.add_term("XTWOpartial4"); 921 926 db.add_document(doc); 922 927 Xapian::QueryParser qp; 923 928 qp.set_database(db); … … 933 938 qobj = qp.parse_query("ab", Xapian::QueryParser::FLAG_PARTIAL); 934 939 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((abc:(pos=1) OR Zab:(pos=1)))"); 935 940 qobj = qp.parse_query("muscle", Xapian::QueryParser::FLAG_PARTIAL); 936 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( muscle:(pos=1) OR musclebound:(pos=1) OR Zmuscl:(pos=1)))");941 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((muscle:(pos=1) SYNONYM musclebound:(pos=1)) OR Zmuscl:(pos=1)))"); 937 942 qobj = qp.parse_query("meat", Xapian::QueryParser::FLAG_PARTIAL); 938 943 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(Zmeat:(pos=1))"); 939 944 qobj = qp.parse_query("musc", Xapian::QueryParser::FLAG_PARTIAL); 940 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( muscat:(pos=1) OR muscle:(pos=1) OR musclebound:(pos=1) OR muscular:(pos=1) OR Zmusc:(pos=1)))");945 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((muscat:(pos=1) SYNONYM muscle:(pos=1) SYNONYM musclebound:(pos=1) SYNONYM muscular:(pos=1)) OR Zmusc:(pos=1)))"); 941 946 qobj = qp.parse_query("mutt", Xapian::QueryParser::FLAG_PARTIAL); 942 947 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((mutton:(pos=1) OR Zmutt:(pos=1)))"); 943 948 qobj = qp.parse_query("abc musc", Xapian::QueryParser::FLAG_PARTIAL); 944 
TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((Zabc:(pos=1) OR muscat:(pos=2) OR muscle:(pos=2) OR musclebound:(pos=2) OR muscular:(pos=2) OR Zmusc:(pos=2)))");949 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((Zabc:(pos=1) OR (muscat:(pos=2) SYNONYM muscle:(pos=2) SYNONYM musclebound:(pos=2) SYNONYM muscular:(pos=2)) OR Zmusc:(pos=2)))"); 945 950 qobj = qp.parse_query("a* mutt", Xapian::QueryParser::FLAG_PARTIAL | Xapian::QueryParser::FLAG_WILDCARD); 946 951 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((abc:(pos=1) OR mutton:(pos=2) OR Zmutt:(pos=2)))"); 947 952 948 953 // Check behaviour with stemmed terms, and stem strategy STEM_SOME. 949 954 qobj = qp.parse_query("o", Xapian::QueryParser::FLAG_PARTIAL); 950 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( out:(pos=1) OR outside:(pos=1) OR Zo:(pos=1)))");955 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR Zo:(pos=1)))"); 951 956 qobj = qp.parse_query("ou", Xapian::QueryParser::FLAG_PARTIAL); 952 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( out:(pos=1) OR outside:(pos=1) OR Zou:(pos=1)))");957 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR Zou:(pos=1)))"); 953 958 qobj = qp.parse_query("out", Xapian::QueryParser::FLAG_PARTIAL); 954 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( out:(pos=1) OR outside:(pos=1) OR Zout:(pos=1)))");959 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR Zout:(pos=1)))"); 955 960 qobj = qp.parse_query("outs", Xapian::QueryParser::FLAG_PARTIAL); 956 961 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((outside:(pos=1) OR Zout:(pos=1)))"); 957 962 qobj = qp.parse_query("outsi", Xapian::QueryParser::FLAG_PARTIAL); … … 963 968 964 969 // Check behaviour with capitalised terms, and stem strategy STEM_SOME. 
965 970 qobj = qp.parse_query("Out", Xapian::QueryParser::FLAG_PARTIAL); 966 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( out:(pos=1,wqf=2) OR outside:(pos=1)))");971 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR out:(pos=1)))"); 967 972 qobj = qp.parse_query("Outs", Xapian::QueryParser::FLAG_PARTIAL); 968 973 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((outside:(pos=1) OR outs:(pos=1)))"); 969 974 qobj = qp.parse_query("Outside", Xapian::QueryParser::FLAG_PARTIAL); … … 972 977 // And now with stemming strategy STEM_ALL. 973 978 qp.set_stemming_strategy(Xapian::QueryParser::STEM_ALL); 974 979 qobj = qp.parse_query("Out", Xapian::QueryParser::FLAG_PARTIAL); 975 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( out:(pos=1,wqf=2) OR outside:(pos=1)))");980 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR out:(pos=1)))"); 976 981 qobj = qp.parse_query("Outs", Xapian::QueryParser::FLAG_PARTIAL); 977 982 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((outside:(pos=1) OR out:(pos=1)))"); 978 983 qobj = qp.parse_query("Outside", Xapian::QueryParser::FLAG_PARTIAL); … … 981 986 // Check handling of a case with a prefix. 
982 987 qp.set_stemming_strategy(Xapian::QueryParser::STEM_SOME); 983 988 qobj = qp.parse_query("title:cow", Xapian::QueryParser::FLAG_PARTIAL); 984 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( XTcowl:(pos=1) OR XTcows:(pos=1) OR ZXTcow:(pos=1)))");989 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((XTcowl:(pos=1) SYNONYM XTcows:(pos=1)) OR ZXTcow:(pos=1)))"); 985 990 qobj = qp.parse_query("title:cows", Xapian::QueryParser::FLAG_PARTIAL); 986 991 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((XTcows:(pos=1) OR ZXTcow:(pos=1)))"); 987 992 qobj = qp.parse_query("title:Cow", Xapian::QueryParser::FLAG_PARTIAL); 988 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( XTcowl:(pos=1) OR XTcows:(pos=1) OR XTcow:(pos=1)))");993 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((XTcowl:(pos=1) SYNONYM XTcows:(pos=1)) OR XTcow:(pos=1)))"); 989 994 qobj = qp.parse_query("title:Cows", Xapian::QueryParser::FLAG_PARTIAL); 990 995 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(XTcows:(pos=1,wqf=2))"); 991 996 … … 993 998 // inflate the wqf of the "parsed as normal" version of a partial term 994 999 // by multiplying it by the number of prefixes mapped to. 995 1000 qobj = qp.parse_query("double:vision", Xapian::QueryParser::FLAG_PARTIAL); 996 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((ZXONEvision:(pos=1) OR ZXTWOvision:(pos=1)))"); 1001 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((ZXONEvision:(pos=1) SYNONYM ZXTWOvision:(pos=1)))"); 1002 1003 // Test handling of FLAG_PARTIAL when there's more than one prefix. 
1004 qobj = qp.parse_query("double:part", Xapian::QueryParser::FLAG_PARTIAL); 1005 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((XONEpartial:(pos=1) SYNONYM XONEpartial2:(pos=1) SYNONYM XTWOpartial3:(pos=1) SYNONYM XTWOpartial4:(pos=1)) OR (ZXONEpart:(pos=1) SYNONYM ZXTWOpart:(pos=1))))"); 1006 1007 // Test handling of FLAG_PARTIAL when there's more than one prefix, without 1008 // stemming. 1009 qp.set_stemming_strategy(Xapian::QueryParser::STEM_NONE); 1010 qobj = qp.parse_query("double:part", Xapian::QueryParser::FLAG_PARTIAL); 1011 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((XONEpartial:(pos=1) SYNONYM XONEpartial2:(pos=1) SYNONYM XTWOpartial3:(pos=1) SYNONYM XTWOpartial4:(pos=1)) OR (XONEpart:(pos=1) SYNONYM XTWOpart:(pos=1))))"); 1012 qobj = qp.parse_query("double:partial", Xapian::QueryParser::FLAG_PARTIAL); 1013 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((XONEpartial:(pos=1) SYNONYM XONEpartial2:(pos=1) SYNONYM XTWOpartial3:(pos=1) SYNONYM XTWOpartial4:(pos=1)) OR (XONEpartial:(pos=1) SYNONYM XTWOpartial:(pos=1))))"); 997 1014 998 1015 return true; 999 1016 #endif … … 1563 1580 } 1564 1581 1565 1582 static test test_synonym_queries[] = { 1566 { "searching", "(Zsearch:(pos=1) OR Zfind:(pos=1) ORZlocate:(pos=1))" },1567 { "search", "(Zsearch:(pos=1) ORfind:(pos=1))" },1568 { "Search", "(search:(pos=1) ORfind:(pos=1))" },1583 { "searching", "(Zsearch:(pos=1) SYNONYM Zfind:(pos=1) SYNONYM Zlocate:(pos=1))" }, 1584 { "search", "(Zsearch:(pos=1) SYNONYM find:(pos=1))" }, 1585 { "Search", "(search:(pos=1) SYNONYM find:(pos=1))" }, 1569 1586 { "Searching", "searching:(pos=1)" }, 1570 { "searching OR terms", "( Zsearch:(pos=1) OR Zfind:(pos=1) OR Zlocate:(pos=1) OR Zterm:(pos=2))" },1571 { "search OR terms", "( Zsearch:(pos=1) OR find:(pos=1) OR Zterm:(pos=2))" },1572 { "search +terms", "(Zterm:(pos=2) AND_MAYBE (Zsearch:(pos=1) ORfind:(pos=1)))" },1573 { "search -terms", "((Zsearch:(pos=1) ORfind:(pos=1)) AND_NOT 
Zterm:(pos=2))" },1574 { "+search terms", "((Zsearch:(pos=1) ORfind:(pos=1)) AND_MAYBE Zterm:(pos=2))" },1575 { "-search terms", "(Zterm:(pos=2) AND_NOT (Zsearch:(pos=1) ORfind:(pos=1)))" },1576 { "search terms", "( Zsearch:(pos=1) OR find:(pos=1) OR Zterm:(pos=2))" },1587 { "searching OR terms", "((Zsearch:(pos=1) SYNONYM Zfind:(pos=1) SYNONYM Zlocate:(pos=1)) OR Zterm:(pos=2))" }, 1588 { "search OR terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) OR Zterm:(pos=2))" }, 1589 { "search +terms", "(Zterm:(pos=2) AND_MAYBE (Zsearch:(pos=1) SYNONYM find:(pos=1)))" }, 1590 { "search -terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) AND_NOT Zterm:(pos=2))" }, 1591 { "+search terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) AND_MAYBE Zterm:(pos=2))" }, 1592 { "-search terms", "(Zterm:(pos=2) AND_NOT (Zsearch:(pos=1) SYNONYM find:(pos=1)))" }, 1593 { "search terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) OR Zterm:(pos=2))" }, 1577 1594 // Shouldn't trigger synonyms: 1578 1595 { "\"search terms\"", "(search:(pos=1) PHRASE 2 terms:(pos=2))" }, 1579 1596 { NULL, NULL } … … 1613 1630 1614 1631 static test test_multi_synonym_queries[] = { 1615 1632 { "sun OR tan OR cream", "(Zsun:(pos=1) OR Ztan:(pos=2) OR Zcream:(pos=3))" }, 1616 { "sun tan", "( Zsun:(pos=1) OR Ztan:(pos=2) ORbathe:(pos=1))" },1617 { "sun tan cream", "( Zsun:(pos=1) OR Ztan:(pos=2) OR Zcream:(pos=3) ORlotion:(pos=1))" },1618 { "beach sun tan holiday", "(Zbeach:(pos=1) OR Zsun:(pos=2) OR Ztan:(pos=3) OR bathe:(pos=2) OR Zholiday:(pos=4))" },1619 { "sun tan sun tan cream", "( Zsun:(pos=1) OR Ztan:(pos=2) OR bathe:(pos=1) OR Zsun:(pos=3) OR Ztan:(pos=4) OR Zcream:(pos=5) OR lotion:(pos=3))" },1620 { "single", "(Zsingl:(pos=1) ORrecord:(pos=1))" },1633 { "sun tan", "((Zsun:(pos=1) OR Ztan:(pos=2)) SYNONYM bathe:(pos=1))" }, 1634 { "sun tan cream", "((Zsun:(pos=1) OR Ztan:(pos=2) OR Zcream:(pos=3)) SYNONYM lotion:(pos=1))" }, 1635 { "beach sun tan holiday", "(Zbeach:(pos=1) OR ((Zsun:(pos=2) OR Ztan:(pos=3)) 
SYNONYM bathe:(pos=2)) OR Zholiday:(pos=4))" }, 1636 { "sun tan sun tan cream", "(((Zsun:(pos=1) OR Ztan:(pos=2)) SYNONYM bathe:(pos=1)) OR ((Zsun:(pos=3) OR Ztan:(pos=4) OR Zcream:(pos=5)) SYNONYM lotion:(pos=3)))" }, 1637 { "single", "(Zsingl:(pos=1) SYNONYM record:(pos=1))" }, 1621 1638 { NULL, NULL } 1622 1639 }; 1623 1640 … … 1656 1673 1657 1674 static test test_synonym_op_queries[] = { 1658 1675 { "searching", "Zsearch:(pos=1)" }, 1659 { "~searching", "(Zsearch:(pos=1) OR Zfind:(pos=1) ORZlocate:(pos=1))" },1660 { "~search", "(Zsearch:(pos=1) ORfind:(pos=1))" },1661 { "~Search", "(search:(pos=1) ORfind:(pos=1))" },1676 { "~searching", "(Zsearch:(pos=1) SYNONYM Zfind:(pos=1) SYNONYM Zlocate:(pos=1))" }, 1677 { "~search", "(Zsearch:(pos=1) SYNONYM find:(pos=1))" }, 1678 { "~Search", "(search:(pos=1) SYNONYM find:(pos=1))" }, 1662 1679 { "~Searching", "searching:(pos=1)" }, 1663 { "~searching OR terms", "( Zsearch:(pos=1) OR Zfind:(pos=1) OR Zlocate:(pos=1) OR Zterm:(pos=2))" },1664 { "~search OR terms", "( Zsearch:(pos=1) OR find:(pos=1) OR Zterm:(pos=2))" },1665 { "~search +terms", "(Zterm:(pos=2) AND_MAYBE (Zsearch:(pos=1) ORfind:(pos=1)))" },1666 { "~search -terms", "((Zsearch:(pos=1) ORfind:(pos=1)) AND_NOT Zterm:(pos=2))" },1667 { "+~search terms", "((Zsearch:(pos=1) ORfind:(pos=1)) AND_MAYBE Zterm:(pos=2))" },1668 { "-~search terms", "(Zterm:(pos=2) AND_NOT (Zsearch:(pos=1) ORfind:(pos=1)))" },1669 { "~search terms", "( Zsearch:(pos=1) OR find:(pos=1) OR Zterm:(pos=2))" },1680 { "~searching OR terms", "((Zsearch:(pos=1) SYNONYM Zfind:(pos=1) SYNONYM Zlocate:(pos=1)) OR Zterm:(pos=2))" }, 1681 { "~search OR terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) OR Zterm:(pos=2))" }, 1682 { "~search +terms", "(Zterm:(pos=2) AND_MAYBE (Zsearch:(pos=1) SYNONYM find:(pos=1)))" }, 1683 { "~search -terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) AND_NOT Zterm:(pos=2))" }, 1684 { "+~search terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) AND_MAYBE Zterm:(pos=2))" }, 
1685 { "-~search terms", "(Zterm:(pos=2) AND_NOT (Zsearch:(pos=1) SYNONYM find:(pos=1)))" }, 1686 { "~search terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) OR Zterm:(pos=2))" }, 1670 1687 // FIXME: should look for multi-term synonym... 1671 1688 { "~\"search terms\"", "(search:(pos=1) PHRASE 2 terms:(pos=2))" }, 1672 1689 { NULL, NULL } -
xapian-core/tests/Makefile.am
116 116 api_db.cc \ 117 117 api_generated.cc \ 118 118 api_nodb.cc \ 119 api_opsynonym.cc \ 119 120 api_percentages.cc \ 120 121 api_posdb.cc \ 121 122 api_query.cc \ -
xapian-core/include/xapian/query.h
Property changes on: xapian-core/tests ___________________________________________________________________ Modified: svn:ignore - .*.sw? *.lo *.pyc Makefile.in Makefile .deps .libs apitest internaltest perftest queryparsertest runsrv runtest stemtest termgentest apitest.exe internaltest.exe perftest.exe queryparsertest.exe stemtest.exe termgentest.exe .chert .flint .multi .multichert .multiflint .stub api_all.h api_anydb.h api_backend.h api_closedb.h api_collapse.h api_collated.h api_collated.stamp api_db.h api_generated.cc api_generated.h api_nodb.h api_percentages.h api_posdb.h api_replicate.h api_query.h api_scalability.h api_serialise.h api_sorting.h api_spelling.h api_transdb.h api_unicode.h api_valuestats.h api_valuestream.h api_wrdb.h perftest_all.h perftest_collated.h perftest_collated.stamp perftest_matchdecider.h perftest_randomidx.h perflog.xml submitperftest + .*.sw? *.lo *.pyc Makefile.in Makefile .deps .libs apitest internaltest perftest queryparsertest runsrv runtest stemtest termgentest apitest.exe internaltest.exe perftest.exe queryparsertest.exe stemtest.exe termgentest.exe .chert .flint .multi .multichert .multiflint .stub api_all.h api_anydb.h api_backend.h api_closedb.h api_collapse.h api_collated.h api_collated.stamp api_db.h api_generated.cc api_generated.h api_nodb.h api_opsynonym.h api_percentages.h api_posdb.h api_replicate.h api_query.h api_scalability.h api_serialise.h api_sorting.h api_spelling.h api_transdb.h api_unicode.h api_valuestats.h api_valuestream.h api_wrdb.h perftest_all.h perftest_collated.h perftest_collated.stamp perftest_matchdecider.h perftest_randomidx.h perflog.xml submitperftest
119 119 OP_VALUE_GE, 120 120 121 121 /** Filter by a less-than-or-equal test on a document value. */ 122 OP_VALUE_LE 122 OP_VALUE_LE, 123 124 /** Treat a set of queries as synonyms. 125 * 126 * This returns all results which match at least one of the 127 * queries, but weights them as if all the subqueries are instances 128 * of the same term: so multiple matching terms for a document 129 * increase the wdf value used, and the term frequency is based on 130 * the number of documents which would match an OR of all the 131 * subqueries. 132 * 133 * The term frequency used will usually be an approximation, 134 * because calculating the precise combined term frequency would 135 * be overly expensive. 136 * 137 * Identical to OP_OR, except for the weightings returned. 138 */ 139 OP_SYNONYM 123 140 } op; 124 141 125 142 /** Copy constructor. */ -
xapian-core/include/xapian/weight.h
2 2 * @brief Weighting scheme API. 3 3 */ 4 4 /* Copyright (C) 2007,2008,2009 Olly Betts 5 * Copyright (C) 2009 Lemur Consulting Ltd 5 6 * 6 7 * This program is free software; you can redistribute it and/or 7 8 * modify it under the terms of the GNU General Public License as … … 212 213 const std::string & term, Xapian::termcount wqf_, 213 214 double factor); 214 215 216 /** @private @internal Initialise this object to calculate weights for a 217 * synonym. 218 * 219 * @param stats Source of statistics. 220 * @param query_len_ Query length. 221 * @param factor Any scaling factor (e.g. from OP_SCALE_WEIGHT). 222 * @param termfreq The termfreq to use. 223 * @param reltermfreq The reltermfreq to use. 224 */ 225 void init_(const Internal & stats, Xapian::termcount query_len_, 226 double factor, Xapian::doccount termfreq, 227 Xapian::doccount reltermfreq); 228 215 229 /** @private @internal Initialise this object to calculate the extra weight 216 230 * component. 217 231 * … … 230 244 return stats_needed & DOC_LENGTH; 231 245 } 232 246 247 /** @private @internal Return true if the WDF is needed. 248 * 249 * If this method returns true, then the WDF will be fetched and passed to 250 * @a get_sumpart(). Otherwise 0 may be passed for the wdf. 251 */ 252 bool get_sumpart_needs_wdf_() const { 253 return stats_needed & WDF; 254 } 255 233 256 protected: 234 257 /// Only allow subclasses to copy us. 
235 258 Weight(const Weight &); … … 373 396 need_stat(RELTERMFREQ); 374 397 need_stat(WDF); 375 398 need_stat(WDF_MAX); 399 need_stat(WDF); 376 400 if (param_k2 != 0 || (param_k1 != 0 && param_b != 0)) { 377 401 need_stat(DOC_LENGTH_MIN); 378 402 need_stat(AVERAGE_LENGTH); … … 392 416 need_stat(RELTERMFREQ); 393 417 need_stat(WDF); 394 418 need_stat(WDF_MAX); 419 need_stat(WDF); 395 420 need_stat(DOC_LENGTH_MIN); 396 421 need_stat(AVERAGE_LENGTH); 397 422 need_stat(DOC_LENGTH); … … 455 480 need_stat(DOC_LENGTH_MIN); 456 481 need_stat(WDF); 457 482 need_stat(WDF_MAX); 483 need_stat(WDF); 458 484 } 459 485 460 486 std::string name() const; -
xapian-core/net/serialise.cc
128 128 result += encode_length(stats.collection_size); 129 129 result += encode_length(stats.rset_size); 130 130 131 map<string, Xapian::doccount>::const_iterator i; 131 map<string, TermFreqs>::const_iterator i; 132 132 133 result += encode_length(stats.termfreq.size()); 134 for (i = stats.termfreq.begin(); i != stats.termfreq.end(); ++i) { 133 // FIXME - next time we're breaking the protocol API, do the freqs and 134 // relfreqs term-by-term in a single pass. 135 136 result += encode_length(stats.termfreqs.size()); 137 for (i = stats.termfreqs.begin(); i != stats.termfreqs.end(); ++i) { 135 138 result += encode_length(i->first.size()); 136 139 result += i->first; 137 result += encode_length(i->second); 140 result += encode_length(i->second.termfreq); 138 141 } 139 142 140 for (i = stats.reltermfreq.begin(); i != stats.reltermfreq.end(); ++i) { 143 for (i = stats.termfreqs.begin(); i != stats.termfreqs.end(); ++i) { 141 144 result += encode_length(i->first.size()); 142 145 result += i->first; 143 result += encode_length(i->second); 146 result += encode_length(i->second.reltermfreq); 144 147 } 145 148 146 149 return result; … … 163 166 size_t len = decode_length(&p, p_end, true); 164 167 string term(p, len); 165 168 p += len; 166 stat.termfreq.insert(make_pair(term, decode_length(&p, p_end, false))); 169 stat.termfreqs.insert(make_pair(term, TermFreqs(decode_length(&p, p_end, false), 0))); 167 170 } 168 171 169 172 while (p != p_end) { 170 173 size_t len = decode_length(&p, p_end, true); 171 174 string term(p, len); 172 175 p += len; 173 stat.reltermfreq.insert(make_pair(term, decode_length(&p, p_end, false))); 176 stat.termfreqs[term].reltermfreq = decode_length(&p, p_end, false); 174 177 } 175 178 176 179 return stat; -
xapian-core/common/leafpostlist.h
2 2 * @brief Abstract base class for leaf postlists. 3 3 */ 4 4 /* Copyright (C) 2007 Olly Betts 5 * Copyright (C) 2009 Lemur Consulting Ltd 5 6 * 6 7 * This program is free software; you can redistribute it and/or 7 8 * modify it under the terms of the GNU General Public License as … … 23 24 24 25 #include "postlist.h" 25 26 27 #include <string> 28 26 29 namespace Xapian { 27 30 class Weight; 28 31 } … … 76 79 Xapian::weight recalc_maxweight(); 77 80 }; 78 81 82 /// Abstract base class for leaf postlists based on a term. 83 class TermBasedLeafPostList : public LeafPostList { 84 /// Don't allow assignment. 85 void operator=(const TermBasedLeafPostList &); 86 87 /// Don't allow copying. 88 TermBasedLeafPostList(const TermBasedLeafPostList &); 89 90 protected: 91 /// The term name for this postlist ("" for an alldocs postlist). 92 std::string tname; 93 94 /// Only constructable as a base class for derived classes. 95 TermBasedLeafPostList(const std::string & tname_) 96 : LeafPostList(), tname(tname_) {} 97 98 public: 99 TermFreqs get_termfreq_est_using_stats( 100 const Xapian::Weight::Internal & stats) const; 101 }; 102 79 103 #endif // XAPIAN_INCLUDED_LEAFPOSTLIST_H -
xapian-core/common/postlist.h
2 2 * @brief Abstract base class for postlists. 3 3 */ 4 4 /* Copyright (C) 2007,2008,2009 Olly Betts 5 * Copyright (C) 2009 Lemur Consulting Ltd 5 6 * 6 7 * This program is free software; you can redistribute it and/or 7 8 * modify it under the terms of the GNU General Public License as … … 28 29 #include <xapian/postingiterator.h> 29 30 30 31 #include "positionlist.h" 32 #include "weightinternal.h" 31 33 32 34 /// Abstract base class for postlists. 33 35 class Xapian::PostingIterator::Internal : public Xapian::Internal::RefCntBase { … … 60 62 */ 61 63 virtual Xapian::doccount get_termfreq_est() const = 0; 62 64 65 /** Get an estimate for the termfreq and reltermfreq, given the stats. 66 * 67 * The frequencies may be for a combination of databases, or for just the 68 * relevant documents, so the results need not lie in the bounds given by 69 * get_termfreq_min() and get_termfreq_max(). 70 */ 71 virtual TermFreqs get_termfreq_est_using_stats( 72 const Xapian::Weight::Internal & stats) const = 0; 73 63 74 /// Return an upper bound on what get_weight() can return. 64 75 virtual Xapian::weight get_maxweight() const = 0; 65 76 -
xapian-core/common/contiguousalldocspostlist.h
2 2 * @brief Iterate all document ids when they form a contiguous range. 3 3 */ 4 4 /* Copyright (C) 2007,2008,2009 Olly Betts 5 * Copyright (C) 2009 Lemur Consulting Ltd 5 6 * 6 7 * This program is free software; you can redistribute it and/or modify 7 8 * it under the terms of the GNU General Public License as published by … … 27 28 #include "leafpostlist.h" 28 29 29 30 /// A PostList iterating all docids when they form a contiguous range. 30 class ContiguousAllDocsPostList : public LeafPostList { 31 class ContiguousAllDocsPostList : public TermBasedLeafPostList { 31 32 /// Don't allow assignment. 32 33 void operator=(const ContiguousAllDocsPostList &); 33 34 … … 47 48 /// Constructor. 48 49 ContiguousAllDocsPostList(Xapian::Internal::RefCntPtr<const Xapian::Database::Internal> db_, 49 50 Xapian::doccount doccount_) 50 : db(db_), did(0), doccount(doccount_) { } 51 : TermBasedLeafPostList(std::string()), 52 db(db_), did(0), doccount(doccount_) { } 51 53 52 54 /** Return the term frequency. 53 55 * -
xapian-core/common/rset.h
46 46 const Xapian::Database root; 47 47 const Xapian::Database::Internal *dbroot; 48 48 49 Xapian::TermFreqMap reltermfreqs; 49 std::map<std::string, Xapian::doccount> reltermfreqs; 50 50 bool calculated_reltermfreqs; 51 51 52 52 /** Calculate the statistics. -
xapian-core/common/weightinternal.h
31 31 #include <map> 32 32 #include <string> 33 33 34 namespace Xapian { 34 /// A pair holding a termfreq and reltermfreq. 35 struct TermFreqs { 36 Xapian::doccount termfreq; 37 Xapian::doccount reltermfreq; 38 39 TermFreqs() : termfreq(0), reltermfreq(0) {} 40 TermFreqs(Xapian::doccount termfreq_, Xapian::doccount reltermfreq_) 41 : termfreq(termfreq_), reltermfreq(reltermfreq_) {} 42 43 void operator +=(const TermFreqs & other) { 44 termfreq += other.termfreq; 45 reltermfreq += other.reltermfreq; 46 } 47 48 /// Return a std::string describing this object. 49 std::string get_description() const; 50 }; 35 51 36 /** A mapping from term to term frequency. */ 37 typedef std::map<std::string, Xapian::doccount> TermFreqMap; 52 namespace Xapian { 38 53 39 54 /** Class to hold statistics for a given collection. */ 40 55 class Weight::Internal { … … 51 66 /** Database to get the bounds on doclength and wdf from. */ 52 67 Xapian::Database db; 53 68 54 /** Map of term frequencies for the collection. */ 55 TermFreqMap termfreq; 56 57 /** Map of relevant term frequencies for the collection. */ 58 TermFreqMap reltermfreq; 69 /** Map of term frequencies and relevant term frequencies for the 70 * collection. */ 71 std::map<std::string, TermFreqs> termfreqs; 59 72 60 73 /** Create a Weight::Internal object with global statistics. 61 74 * -
xapian-core/common/output.h
81 81 XAPIAN_OUTPUT_FUNCTION(Xapian::DatabaseReplica) 82 82 83 83 #include "weightinternal.h" 84 XAPIAN_OUTPUT_FUNCTION(TermFreqs) 84 85 XAPIAN_OUTPUT_FUNCTION(Xapian::Weight::Internal) 85 86 86 87 #endif /* XAPIAN_INCLUDED_OUTPUT_H */ -
xapian-core/common/remoteprotocol.h
40 40 // 30.5: New MSG_GETMSET which expects MSet's percent_factor to be returned. 41 41 // 30.6: Support for OP_VALUE_GE and OP_VALUE_LE in query serialisation 42 42 // 31: Clean up for Xapian 1.1.0 43 44 // NOTE: when next breaking compatibility, address the FIXME in 45 // net/serialise.cc in serialise_stats() regarding serialising the termfreq and 46 // reltermfreqs together, rather than as separate lists. 47 43 48 #define XAPIAN_REMOTE_PROTOCOL_MAJOR_VERSION 31 44 49 #define XAPIAN_REMOTE_PROTOCOL_MINOR_VERSION 0 45 50 -
xapian-core/common/emptypostlist.h
2 2 * 3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002,2003,2007,2009 Olly Betts 5 * Copyright 2009 Lemur Consulting Ltd 5 6 * 6 7 * This program is free software; you can redistribute it and/or 7 8 * modify it under the terms of the GNU General Public License as … … 28 29 class EmptyPostList : public LeafPostList { 29 30 public: 30 31 Xapian::doccount get_termfreq() const { return 0; } 32 TermFreqs get_termfreq_est_using_stats( 33 const Xapian::Weight::Internal &) const { return TermFreqs(); } 31 34 32 35 Xapian::docid get_docid() const; 33 36 Xapian::weight get_weight() const; -
xapian-core/api/leafpostlist.cc
Property changes on: xapian-core/m4-macros/xapian-1.1.m4 ___________________________________________________________________ Deleted: svn:mergeinfo
2 2 * @brief Abstract base class for leaf postlists. 3 3 */ 4 4 /* Copyright (C) 2007,2009 Olly Betts 5 * Copyright (C) 2009 Lemur Consulting Ltd 5 6 * 6 7 * This program is free software; you can redistribute it and/or 7 8 * modify it under the terms of the GNU General Public License as … … 24 25 25 26 #include "leafpostlist.h" 26 27 #include "omassert.h" 28 #include "debuglog.h" 27 29 28 30 LeafPostList::~LeafPostList() 29 31 { … … 79 81 { 80 82 return LeafPostList::get_maxweight(); 81 83 } 84 85 TermFreqs 86 TermBasedLeafPostList::get_termfreq_est_using_stats( 87 const Xapian::Weight::Internal & stats) const 88 { 89 LOGCALL(MATCH, TermFreqs, 90 "TermBasedLeafPostList::get_termfreq_est_using_stats", stats); 91 if (tname.empty()) { 92 RETURN(TermFreqs(stats.collection_size, stats.rset_size)); 93 } 94 std::map<std::string, TermFreqs>::const_iterator i = 95 stats.termfreqs.find(tname); 96 RETURN(i->second); 97 } -
xapian-core/api/omqueryinternal.cc
65 65 case Xapian::Query::OP_VALUE_RANGE: 66 66 case Xapian::Query::OP_VALUE_GE: 67 67 case Xapian::Query::OP_VALUE_LE: 68 case Xapian::Query::OP_SYNONYM: 68 69 return 0; 69 70 case Xapian::Query::OP_SCALE_WEIGHT: 70 71 return 1; … … 100 101 case Xapian::Query::OP_NEAR: 101 102 case Xapian::Query::OP_PHRASE: 102 103 case Xapian::Query::OP_ELITE_SET: 104 case Xapian::Query::OP_SYNONYM: 103 105 return UINT_MAX; 104 106 default: 105 107 Assert(false); … … 221 223 result += "."; 222 224 result += str_parameter; // serialise_double(get_dbl_parameter()); 223 225 break; 226 case Xapian::Query::OP_SYNONYM: 227 result += "="; 228 break; 224 229 } 225 230 } 226 231 return result; … … 251 256 case Xapian::Query::OP_VALUE_GE: name = "VALUE_GE"; break; 252 257 case Xapian::Query::OP_VALUE_LE: name = "VALUE_LE"; break; 253 258 case Xapian::Query::OP_SCALE_WEIGHT: name = "SCALE_WEIGHT"; break; 259 case Xapian::Query::OP_SYNONYM: name = "SYNONYM"; break; 254 260 } 255 261 return name; 256 262 } … … 584 590 return qint_from_vector(Xapian::Query::OP_SCALE_WEIGHT, 585 591 subqs, 0, param); 586 592 } 587 default: 593 case '=': { 594 return qint_from_vector(Xapian::Query::OP_SYNONYM, subqs); 595 } 596 default: 588 597 LOGLINE(UNKNOWN, "Can't parse remainder `" << p - 1 << "'"); 589 598 throw Xapian::InvalidArgumentError("Invalid query string"); 590 599 } … … 809 818 case OP_ELITE_SET: 810 819 case OP_OR: 811 820 case OP_XOR: 821 case OP_SYNONYM: 812 822 // Doing an "OR" type operation - if we've got any MatchNothing 813 823 // subnodes, drop them; except that we mustn't become an empty 814 824 // node due to this, so we never drop a MatchNothing subnode … … 900 910 } 901 911 } 902 912 break; 903 case OP_OR: case OP_AND: case OP_XOR: 913 case OP_OR: case OP_AND: case OP_XOR: case OP_SYNONYM: 904 914 // Remove duplicates if we can. 
905 915 if (subqs.size() > 1) collapse_subqs(); 906 916 break; … … 944 954 void 945 955 Xapian::Query::Internal::collapse_subqs() 946 956 { 947 Assert(op == OP_OR || op == OP_AND || op == OP_XOR );957 Assert(op == OP_OR || op == OP_AND || op == OP_XOR || op == OP_SYNONYM); 948 958 typedef set<Xapian::Query::Internal *, SortPosName> subqtable; 949 959 subqtable sqtab; 950 960 … … 1038 1048 Assert(!is_leaf(op)); 1039 1049 if (subq == 0) { 1040 1050 subqs.push_back(0); 1041 } else if (op == subq->op && (op == OP_AND || op == OP_OR || op == OP_XOR )) {1051 } else if (op == subq->op && (op == OP_AND || op == OP_OR || op == OP_XOR || op == OP_SYNONYM)) { 1042 1052 // Distribute the subquery. 1043 1053 for (subquery_list::const_iterator i = subq->subqs.begin(); 1044 1054 i != subq->subqs.end(); i++) { … … 1055 1065 Assert(!is_leaf(op)); 1056 1066 if (subq == 0) { 1057 1067 subqs.push_back(0); 1058 } else if (op == subq->op && (op == OP_AND || op == OP_OR || op == OP_XOR )) {1068 } else if (op == subq->op && (op == OP_AND || op == OP_OR || op == OP_XOR || op == OP_SYNONYM)) { 1059 1069 // Distribute the subquery. 1060 1070 for (subquery_list::const_iterator i = subq->subqs.begin(); 1061 1071 i != subq->subqs.end(); i++) { -
xapian-core/backends/multi/multi_postlist.cc
2 2 * 3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002,2003,2004,2005,2007,2008,2009 Olly Betts 5 * Copyright 2009 Lemur Consulting Ltd 5 6 * 6 7 * This program is free software; you can redistribute it and/or 7 8 * modify it under the terms of the GNU General Public License as … … 59 60 return 0; 60 61 } 61 62 63 TermFreqs 64 MultiPostList::get_termfreq_est_using_stats( 65 const Xapian::Weight::Internal &) const 66 { 67 // Should never get called. 68 Assert(false); 69 return TermFreqs(); 70 } 71 62 72 Xapian::docid 63 73 MultiPostList::get_docid() const 64 74 { -
xapian-core/backends/multi/multi_postlist.h
2 2 * 3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2003,2005,2007,2009 Olly Betts 5 * Copyright 2009 Lemur Consulting Ltd 5 6 * 6 7 * This program is free software; you can redistribute it and/or 7 8 * modify it under the terms of the GNU General Public License as … … 43 44 ~MultiPostList(); 44 45 45 46 Xapian::doccount get_termfreq() const; 47 TermFreqs get_termfreq_est_using_stats( 48 const Xapian::Weight::Internal & stats) const; 46 49 47 50 Xapian::docid get_docid() const; // Gets current docid 48 51 Xapian::termcount get_doclength() const; // Get length of current document -
xapian-core/backends/remote/net_postlist.cc
3 3 */ 4 4 /* Copyright (C) 2007 Lemur Consulting Ltd 5 5 * Copyright (C) 2007,2008,2009 Olly Betts 6 * Copyright (C) 2009 Lemur Consulting Ltd 6 7 * 7 8 * This program is free software; you can redistribute it and/or 8 9 * modify it under the terms of the GNU General Public License as … … 55 56 PositionList * 56 57 NetworkPostList::read_position_list() 57 58 { 58 lastposlist = db->open_position_list(lastdocid, t erm);59 lastposlist = db->open_position_list(lastdocid, tname); 59 60 return lastposlist.get(); 60 61 } 61 62 62 63 PositionList * 63 64 NetworkPostList::open_position_list() const 64 65 { 65 return db->open_position_list(lastdocid, t erm);66 return db->open_position_list(lastdocid, tname); 66 67 } 67 68 68 69 PostList * … … 104 105 string 105 106 NetworkPostList::get_description() const 106 107 { 107 return "NetworkPostList(" + t erm+ ")";108 return "NetworkPostList(" + tname + ")"; 108 109 } -
xapian-core/backends/remote/net_postlist.h
1 1 /** @file net_postlist.h 2 2 * @brief Postlists for remote databases 3 3 */ 4 /* Copyright (C) 2007 Lemur Consulting Ltd4 /* Copyright (C) 2007,2009 Lemur Consulting Ltd 5 5 * Copyright (C) 2007,2008,2009 Olly Betts 6 6 * 7 7 * This program is free software; you can redistribute it and/or … … 33 33 34 34 /** A postlist in a remote database. 35 35 */ 36 class NetworkPostList : public LeafPostList {36 class NetworkPostList : public TermBasedLeafPostList { 37 37 friend class RemoteDatabase; 38 38 39 39 Xapian::Internal::RefCntPtr<const RemoteDatabase> db; 40 string term;41 40 42 41 string postings; 43 42 bool started; … … 61 60 /// Default constructor. 62 61 NetworkPostList(Xapian::Internal::RefCntPtr<const RemoteDatabase> db_, 63 62 const string & term_) 64 : db(db_), term(term_), started(false), pos(NULL), pos_end(NULL), 63 : TermBasedLeafPostList(term_), 64 db(db_), started(false), pos(NULL), pos_end(NULL), 65 65 lastdocid(0), lastwdf(0), termfreq(0) 66 66 { 67 termfreq = db->read_post_list(t erm, *this);67 termfreq = db->read_post_list(tname, *this); 68 68 } 69 69 70 70 /// Get number of documents indexed by this term. -
xapian-core/backends/inmemory/inmemory_database.cc
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009 Olly Betts 6 * Copyright 2006 Richard Boulton6 * Copyright 2006,2009 Lemur Consulting Ltd 7 7 * 8 8 * This program is free software; you can redistribute it and/or 9 9 * modify it under the terms of the GNU General Public License as … … 78 78 ////////////// 79 79 80 80 InMemoryPostList::InMemoryPostList(Xapian::Internal::RefCntPtr<const InMemoryDatabase> db_, 81 const InMemoryTerm & term) 82 : pos(term.docs.begin()), 81 const InMemoryTerm & term, 82 const std::string & tname_) 83 : TermBasedLeafPostList(tname_), 84 pos(term.docs.begin()), 83 85 end(term.docs.end()), 84 86 termfreq(term.term_freq), 85 87 started(false), … … 280 282 ///////////////////////////// 281 283 282 284 InMemoryAllDocsPostList::InMemoryAllDocsPostList(Xapian::Internal::RefCntPtr<const InMemoryDatabase> db_) 283 : did(0), db(db_)285 : TermBasedLeafPostList(std::string()), did(0), db(db_) 284 286 { 285 287 } 286 288 … … 415 417 return new EmptyPostList; 416 418 417 419 Xapian::Internal::RefCntPtr<const InMemoryDatabase> ptrtothis(this); 418 LeafPostList * pl = new InMemoryPostList(ptrtothis, i->second );420 LeafPostList * pl = new InMemoryPostList(ptrtothis, i->second, tname); 419 421 Assert(!pl->at_end()); 420 422 return pl; 421 423 } -
xapian-core/backends/inmemory/inmemory_database.h
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009 Olly Betts 6 * Copyright 2006 Richard Boulton6 * Copyright 2006,2009 Lemur Consulting Ltd 7 7 * 8 8 * This program is free software; you can redistribute it and/or 9 9 * modify it under the terms of the GNU General Public License as … … 132 132 133 133 /** A PostList in an inmemory database. 134 134 */ 135 class InMemoryPostList : public LeafPostList {135 class InMemoryPostList : public TermBasedLeafPostList { 136 136 friend class InMemoryDatabase; 137 137 private: 138 138 vector<InMemoryPosting>::const_iterator pos; … … 148 148 Xapian::Internal::RefCntPtr<const InMemoryDatabase> db; 149 149 150 150 InMemoryPostList(Xapian::Internal::RefCntPtr<const InMemoryDatabase> db, 151 const InMemoryTerm & term );151 const InMemoryTerm & term, const std::string & tname); 152 152 public: 153 153 Xapian::doccount get_termfreq() const; 154 154 … … 170 170 171 171 /** A PostList over all docs in an inmemory database. 172 172 */ 173 class InMemoryAllDocsPostList : public LeafPostList {173 class InMemoryAllDocsPostList : public TermBasedLeafPostList { 174 174 friend class InMemoryDatabase; 175 175 private: 176 176 Xapian::docid did; -
xapian-core/backends/chert/chert_postlist.h
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2002,2003,2004,2005,2007,2008,2009 Olly Betts 6 * Copyright 2007 Lemur Consulting Ltd6 * Copyright 2007,2009 Lemur Consulting Ltd 7 7 * 8 8 * This program is free software; you can redistribute it and/or 9 9 * modify it under the terms of the GNU General Public License as … … 121 121 122 122 /** A postlist in a chert database. 123 123 */ 124 class ChertPostList : public LeafPostList {124 class ChertPostList : public TermBasedLeafPostList { 125 125 protected: // ChertModifiedPostList needs to access these. 126 126 /** The database we are searching. This pointer is held so that the 127 127 * database doesn't get deleted before us, and also to give us access … … 129 129 */ 130 130 Xapian::Internal::RefCntPtr<const ChertDatabase> this_db; 131 131 132 /// The termname for this postlist.133 string tname;134 135 132 /// Whether we've started reading the list yet. 136 133 bool have_started; 137 134 -
xapian-core/backends/chert/chert_postlist.cc
2 2 * 3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002,2003,2004,2005,2007,2008,2009 Olly Betts 5 * Copyright 2007,2008 Lemur Consulting Ltd5 * Copyright 2007,2008,2009 Lemur Consulting Ltd 6 6 * 7 7 * This program is free software; you can redistribute it and/or 8 8 * modify it under the terms of the GNU General Public License as … … 660 660 ChertPostList::ChertPostList(Xapian::Internal::RefCntPtr<const ChertDatabase> this_db_, 661 661 const string & tname_, 662 662 bool keep_reference) 663 : this_db(keep_reference ? this_db_ : NULL),664 t name(tname_),663 : TermBasedLeafPostList(tname_), 664 this_db(keep_reference ? this_db_ : NULL), 665 665 have_started(false), 666 666 cursor(this_db_->postlist_table.cursor_get()), 667 667 is_at_end(false) -
xapian-core/backends/flint/flint_postlist.cc
2 2 * 3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002,2003,2004,2005,2007,2008,2009 Olly Betts 5 * Copyright 2007 Lemur Consulting Ltd5 * Copyright 2007,2009 Lemur Consulting Ltd 6 6 * 7 7 * This program is free software; you can redistribute it and/or 8 8 * modify it under the terms of the GNU General Public License as … … 654 654 */ 655 655 FlintPostList::FlintPostList(Xapian::Internal::RefCntPtr<const FlintDatabase> this_db_, 656 656 const string & tname_) 657 : this_db(this_db_),658 t name(tname_),657 : TermBasedLeafPostList(tname_), 658 this_db(this_db_), 659 659 have_started(false), 660 660 cursor(this_db->postlist_table.cursor_get()), 661 661 is_at_end(false) -
xapian-core/backends/flint/flint_postlist.h
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2002,2003,2004,2005,2007,2008,2009 Olly Betts 6 * Copyright 2007 Lemur Consulting Ltd6 * Copyright 2007,2009 Lemur Consulting Ltd 7 7 * 8 8 * This program is free software; you can redistribute it and/or 9 9 * modify it under the terms of the GNU General Public License as … … 102 102 103 103 /** A postlist in a flint database. 104 104 */ 105 class FlintPostList : public LeafPostList {105 class FlintPostList : public TermBasedLeafPostList { 106 106 protected: // FlintModifiedPostList needs to access these. 107 107 /** The database we are searching. This pointer is held so that the 108 108 * database doesn't get deleted before us, and also to give us access … … 110 110 */ 111 111 Xapian::Internal::RefCntPtr<const FlintDatabase> this_db; 112 112 113 /// The termname for this postlist.114 string tname;115 116 113 /// Whether we've started reading the list yet. 117 114 bool have_started; 118 115 -
xapian-core/backends/flint/flint_alldocspostlist.h
2 2 * @brief A PostList which iterates over all documents in a FlintDatabase. 3 3 */ 4 4 /* Copyright (C) 2006,2007,2008,2009 Olly Betts 5 * Copyright (C) 2009 Lemur Consulting Ltd 5 6 * 6 7 * This program is free software; you can redistribute it and/or modify 7 8 * it under the terms of the GNU General Public License as published by … … 25 26 26 27 #include "leafpostlist.h" 27 28 28 class FlintAllDocsPostList : public LeafPostList {29 class FlintAllDocsPostList : public TermBasedLeafPostList { 29 30 /// Don't allow assignment. 30 31 void operator=(const FlintAllDocsPostList &); 31 32 … … 50 51 public: 51 52 FlintAllDocsPostList(Xapian::Internal::RefCntPtr<const FlintDatabase> db_, 52 53 Xapian::doccount doccount_) 53 : db(db_), doccount(doccount_), cursor(db->termlist_table.cursor_get()), 54 : TermBasedLeafPostList(std::string()), 55 db(db_), doccount(doccount_), cursor(db->termlist_table.cursor_get()), 54 56 current_did(0) 55 57 { 56 58 cursor->find_entry(""); -
xapian-bindings/python/smoketest2.py
213 213 qp.set_stemming_strategy(qp.STEM_SOME) 214 214 qp.set_stemmer(xapian.Stem('en')) 215 215 expect_query(qp.parse_query("foo o", qp.FLAG_PARTIAL), 216 "(Zfoo:(pos=1) AND ( out:(pos=2) OR outsid:(pos=2) OR Zo:(pos=2)))")216 "(Zfoo:(pos=1) AND ((out:(pos=2) SYNONYM outsid:(pos=2)) OR Zo:(pos=2)))") 217 217 218 218 expect_query(qp.parse_query("foo outside", qp.FLAG_PARTIAL), 219 219 "(Zfoo:(pos=1) AND Zoutsid:(pos=2))") -
xapian-bindings/python/smoketest3.py
Property changes on: xapian-bindings/python/generate-python-exceptions ___________________________________________________________________ Deleted: svn:mergeinfo
153 153 154 154 # Feature test for Document.values 155 155 count = 0 156 for term in doc.values():156 for term in list(doc.values()): 157 157 count += 1 158 158 expect(count, 0, "Unexpected number of entries in doc.values") 159 159 … … 213 213 qp.set_stemming_strategy(qp.STEM_SOME) 214 214 qp.set_stemmer(xapian.Stem('en')) 215 215 expect_query(qp.parse_query("foo o", qp.FLAG_PARTIAL), 216 "(Zfoo:(pos=1) AND ( out:(pos=2) OR outsid:(pos=2) OR Zo:(pos=2)))")216 "(Zfoo:(pos=1) AND ((out:(pos=2) SYNONYM outsid:(pos=2)) OR Zo:(pos=2)))") 217 217 218 218 expect_query(qp.parse_query("foo outside", qp.FLAG_PARTIAL), 219 219 "(Zfoo:(pos=1) AND Zoutsid:(pos=2))")