Ticket #50: opsynonym_changes_12490_12492.patch
File opsynonym_changes_12490_12492.patch, 62.8 KB (added by , 16 years ago) |
---|
-
xapian-maintainer-tools/win32msvc/win32_matcher.mak
34 34 $(INTDIR)\queryoptimiser.obj\ 35 35 $(INTDIR)\rset.obj\ 36 36 $(INTDIR)\selectpostlist.obj\ 37 $(INTDIR)\synonympostlist.obj\ 37 38 $(INTDIR)\valuerangepostlist.obj\ 38 39 $(INTDIR)\valuegepostlist.obj\ 39 40 $(INTDIR)\xorpostlist.obj\ … … 60 61 $(INTDIR)\queryoptimiser.cc\ 61 62 $(INTDIR)\rset.cc\ 62 63 $(INTDIR)\selectpostlist.cc\ 64 $(INTDIR)\synonympostlist.cc\ 63 65 $(INTDIR)\valuerangepostlist.cc\ 64 66 $(INTDIR)\valuegepostlist.cc\ 65 67 $(INTDIR)\xorpostlist.cc\ -
xapian-core/queryparser/queryparser.lemony
2 2 /* queryparser.lemony: build a Xapian::Query object from a user query string. 3 3 * 4 4 * Copyright (C) 2004,2005,2006,2007,2008 Olly Betts 5 * Copyright (C) 2007,2008,2009 Lemur Consulting Ltd 5 6 * 6 7 * This program is free software; you can redistribute it and/or 7 8 * modify it under the terms of the GNU General Public License as … … 287 288 end = db.synonyms_end(term); 288 289 } 289 290 while (syn != end) { 290 q = Query(Query::OP_OR, q, Query(*syn, 1, pos)); 291 q = Query(Query::OP_SYNONYM, q, Query(*syn, 1, pos)); 291 292 ++syn; 292 293 } 293 294 } … … 353 354 } 354 355 } 355 356 delete this; 356 return new Query(Query::OP_OR, subqs.begin(), subqs.end()); 357 return new Query(Query::OP_SYNONYM, subqs.begin(), subqs.end()); 357 358 } 358 359 359 360 Query * 360 361 Term::as_partial_query(State * state_) const 361 362 { 362 363 Database db = state_->get_database(); 363 vector<Query> subqs; 364 vector<Query> subqs_partial; // A synonym of all the partial terms. 365 vector<Query> subqs_full; // A synonym of all the full terms. 364 366 list<string>::const_iterator piter; 365 367 for (piter = prefixes.begin(); piter != prefixes.end(); ++piter) { 366 368 string root = *piter; 367 369 root += name; 368 370 TermIterator t = db.allterms_begin(root); 369 371 while (t != db.allterms_end(root)) { 370 subqs.push_back(Query(*t, 1, pos)); 372 subqs_partial.push_back(Query(*t, 1, pos)); 371 373 ++t; 372 374 } 373 375 // Add the term, as it would normally be handled, as an alternative. 
374 subqs.push_back(Query(make_term(*piter), 1, pos)); 376 subqs_full.push_back(Query(make_term(*piter), 1, pos)); 375 377 } 376 378 delete this; 377 return new Query(Query::OP_OR, subqs.begin(), subqs.end()); 379 return new Query(Query::OP_OR, 380 Query(Query::OP_SYNONYM, 381 subqs_partial.begin(), subqs_partial.end()), 382 Query(Query::OP_SYNONYM, 383 subqs_full.begin(), subqs_full.end())); 378 384 } 379 385 380 386 inline bool … … 1176 1182 subqs2.push_back(Query(*syn, 1, pos)); 1177 1183 ++syn; 1178 1184 } 1179 Query q_synonym_terms(Query::OP_OR, subqs2.begin(), subqs2.end()); 1185 Query q_synonym_terms(Query::OP_SYNONYM, subqs2.begin(), subqs2.end()); 1180 1186 subqs2.clear(); 1181 subqs.push_back(Query(Query::OP_OR, 1187 subqs.push_back(Query(Query::OP_SYNONYM, 1182 1188 q_original_terms, q_synonym_terms)); 1183 1189 } 1184 1190 } else { -
xapian-core/matcher/Makefile.mk
18 18 matcher/queryoptimiser.h\ 19 19 matcher/remotesubmatch.h\ 20 20 matcher/selectpostlist.h\ 21 matcher/synonympostlist.h\ 21 22 matcher/valuegepostlist.h\ 22 23 matcher/valuerangepostlist.h\ 23 24 matcher/xorpostlist.h … … 54 55 matcher/queryoptimiser.cc\ 55 56 matcher/rset.cc\ 56 57 matcher/selectpostlist.cc\ 58 matcher/synonympostlist.cc\ 57 59 matcher/valuegepostlist.cc\ 58 60 matcher/valuerangepostlist.cc\ 59 61 matcher/xorpostlist.cc -
xapian-core/matcher/andpostlist.h
2 2 * 3 3 * Copyright 2002 Ananova Ltd 4 4 * Copyright 2003,2004,2009 Olly Betts 5 * Copyright 2009 Lemur Consulting Ltd 5 6 * 6 7 * This program is free software; you can redistribute it and/or 7 8 * modify it under the terms of the GNU General Public License as … … 70 71 MultiMatch *matcher_, 71 72 Xapian::doccount dbsize_, 72 73 bool replacement = false); 74 75 /** get_wdf() for AND postlists returns the sum of the wdfs of the sub 76 * postlists - this is desirable when the AND is part of a synonym. 77 */ 78 Xapian::termcount get_wdf() const; 73 79 }; 74 80 75 81 #endif /* OM_HGUARD_ANDPOSTLIST_H */ -
xapian-core/matcher/multimatch.cc
790 790 791 791 LOGVALUE(MATCH, denom); 792 792 LOGVALUE(MATCH, percent_scale); 793 Assert(percent_scale <= denom); 794 denom *= greatest_wt; 795 Assert(denom > 0); 796 percent_scale /= denom; 793 AssertRel(percent_scale,<=,denom); 794 if (denom == 0) { 795 // This happens if the top-level operator is OP_SYNONYM. 796 percent_scale = 1.0 / greatest_wt; 797 } else { 798 denom *= greatest_wt; 799 AssertRel(denom,>,0); 800 percent_scale /= denom; 801 } 797 802 } else { 798 803 // If all the terms match, the 2 sums of weights cancel 799 804 percent_scale = 1.0 / greatest_wt; -
xapian-core/matcher/localmatch.cc
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009 Olly Betts 6 * Copyright 2007 Lemur Consulting Ltd 6 * Copyright 2007,2008,2009 Lemur Consulting Ltd 7 7 * 8 8 * This program is free software; you can redistribute it and/or 9 9 * modify it under the terms of the GNU General Public License as … … 31 31 #include "omdebug.h" 32 32 #include "omqueryinternal.h" 33 33 #include "queryoptimiser.h" 34 #include "synonympostlist.h" 34 35 #include "weightinternal.h" 35 36 36 37 #include <cfloat> … … 111 112 } 112 113 113 114 PostList * 115 LocalSubMatch::make_synonym_postlist(PostList * or_pl, MultiMatch * matcher, 116 double factor) 117 { 118 DEBUGCALL(MATCH, PostList *, "LocalSubMatch::make_synonym_postlist", 119 "[or_pl], [matcher], " << factor); 120 LOGVALUE(MATCH, or_pl->get_termfreq_est()); 121 AutoPtr<SynonymPostList> res(new SynonymPostList(or_pl, matcher)); 122 AutoPtr<Xapian::Weight> wt(wt_factory->clone_()); 123 124 // FIXME - calculate the reltermfreq to use and pass it in? 125 wt->init_(*stats, qlen, factor, or_pl->get_termfreq_est()); 126 127 res->set_weight(wt.release()); 128 RETURN(res.release()); 129 } 130 131 PostList * 114 132 LocalSubMatch::postlist_from_op_leaf_query(const Xapian::Query::Internal *query, 115 133 double factor) 116 134 { -
xapian-core/matcher/localmatch.h
2 2 * @brief SubMatch class for a local database. 3 3 */ 4 4 /* Copyright (C) 2006,2007,2009 Olly Betts 5 * Copyright (C) 2007 Lemur Consulting Ltd 5 6 * 6 7 * This program is free software; you can redistribute it and/or modify 7 8 * it under the terms of the GNU General Public License as published by … … 82 83 PostList * get_postlist_and_term_info(MultiMatch *matcher, 83 84 std::map<string, Xapian::MSet::Internal::TermFreqAndWeight> *termfreqandwts); 84 85 86 /** Convert a postlist into a synonym postlist. 87 */ 88 PostList * make_synonym_postlist(PostList * or_pl, MultiMatch * matcher, 89 double factor); 90 85 91 /** Convert an OP_LEAF query to a PostList. 86 92 * 87 93 * This is called by QueryOptimiser when it reaches an OP_LEAF query. -
xapian-core/matcher/xorpostlist.h
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2003,2004,2009 Olly Betts 6 * Copyright 2009 Lemur Consulting Ltd 6 7 * 7 8 * This program is free software; you can redistribute it and/or 8 9 * modify it under the terms of the GNU General Public License as … … 69 70 PostList * right_, 70 71 MultiMatch * matcher_, 71 72 Xapian::doccount dbsize_); 73 74 /** get_wdf() for XOR postlists returns the wdf of the sub postlist 75 * which is at the current document. 76 */ 77 Xapian::termcount get_wdf() const; 72 78 }; 73 79 74 80 #endif /* OM_HGUARD_XORPOSTLIST_H */ -
xapian-core/matcher/synonympostlist.h
1 /** @file synonympostlist.h 2 * @brief Combine subqueries, weighting as if they are synonyms 3 */ 4 /* Copyright 2007,2009 Lemur Consulting Ltd 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 19 */ 20 21 #ifndef XAPIAN_INCLUDED_SYNONYMPOSTLIST_H 22 #define XAPIAN_INCLUDED_SYNONYMPOSTLIST_H 23 24 #include "multimatch.h" 25 #include "postlist.h" 26 27 /** A postlist comprising several postlists SYNONYMed together. 28 * 29 * This postlist returns all postings in the OR of the sub postlists, but 30 * returns weights as if they represented a single term. The term frequency 31 * portion of the weight is approximated. 32 */ 33 class SynonymPostList : public PostList { 34 /** The subtree, which starts as an OR of all the sub-postlists being 35 * joined with Synonym, but may decay into something else. 36 */ 37 PostList * subtree; 38 39 /** The object which is using this postlist to perform a match. 40 * 41 * This object needs to be notified when the tree changes such that the 42 * maximum weights need to be recalculated. 43 */ 44 MultiMatch * matcher; 45 46 /** Weighting object used for calculating the synonym weights. 47 */ 48 const Xapian::Weight * wt; 49 50 /** Flag indicating whether the weighting object needs the doclength. 
51 */ 52 bool want_doclength; 53 54 /** Flag indicating whether the weighting object needs the wdf. 55 */ 56 bool want_wdf; 57 58 public: 59 SynonymPostList(PostList * subtree_, MultiMatch * matcher_) 60 : subtree(subtree_), matcher(matcher_), wt(NULL), 61 want_doclength(false), want_wdf(false) { } 62 63 ~SynonymPostList(); 64 65 /** Set the weight object to be used for the synonym postlist. 66 * 67 * Ownership of the weight object passes to the synonym postlist - the 68 * caller must not delete it after use. 69 */ 70 void set_weight(const Xapian::Weight * wt_); 71 72 PostList *next(Xapian::weight w_min); 73 PostList *skip_to(Xapian::docid did, Xapian::weight w_min); 74 75 Xapian::weight get_weight() const; 76 Xapian::weight get_maxweight() const; 77 Xapian::weight recalc_maxweight(); 78 79 // The following methods just call through to the subtree. 80 Xapian::termcount get_wdf() const; 81 Xapian::doccount get_termfreq_min() const; 82 Xapian::doccount get_termfreq_est() const; 83 Xapian::doccount get_termfreq_max() const; 84 Xapian::docid get_docid() const; 85 Xapian::termcount get_doclength() const; 86 bool at_end() const; 87 88 std::string get_description() const; 89 }; 90 91 #endif /* XAPIAN_INCLUDED_SYNONYMPOSTLIST_H */ -
xapian-core/matcher/andmaybepostlist.h
Property changes on: xapian-core/matcher/synonympostlist.h ___________________________________________________________________ Added: svn:eol-style + native
6 6 * Copyright 1999,2000,2001 BrightStation PLC 7 7 * Copyright 2002 Ananova Ltd 8 8 * Copyright 2003,2004,2009 Olly Betts 9 * Copyright 2009 Lemur Consulting Ltd 9 10 * 10 11 * This program is free software; you can redistribute it and/or 11 12 * modify it under the terms of the GNU General Public License as … … 103 104 lmax = l->get_maxweight(); 104 105 rmax = r->get_maxweight(); 105 106 } 107 108 /** get_wdf() for ANDMAYBE postlists returns the sum of the wdfs of the 109 * sub postlists which are at the current document - this is desirable 110 * when the ANDMAYBE is part of a synonym. 111 */ 112 Xapian::termcount get_wdf() const; 106 113 }; 107 114 108 115 #endif /* OM_HGUARD_ANDMAYBEPOSTLIST_H */ -
xapian-core/matcher/orpostlist.h
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2003,2004,2009 Olly Betts 6 * Copyright 2009 Lemur Consulting Ltd 6 7 * 7 8 * This program is free software; you can redistribute it and/or 8 9 * modify it under the terms of the GNU General Public License as … … 67 68 PostList * right_, 68 69 MultiMatch * matcher_, 69 70 Xapian::doccount dbsize_); 71 72 /** get_wdf() for OR postlists returns the sum of the wdfs of the 73 * sub postlists which are at the current document - this is desirable 74 * when the OR is part of a synonym. 75 */ 76 Xapian::termcount get_wdf() const; 70 77 }; 71 78 72 79 #endif /* OM_HGUARD_ORPOSTLIST_H */ -
xapian-core/matcher/andnotpostlist.cc
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2003,2004,2007,2009 Olly Betts 6 * Copyright 2009 Lemur Consulting Ltd 6 7 * 7 8 * This program is free software; you can redistribute it and/or 8 9 * modify it under the terms of the GNU General Public License as … … 175 176 DEBUGCALL(MATCH, Xapian::termcount, "AndNotPostList::get_doclength", ""); 176 177 RETURN(l->get_doclength()); 177 178 } 179 180 Xapian::termcount 181 AndNotPostList::get_wdf() const 182 { 183 DEBUGCALL(MATCH, Xapian::termcount, "AndNotPostList::get_wdf", ""); 184 RETURN(l->get_wdf()); 185 } -
xapian-core/matcher/andnotpostlist.h
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2003,2004,2009 Olly Betts 6 * Copyright 2009 Lemur Consulting Ltd 6 7 * 7 8 * This program is free software; you can redistribute it and/or 8 9 * modify it under the terms of the GNU General Public License as … … 69 70 Xapian::weight w_min, 70 71 Xapian::docid lh, 71 72 Xapian::docid rh); 73 74 /** get_wdf() for ANDNOT postlists returns the wdf of the left hand 75 * side. 76 */ 77 Xapian::termcount get_wdf() const; 72 78 }; 73 79 74 80 #endif /* OM_HGUARD_ANDNOTPOSTLIST_H */ -
xapian-core/matcher/queryoptimiser.cc
82 82 case Xapian::Query::OP_ELITE_SET: 83 83 RETURN(do_or_like(query, factor)); 84 84 85 case Xapian::Query::OP_SYNONYM: 86 RETURN(do_synonym(query, factor)); 87 85 88 case Xapian::Query::OP_AND_NOT: { 86 89 AssertEq(query->subqs.size(), 2); 87 90 PostList * l = do_subquery(query->subqs[0], factor); … … 304 307 // for AND-like operations. 305 308 Xapian::Query::Internal::op_t op = query->op; 306 309 Assert(op == Xapian::Query::OP_ELITE_SET || op == Xapian::Query::OP_OR || 307 op == Xapian::Query::OP_XOR );310 op == Xapian::Query::OP_XOR || op == Xapian::Query::OP_SYNONYM); 308 311 309 312 const Xapian::Query::Internal::subquery_list &queries = query->subqs; 310 313 AssertRel(queries.size(), >=, 2); … … 382 385 ComparePostListTermFreqAscending()); 383 386 } 384 387 } 388 389 PostList * 390 QueryOptimiser::do_synonym(const Xapian::Query::Internal *query, double factor) 391 { 392 DEBUGCALL(MATCH, PostList *, "QueryOptimiser::do_synonym", 393 query << ", " << factor); 394 if (factor == 0.0) { 395 // If we have a factor of 0, we don't care about the weights, so 396 // we're just like a normal OR query. 397 RETURN(do_or_like(query, 0.0)); 398 } 399 400 // We currently assume wqf is 1 for calculating the synonym's weight 401 // since conceptually the synonym is one "virtual" term. If we were 402 // to combine multiple occurrences of the same synonym expansion into 403 // a single instance with wqf set, we would want to use the wqf. 404 AssertEq(query->wqf, 0); 405 406 // We build an OP_OR tree for OP_SYNONYM and then wrap it in a 407 // SynonymPostList, which supplies the weights. 408 RETURN(localsubmatch.make_synonym_postlist(do_or_like(query, 0.0), 409 matcher, factor)); 410 } -
xapian-core/matcher/queryoptimiser.h
2 2 * @brief Convert a Xapian::Query::Internal tree into an optimal PostList tree. 3 3 */ 4 4 /* Copyright (C) 2007,2008,2009 Olly Betts 5 * Copyright (C) 2008 Lemur Consulting Ltd 5 6 * 6 7 * This program is free software; you can redistribute it and/or 7 8 * modify it under the terms of the GNU General Public License as … … 88 89 */ 89 90 PostList * do_or_like(const Xapian::Query::Internal *query, double factor); 90 91 92 /** Optimise a synonym Xapian::Query::Internal subtree into a PostList 93 * 94 * @param query The subtree to optimise. 95 * @param factor How much to scale weights for this subtree by. 96 * 97 * @return A PostList subtree. 98 */ 99 PostList * do_synonym(const Xapian::Query::Internal *query, double factor); 100 91 101 public: 92 102 QueryOptimiser(const Xapian::Database::Internal & db_, 93 103 LocalSubMatch & localsubmatch_, -
xapian-core/matcher/andpostlist.cc
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2003,2004,2007,2008,2009 Olly Betts 6 * Copyright 2007 Lemur Consulting Ltd 6 * Copyright 2007,2009 Lemur Consulting Ltd 7 7 * 8 8 * This program is free software; you can redistribute it and/or 9 9 * modify it under the terms of the GNU General Public License as … … 203 203 AssertEq(doclength, r->get_doclength()); 204 204 RETURN(doclength); 205 205 } 206 207 Xapian::termcount 208 AndPostList::get_wdf() const 209 { 210 DEBUGCALL(MATCH, Xapian::termcount, "AndPostList::get_wdf", ""); 211 RETURN(l->get_wdf() + r->get_wdf()); 212 } -
xapian-core/matcher/xorpostlist.cc
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2003,2004,2007,2008,2009 Olly Betts 6 * Copyright 2009 Lemur Consulting Ltd 6 7 * 7 8 * This program is free software; you can redistribute it and/or 8 9 * modify it under the terms of the GNU General Public License as … … 294 295 Assert(lhead > rhead); 295 296 return r->get_doclength(); 296 297 } 298 299 Xapian::termcount 300 XorPostList::get_wdf() const 301 { 302 DEBUGCALL(MATCH, Xapian::termcount, "XorPostList::get_wdf", ""); 303 if (lhead < rhead) RETURN(l->get_wdf()); 304 RETURN(r->get_wdf()); 305 } -
xapian-core/matcher/synonympostlist.cc
1 /** @file synonympostlist.cc 2 * @brief Combine subqueries, weighting as if they are synonyms 3 */ 4 /* Copyright 2007,2009 Lemur Consulting Ltd 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License as 8 * published by the Free Software Foundation; either version 2 of the 9 * License, or (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 19 * USA 20 */ 21 22 #include <config.h> 23 24 #include "synonympostlist.h" 25 26 #include "branchpostlist.h" 27 #include "debuglog.h" 28 29 SynonymPostList::~SynonymPostList() 30 { 31 delete wt; 32 delete subtree; 33 } 34 35 void 36 SynonymPostList::set_weight(const Xapian::Weight * wt_) 37 { 38 delete wt; 39 wt = wt_; 40 want_doclength = wt->get_sumpart_needs_doclength_(); 41 want_wdf = wt->get_sumpart_needs_wdf_(); 42 } 43 44 PostList * 45 SynonymPostList::next(Xapian::weight w_min) 46 { 47 LOGCALL(MATCH, PostList *, "SynonymPostList::next", w_min); 48 (void)w_min; 49 next_handling_prune(subtree, 0, matcher); 50 RETURN(NULL); 51 } 52 53 PostList * 54 SynonymPostList::skip_to(Xapian::docid did, Xapian::weight w_min) 55 { 56 LOGCALL(MATCH, PostList *, "SynonymPostList::skip_to", did << ", " << w_min); 57 (void)w_min; 58 skip_to_handling_prune(subtree, did, 0, matcher); 59 RETURN(NULL); 60 } 61 62 Xapian::weight 63 SynonymPostList::get_weight() const 64 { 65 LOGCALL(MATCH, Xapian::weight, "SynonymPostList::get_weight", ""); 66 // The wdf returned can be higher than the doclength. 
In particular, this 67 // can currently occur if the query contains a term more than once; the wdf 68 // of each occurrence is added up. 69 // 70 // However, it's reasonable for weighting algorithms to optimise by 71 // assuming that get_wdf() will never return more than get_doclength(), 72 // since the doclength is the sum of the wdfs. 73 // 74 // Therefore, we simply clamp the wdf value to the doclength, to ensure 75 // that this is true. Note that this requires the doclength to be 76 // calculated even if the weight object doesn't want it. 77 78 if (want_wdf) { 79 Xapian::termcount wdf = get_wdf(); 80 Xapian::termcount doclen = get_doclength(); 81 if (wdf > doclen) wdf = doclen; 82 RETURN(wt->get_sumpart(wdf, doclen)); 83 } 84 RETURN(wt->get_sumpart(0, want_doclength ? get_doclength() : 0)); 85 } 86 87 Xapian::weight 88 SynonymPostList::get_maxweight() const 89 { 90 LOGCALL(MATCH, Xapian::weight, "SynonymPostList::get_maxweight", ""); 91 RETURN(wt->get_maxpart()); 92 } 93 94 Xapian::weight 95 SynonymPostList::recalc_maxweight() 96 { 97 LOGCALL(MATCH, Xapian::weight, "SynonymPostList::recalc_maxweight", ""); 98 RETURN(SynonymPostList::get_maxweight()); 99 } 100 101 Xapian::termcount 102 SynonymPostList::get_wdf() const { 103 LOGCALL(MATCH, Xapian::termcount, "SynonymPostList::get_wdf", ""); 104 RETURN(subtree->get_wdf()); 105 } 106 107 Xapian::doccount 108 SynonymPostList::get_termfreq_min() const { 109 LOGCALL(MATCH, Xapian::doccount, "SynonymPostList::get_termfreq_min", ""); 110 RETURN(subtree->get_termfreq_min()); 111 } 112 113 Xapian::doccount 114 SynonymPostList::get_termfreq_est() const { 115 LOGCALL(MATCH, Xapian::doccount, "SynonymPostList::get_termfreq_min", ""); 116 RETURN(subtree->get_termfreq_est()); 117 } 118 119 Xapian::doccount 120 SynonymPostList::get_termfreq_max() const { 121 LOGCALL(MATCH, Xapian::doccount, "SynonymPostList::get_termfreq_min", ""); 122 RETURN(subtree->get_termfreq_max()); 123 } 124 125 Xapian::docid 126 
SynonymPostList::get_docid() const { 127 LOGCALL(MATCH, Xapian::docid, "SynonymPostList::get_docid", ""); 128 RETURN(subtree->get_docid()); 129 } 130 131 Xapian::termcount 132 SynonymPostList::get_doclength() const { 133 LOGCALL(MATCH, Xapian::termcount, "SynonymPostList::get_doclength", ""); 134 RETURN(subtree->get_doclength()); 135 } 136 137 bool 138 SynonymPostList::at_end() const { 139 LOGCALL(MATCH, bool, "SynonymPostList::at_end", ""); 140 RETURN(subtree->at_end()); 141 } 142 143 std::string 144 SynonymPostList::get_description() const 145 { 146 return "(Synonym " + subtree->get_description() + ")"; 147 } -
xapian-core/matcher/multiandpostlist.h
Property changes on: xapian-core/matcher/synonympostlist.cc ___________________________________________________________________ Added: svn:eol-style + native
154 154 std::string get_description() const; 155 155 156 156 /** get_wdf() for MultiAndPostlists returns the sum of the wdfs of the 157 * sub postlists. The wdf isn't really meaningful in many situations, 158 * but if the lists are being combined as a synonym we want the sum of 159 * the wdfs, so we do that in general. 157 * sub postlists. 158 * 159 * The wdf isn't really meaningful in many situations, but if the lists 160 * are being combined as a synonym we want the sum of the wdfs, so we do 161 * that in general. 160 162 */ 161 virtual Xapian::termcount get_wdf() const; 162 }; 163 Xapian::termcount get_wdf() const; }; 163 164 164 165 #endif // XAPIAN_INCLUDED_MULTIANDPOSTLIST_H -
xapian-core/matcher/orpostlist.cc
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2001,2002 Ananova Ltd 5 5 * Copyright 2003,2004,2007,2008,2009 Olly Betts 6 * Copyright 2009 Lemur Consulting Ltd 6 7 * 7 8 * This program is free software; you can redistribute it and/or 8 9 * modify it under the terms of the GNU General Public License as … … 258 259 259 260 RETURN(doclength); 260 261 } 262 263 Xapian::termcount 264 OrPostList::get_wdf() const 265 { 266 DEBUGCALL(MATCH, Xapian::termcount, "OrPostList::get_wdf", ""); 267 if (lhead < rhead) RETURN(l->get_wdf()); 268 if (lhead > rhead) RETURN(r->get_wdf()); 269 RETURN(l->get_wdf() + r->get_wdf()); 270 } -
xapian-core/matcher/andmaybepostlist.cc
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2003,2004,2005,2008,2009 Olly Betts 6 * Copyright 2009 Lemur Consulting Ltd 6 7 * 7 8 * This program is free software; you can redistribute it and/or 8 9 * modify it under the terms of the GNU General Public License as … … 169 170 if (lhead == rhead) AssertEq(l->get_doclength(), r->get_doclength()); 170 171 RETURN(l->get_doclength()); 171 172 } 173 174 Xapian::termcount 175 AndMaybePostList::get_wdf() const 176 { 177 DEBUGCALL(MATCH, Xapian::termcount, "AndMaybePostList::get_wdf", ""); 178 if (lhead == rhead) RETURN(l->get_wdf() + r->get_wdf()); 179 RETURN(l->get_wdf()); 180 } -
xapian-core/weight/weight.cc
77 77 init(factor); 78 78 } 79 79 80 void 81 Weight::init_(const Internal & stats, Xapian::termcount query_length, 82 double factor, Xapian::doccount termfreq) 83 { 84 LOGCALL_VOID(MATCH, "Weight::init_", stats << ", " << query_length << 85 ", " << factor << ", " << termfreq); 86 // Synonym case. 87 collection_size_ = stats.collection_size; 88 rset_size_ = stats.rset_size; 89 if (stats_needed & AVERAGE_LENGTH) 90 average_length_ = stats.get_average_length(); 91 if (stats_needed & DOC_LENGTH_MAX) 92 doclength_upper_bound_ = stats.db.get_doclength_upper_bound(); 93 if (stats_needed & DOC_LENGTH_MIN) 94 doclength_lower_bound_ = stats.db.get_doclength_lower_bound(); 95 96 // The doclength is an upper bound on the wdf. This is obviously true for 97 // normal terms, but SynonymPostList ensures that it is also true for 98 // synonym terms by clamping the wdf values returned to the doclength. 99 // 100 // (This clamping is only actually necessary in cases where a constituent 101 // term of the synonym is repeated.) 102 if (stats_needed & WDF_MAX) 103 wdf_upper_bound_ = stats.db.get_doclength_upper_bound(); 104 105 termfreq_ = termfreq; 106 reltermfreq_ = 0; 107 query_length_ = query_length; 108 wqf_ = 1; 109 init(factor); 110 } 111 80 112 Weight::~Weight() { } 81 113 82 114 } -
xapian-core/tests/api_opsynonym.cc
1 /* api_opsynonym.cc: tests which need a backend 2 * 3 * Copyright 2009 Olly Betts 4 * Copyright 2007,2008,2009 Lemur Consulting Ltd 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License as 8 * published by the Free Software Foundation; either version 2 of the 9 * License, or (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 19 * USA 20 */ 21 22 #include <config.h> 23 24 #include "api_opsynonym.h" 25 26 #include <map> 27 #include <vector> 28 29 #include <xapian.h> 30 31 #include "backendmanager.h" 32 #include "backendmanager_local.h" 33 #include "testsuite.h" 34 #include "testutils.h" 35 36 #include "apitest.h" 37 38 using namespace std; 39 40 // ####################################################################### 41 // # Tests start here 42 43 // Check a synonym search 44 DEFINE_TESTCASE(synonym1, backend) { 45 Xapian::Database db(get_database("etext")); 46 47 TEST_REL(db.get_doclength_upper_bound(), >, 0); 48 49 Xapian::doccount lots = 214; 50 51 // Make a list of lists of subqueries, which are going to be joined 52 // together as a synonym. 53 vector<vector<Xapian::Query> > subqueries_list; 54 55 vector<Xapian::Query> subqueries; 56 subqueries.push_back(Xapian::Query("date")); 57 subqueries_list.push_back(subqueries); 58 59 // Two terms, which co-occur in some documents. 
60 subqueries.clear(); 61 subqueries.push_back(Xapian::Query("sky")); 62 subqueries.push_back(Xapian::Query("date")); 63 subqueries_list.push_back(subqueries); 64 65 // Two terms which are entirely disjoint, and where the maximum weight 66 // doesn't occur in the first or second match. 67 subqueries.clear(); 68 subqueries.push_back(Xapian::Query("gutenberg")); 69 subqueries.push_back(Xapian::Query("blockhead")); 70 subqueries_list.push_back(subqueries); 71 72 subqueries.clear(); 73 subqueries.push_back(Xapian::Query("date")); 74 subqueries.push_back(Xapian::Query(Xapian::Query::OP_OR, 75 Xapian::Query("sky"), 76 Xapian::Query("glove"))); 77 subqueries_list.push_back(subqueries); 78 79 subqueries.clear(); 80 subqueries.push_back(Xapian::Query("sky")); 81 subqueries.push_back(Xapian::Query("date")); 82 subqueries.push_back(Xapian::Query("stein")); 83 subqueries.push_back(Xapian::Query("ally")); 84 subqueries_list.push_back(subqueries); 85 86 subqueries.clear(); 87 subqueries.push_back(Xapian::Query("attitud")); 88 subqueries.push_back(Xapian::Query(Xapian::Query::OP_PHRASE, 89 Xapian::Query("german"), 90 Xapian::Query("adventur"))); 91 subqueries_list.push_back(subqueries); 92 93 for (vector<vector<Xapian::Query> >::const_iterator 94 qlist = subqueries_list.begin(); 95 qlist != subqueries_list.end(); ++qlist) 96 { 97 // Run two queries, one joining the subqueries with OR and one joining them 98 // with SYNONYM. 99 Xapian::Enquire enquire(db); 100 101 // Do the search with OR 102 Xapian::Query orquery(Xapian::Query(Xapian::Query::OP_OR, qlist->begin(), qlist->end())); 103 enquire.set_query(orquery); 104 Xapian::MSet ormset = enquire.get_mset(0, lots); 105 106 // Do the search with synonym, getting all the results. 107 Xapian::Query synquery(Xapian::Query::OP_SYNONYM, qlist->begin(), qlist->end()); 108 enquire.set_query(synquery); 109 Xapian::MSet mset = enquire.get_mset(0, lots); 110 111 // Check that the queries return some results. 
112 TEST_NOT_EQUAL(mset.size(), 0); 113 // Check that the queries return the same number of results. 114 TEST_EQUAL(mset.size(), ormset.size()); 115 map<Xapian::docid, Xapian::weight> values_or; 116 map<Xapian::docid, Xapian::weight> values_synonym; 117 for (Xapian::doccount i = 0; i < mset.size(); ++i) { 118 values_or[*ormset[i]] = ormset[i].get_weight(); 119 values_synonym[*mset[i]] = mset[i].get_weight(); 120 } 121 TEST_EQUAL(values_or.size(), values_synonym.size()); 122 123 /* Check that most of the weights for items in the "or" mset are 124 * different from those in the "synonym" mset. */ 125 int same_weight = 0; 126 int different_weight = 0; 127 for (map<Xapian::docid, Xapian::weight>::const_iterator 128 j = values_or.begin(); 129 j != values_or.end(); ++j) 130 { 131 Xapian::docid did = j->first; 132 // Check that all the results in the or tree make it to the synonym tree. 133 TEST(values_synonym.find(did) != values_synonym.end()); 134 if (values_or[did] == values_synonym[did]) { 135 same_weight += 1; 136 } else { 137 different_weight += 1; 138 } 139 } 140 if (qlist->size() == 1) { 141 // Had a single term - check that all the weights were the same. 142 TEST_EQUAL(different_weight, 0); 143 TEST_NOT_EQUAL(same_weight, 0); 144 } else { 145 // Check that most of the weights differ. 146 TEST_NOT_EQUAL(different_weight, 0); 147 TEST_REL(same_weight, <, different_weight); 148 } 149 150 // Do the search with synonym, but just get the top result. 151 // (Regression test - the OR subquery in the synonym postlist tree used 152 // to shortcut incorrectly, and return the wrong result here). 153 Xapian::MSet mset_top = enquire.get_mset(0, 1); 154 TEST_EQUAL(mset_top.size(), 1); 155 TEST(mset_range_is_same(mset_top, 0, mset, 0, 1)); 156 } 157 return true; 158 } 159 160 // Regression test - test a synonym search with a MultiAndPostlist. 
161 DEFINE_TESTCASE(synonym2, backend) { 162 Xapian::Query query; 163 vector<Xapian::Query> subqueries; 164 subqueries.push_back(Xapian::Query("file")); 165 subqueries.push_back(Xapian::Query("the")); 166 subqueries.push_back(Xapian::Query("next")); 167 subqueries.push_back(Xapian::Query("reader")); 168 query = Xapian::Query(Xapian::Query::OP_AND, subqueries.begin(), subqueries.end()); 169 subqueries.clear(); 170 subqueries.push_back(query); 171 subqueries.push_back(Xapian::Query("gutenberg")); 172 query = Xapian::Query(Xapian::Query::OP_SYNONYM, subqueries.begin(), subqueries.end()); 173 174 tout << query.get_description() << endl; 175 176 Xapian::Database db(get_database("etext")); 177 Xapian::Enquire enquire(db); 178 enquire.set_query(query); 179 Xapian::MSet mset = enquire.get_mset(0, 10); 180 tout << mset.get_description() << endl; 181 182 // Regression test that OP_SCALE_WEIGHT works with OP_SYNONYM 183 double maxposs = mset.get_max_possible(); 184 query = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 10.0); 185 enquire.set_query(query); 186 mset = enquire.get_mset(0, 10); 187 double maxposs2 = mset.get_max_possible(); 188 189 TEST_EQUAL_DOUBLE(maxposs * 10.0, maxposs2); 190 191 return true; 192 } 193 194 // Test a synonym search which has had its weight scaled to 0. 195 DEFINE_TESTCASE(synonym3, backend) { 196 Xapian::Query query = Xapian::Query(Xapian::Query::OP_SYNONYM, 197 Xapian::Query("sky"), 198 Xapian::Query("date")); 199 200 Xapian::Database db(get_database("etext")); 201 Xapian::Enquire enquire(db); 202 enquire.set_query(query); 203 Xapian::MSet mset_orig = enquire.get_mset(0, db.get_doccount()); 204 205 tout << query.get_description() << endl; 206 tout << mset_orig.get_description() << endl; 207 208 // Test that OP_SCALE_WEIGHT with a factor of 0.0 works with OP_SYNONYM 209 // (this has a special codepath to avoid doing the synonym calculation). 
210 query = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 0.0); 211 enquire.set_query(query); 212 Xapian::MSet mset_zero = enquire.get_mset(0, db.get_doccount()); 213 214 tout << query.get_description() << endl; 215 tout << mset_zero.get_description() << endl; 216 217 // Check that the queries return some results. 218 TEST_NOT_EQUAL(mset_zero.size(), 0); 219 // Check that the queries return the same document IDs, and the the zero 220 // one has zero weight. 221 TEST_EQUAL(mset_zero.size(), mset_orig.size()); 222 223 map<Xapian::docid, Xapian::weight> values_orig; 224 map<Xapian::docid, Xapian::weight> values_zero; 225 for (Xapian::doccount i = 0; i < mset_zero.size(); ++i) { 226 TEST_NOT_EQUAL(mset_orig[i].get_weight(), 0.0); 227 TEST_EQUAL(mset_zero[i].get_weight(), 0.0); 228 229 values_orig[*mset_orig[i]] = mset_orig[i].get_weight(); 230 values_zero[*mset_zero[i]] = mset_zero[i].get_weight(); 231 } 232 233 for (map<Xapian::docid, Xapian::weight>::const_iterator 234 j = values_orig.begin(); 235 j != values_orig.end(); ++j) 236 { 237 Xapian::docid did = j->first; 238 // Check that all the results in the orig mset are in the zero mset. 239 TEST(values_zero.find(did) != values_zero.end()); 240 } 241 TEST_EQUAL(values_orig.size(), values_zero.size()); 242 243 return true; 244 } -
xapian-core/tests/queryparsertest.cc
Property changes on: xapian-core/tests/api_opsynonym.cc ___________________________________________________________________ Added: svn:eol-style + native
1 1 /* queryparsertest.cc: Tests of Xapian::QueryParser 2 2 * 3 3 * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009 Olly Betts 4 * Copyright (C) 2007,2009 Lemur Consulting Ltd 4 5 * 5 6 * This program is free software; you can redistribute it and/or 6 7 * modify it under the terms of the GNU General Public License as … … 786 787 Xapian::Query qobj = qp.parse_query("ab*", Xapian::QueryParser::FLAG_WILDCARD); 787 788 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(abc:(pos=1))"); 788 789 qobj = qp.parse_query("muscle*", Xapian::QueryParser::FLAG_WILDCARD); 789 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscle:(pos=1) ORmusclebound:(pos=1)))");790 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscle:(pos=1) SYNONYM musclebound:(pos=1)))"); 790 791 qobj = qp.parse_query("meat*", Xapian::QueryParser::FLAG_WILDCARD); 791 792 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query()"); 792 793 qobj = qp.parse_query("musc*", Xapian::QueryParser::FLAG_WILDCARD); 793 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscat:(pos=1) OR muscle:(pos=1) OR musclebound:(pos=1) ORmuscular:(pos=1)))");794 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscat:(pos=1) SYNONYM muscle:(pos=1) SYNONYM musclebound:(pos=1) SYNONYM muscular:(pos=1)))"); 794 795 qobj = qp.parse_query("mutt*", Xapian::QueryParser::FLAG_WILDCARD); 795 796 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(mutton:(pos=1))"); 796 797 // Regression test (we weren't lowercasing terms before checking if they … … 879 880 qp.add_prefix("author", "A"); 880 881 Xapian::Query qobj; 881 882 qobj = qp.parse_query("author:h*", Xapian::QueryParser::FLAG_WILDCARD); 882 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((Aheinlein:(pos=1) ORAhuxley:(pos=1)))");883 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((Aheinlein:(pos=1) SYNONYM Ahuxley:(pos=1)))"); 883 884 qobj = qp.parse_query("author:h* test", 
Xapian::QueryParser::FLAG_WILDCARD); 884 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( Aheinlein:(pos=1) OR Ahuxley:(pos=1) OR test:(pos=2)))");885 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((Aheinlein:(pos=1) SYNONYM Ahuxley:(pos=1)) OR test:(pos=2)))"); 885 886 return true; 886 887 } 887 888 … … 907 908 doc.add_term("XTcowl"); 908 909 doc.add_term("XTcox"); 909 910 doc.add_term("ZXTcow"); 911 doc.add_term("XONEpartial"); 912 doc.add_term("XONEpartial2"); 913 doc.add_term("XTWOpartial3"); 914 doc.add_term("XTWOpartial4"); 910 915 db.add_document(doc); 911 916 Xapian::QueryParser qp; 912 917 qp.set_database(db); … … 922 927 qobj = qp.parse_query("ab", Xapian::QueryParser::FLAG_PARTIAL); 923 928 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((abc:(pos=1) OR Zab:(pos=1)))"); 924 929 qobj = qp.parse_query("muscle", Xapian::QueryParser::FLAG_PARTIAL); 925 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( muscle:(pos=1) OR musclebound:(pos=1) OR Zmuscl:(pos=1)))");930 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((muscle:(pos=1) SYNONYM musclebound:(pos=1)) OR Zmuscl:(pos=1)))"); 926 931 qobj = qp.parse_query("meat", Xapian::QueryParser::FLAG_PARTIAL); 927 932 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(Zmeat:(pos=1))"); 928 933 qobj = qp.parse_query("musc", Xapian::QueryParser::FLAG_PARTIAL); 929 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( muscat:(pos=1) OR muscle:(pos=1) OR musclebound:(pos=1) OR muscular:(pos=1) OR Zmusc:(pos=1)))");934 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((muscat:(pos=1) SYNONYM muscle:(pos=1) SYNONYM musclebound:(pos=1) SYNONYM muscular:(pos=1)) OR Zmusc:(pos=1)))"); 930 935 qobj = qp.parse_query("mutt", Xapian::QueryParser::FLAG_PARTIAL); 931 936 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((mutton:(pos=1) OR Zmutt:(pos=1)))"); 932 937 qobj = qp.parse_query("abc musc", Xapian::QueryParser::FLAG_PARTIAL); 933 
TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((Zabc:(pos=1) OR muscat:(pos=2) OR muscle:(pos=2) OR musclebound:(pos=2) OR muscular:(pos=2) OR Zmusc:(pos=2)))");938 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((Zabc:(pos=1) OR (muscat:(pos=2) SYNONYM muscle:(pos=2) SYNONYM musclebound:(pos=2) SYNONYM muscular:(pos=2)) OR Zmusc:(pos=2)))"); 934 939 qobj = qp.parse_query("a* mutt", Xapian::QueryParser::FLAG_PARTIAL | Xapian::QueryParser::FLAG_WILDCARD); 935 940 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((abc:(pos=1) OR mutton:(pos=2) OR Zmutt:(pos=2)))"); 936 941 937 942 // Check behaviour with stemmed terms, and stem strategy STEM_SOME. 938 943 qobj = qp.parse_query("o", Xapian::QueryParser::FLAG_PARTIAL); 939 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( out:(pos=1) OR outside:(pos=1) OR Zo:(pos=1)))");944 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR Zo:(pos=1)))"); 940 945 qobj = qp.parse_query("ou", Xapian::QueryParser::FLAG_PARTIAL); 941 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( out:(pos=1) OR outside:(pos=1) OR Zou:(pos=1)))");946 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR Zou:(pos=1)))"); 942 947 qobj = qp.parse_query("out", Xapian::QueryParser::FLAG_PARTIAL); 943 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( out:(pos=1) OR outside:(pos=1) OR Zout:(pos=1)))");948 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR Zout:(pos=1)))"); 944 949 qobj = qp.parse_query("outs", Xapian::QueryParser::FLAG_PARTIAL); 945 950 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((outside:(pos=1) OR Zout:(pos=1)))"); 946 951 qobj = qp.parse_query("outsi", Xapian::QueryParser::FLAG_PARTIAL); … … 952 957 953 958 // Check behaviour with capitalised terms, and stem strategy STEM_SOME. 
954 959 qobj = qp.parse_query("Out", Xapian::QueryParser::FLAG_PARTIAL); 955 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( out:(pos=1,wqf=2) OR outside:(pos=1)))");960 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR out:(pos=1)))"); 956 961 qobj = qp.parse_query("Outs", Xapian::QueryParser::FLAG_PARTIAL); 957 962 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((outside:(pos=1) OR outs:(pos=1)))"); 958 963 qobj = qp.parse_query("Outside", Xapian::QueryParser::FLAG_PARTIAL); … … 961 966 // And now with stemming strategy STEM_ALL. 962 967 qp.set_stemming_strategy(Xapian::QueryParser::STEM_ALL); 963 968 qobj = qp.parse_query("Out", Xapian::QueryParser::FLAG_PARTIAL); 964 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( out:(pos=1,wqf=2) OR outside:(pos=1)))");969 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR out:(pos=1)))"); 965 970 qobj = qp.parse_query("Outs", Xapian::QueryParser::FLAG_PARTIAL); 966 971 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((outside:(pos=1) OR out:(pos=1)))"); 967 972 qobj = qp.parse_query("Outside", Xapian::QueryParser::FLAG_PARTIAL); … … 970 975 // Check handling of a case with a prefix. 
971 976 qp.set_stemming_strategy(Xapian::QueryParser::STEM_SOME); 972 977 qobj = qp.parse_query("title:cow", Xapian::QueryParser::FLAG_PARTIAL); 973 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( XTcowl:(pos=1) OR XTcows:(pos=1) OR ZXTcow:(pos=1)))");978 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((XTcowl:(pos=1) SYNONYM XTcows:(pos=1)) OR ZXTcow:(pos=1)))"); 974 979 qobj = qp.parse_query("title:cows", Xapian::QueryParser::FLAG_PARTIAL); 975 980 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((XTcows:(pos=1) OR ZXTcow:(pos=1)))"); 976 981 qobj = qp.parse_query("title:Cow", Xapian::QueryParser::FLAG_PARTIAL); 977 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( XTcowl:(pos=1) OR XTcows:(pos=1) OR XTcow:(pos=1)))");982 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((XTcowl:(pos=1) SYNONYM XTcows:(pos=1)) OR XTcow:(pos=1)))"); 978 983 qobj = qp.parse_query("title:Cows", Xapian::QueryParser::FLAG_PARTIAL); 979 984 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(XTcows:(pos=1,wqf=2))"); 980 985 … … 982 987 // inflate the wqf of the "parsed as normal" version of a partial term 983 988 // by multiplying it by the number of prefixes mapped to. 984 989 qobj = qp.parse_query("double:vision", Xapian::QueryParser::FLAG_PARTIAL); 985 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((ZXONEvision:(pos=1) OR ZXTWOvision:(pos=1)))"); 990 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((ZXONEvision:(pos=1) SYNONYM ZXTWOvision:(pos=1)))"); 991 992 // Test handling of FLAG_PARTIAL when there's more than one prefix. 
993 qobj = qp.parse_query("double:part", Xapian::QueryParser::FLAG_PARTIAL); 994 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((XONEpartial:(pos=1) SYNONYM XONEpartial2:(pos=1) SYNONYM XTWOpartial3:(pos=1) SYNONYM XTWOpartial4:(pos=1)) OR (ZXONEpart:(pos=1) SYNONYM ZXTWOpart:(pos=1))))"); 995 996 // Test handling of FLAG_PARTIAL when there's more than one prefix, without 997 // stemming. 998 qp.set_stemming_strategy(Xapian::QueryParser::STEM_NONE); 999 qobj = qp.parse_query("double:part", Xapian::QueryParser::FLAG_PARTIAL); 1000 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((XONEpartial:(pos=1) SYNONYM XONEpartial2:(pos=1) SYNONYM XTWOpartial3:(pos=1) SYNONYM XTWOpartial4:(pos=1)) OR (XONEpart:(pos=1) SYNONYM XTWOpart:(pos=1))))"); 1001 qobj = qp.parse_query("double:partial", Xapian::QueryParser::FLAG_PARTIAL); 1002 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((XONEpartial:(pos=1) SYNONYM XONEpartial2:(pos=1) SYNONYM XTWOpartial3:(pos=1) SYNONYM XTWOpartial4:(pos=1)) OR (XONEpartial:(pos=1) SYNONYM XTWOpartial:(pos=1))))"); 986 1003 987 1004 return true; 988 1005 } … … 1547 1564 } 1548 1565 1549 1566 static test test_synonym_queries[] = { 1550 { "searching", "(Zsearch:(pos=1) OR Zfind:(pos=1) ORZlocate:(pos=1))" },1551 { "search", "(Zsearch:(pos=1) ORfind:(pos=1))" },1552 { "Search", "(search:(pos=1) ORfind:(pos=1))" },1567 { "searching", "(Zsearch:(pos=1) SYNONYM Zfind:(pos=1) SYNONYM Zlocate:(pos=1))" }, 1568 { "search", "(Zsearch:(pos=1) SYNONYM find:(pos=1))" }, 1569 { "Search", "(search:(pos=1) SYNONYM find:(pos=1))" }, 1553 1570 { "Searching", "searching:(pos=1)" }, 1554 { "searching OR terms", "( Zsearch:(pos=1) OR Zfind:(pos=1) OR Zlocate:(pos=1) OR Zterm:(pos=2))" },1555 { "search OR terms", "( Zsearch:(pos=1) OR find:(pos=1) OR Zterm:(pos=2))" },1556 { "search +terms", "(Zterm:(pos=2) AND_MAYBE (Zsearch:(pos=1) ORfind:(pos=1)))" },1557 { "search -terms", "((Zsearch:(pos=1) ORfind:(pos=1)) AND_NOT Zterm:(pos=2))" 
},1558 { "+search terms", "((Zsearch:(pos=1) ORfind:(pos=1)) AND_MAYBE Zterm:(pos=2))" },1559 { "-search terms", "(Zterm:(pos=2) AND_NOT (Zsearch:(pos=1) ORfind:(pos=1)))" },1560 { "search terms", "( Zsearch:(pos=1) OR find:(pos=1) OR Zterm:(pos=2))" },1571 { "searching OR terms", "((Zsearch:(pos=1) SYNONYM Zfind:(pos=1) SYNONYM Zlocate:(pos=1)) OR Zterm:(pos=2))" }, 1572 { "search OR terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) OR Zterm:(pos=2))" }, 1573 { "search +terms", "(Zterm:(pos=2) AND_MAYBE (Zsearch:(pos=1) SYNONYM find:(pos=1)))" }, 1574 { "search -terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) AND_NOT Zterm:(pos=2))" }, 1575 { "+search terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) AND_MAYBE Zterm:(pos=2))" }, 1576 { "-search terms", "(Zterm:(pos=2) AND_NOT (Zsearch:(pos=1) SYNONYM find:(pos=1)))" }, 1577 { "search terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) OR Zterm:(pos=2))" }, 1561 1578 // Shouldn't trigger synonyms: 1562 1579 { "\"search terms\"", "(search:(pos=1) PHRASE 2 terms:(pos=2))" }, 1563 1580 { NULL, NULL } … … 1597 1614 1598 1615 static test test_multi_synonym_queries[] = { 1599 1616 { "sun OR tan OR cream", "(Zsun:(pos=1) OR Ztan:(pos=2) OR Zcream:(pos=3))" }, 1600 { "sun tan", "( Zsun:(pos=1) OR Ztan:(pos=2) ORbathe:(pos=1))" },1601 { "sun tan cream", "( Zsun:(pos=1) OR Ztan:(pos=2) OR Zcream:(pos=3) ORlotion:(pos=1))" },1602 { "beach sun tan holiday", "(Zbeach:(pos=1) OR Zsun:(pos=2) OR Ztan:(pos=3) OR bathe:(pos=2) OR Zholiday:(pos=4))" },1603 { "sun tan sun tan cream", "( Zsun:(pos=1) OR Ztan:(pos=2) OR bathe:(pos=1) OR Zsun:(pos=3) OR Ztan:(pos=4) OR Zcream:(pos=5) OR lotion:(pos=3))" },1604 { "single", "(Zsingl:(pos=1) ORrecord:(pos=1))" },1617 { "sun tan", "((Zsun:(pos=1) OR Ztan:(pos=2)) SYNONYM bathe:(pos=1))" }, 1618 { "sun tan cream", "((Zsun:(pos=1) OR Ztan:(pos=2) OR Zcream:(pos=3)) SYNONYM lotion:(pos=1))" }, 1619 { "beach sun tan holiday", "(Zbeach:(pos=1) OR ((Zsun:(pos=2) OR Ztan:(pos=3)) SYNONYM 
bathe:(pos=2)) OR Zholiday:(pos=4))" }, 1620 { "sun tan sun tan cream", "(((Zsun:(pos=1) OR Ztan:(pos=2)) SYNONYM bathe:(pos=1)) OR ((Zsun:(pos=3) OR Ztan:(pos=4) OR Zcream:(pos=5)) SYNONYM lotion:(pos=3)))" }, 1621 { "single", "(Zsingl:(pos=1) SYNONYM record:(pos=1))" }, 1605 1622 { NULL, NULL } 1606 1623 }; 1607 1624 … … 1640 1657 1641 1658 static test test_synonym_op_queries[] = { 1642 1659 { "searching", "Zsearch:(pos=1)" }, 1643 { "~searching", "(Zsearch:(pos=1) OR Zfind:(pos=1) ORZlocate:(pos=1))" },1644 { "~search", "(Zsearch:(pos=1) ORfind:(pos=1))" },1645 { "~Search", "(search:(pos=1) ORfind:(pos=1))" },1660 { "~searching", "(Zsearch:(pos=1) SYNONYM Zfind:(pos=1) SYNONYM Zlocate:(pos=1))" }, 1661 { "~search", "(Zsearch:(pos=1) SYNONYM find:(pos=1))" }, 1662 { "~Search", "(search:(pos=1) SYNONYM find:(pos=1))" }, 1646 1663 { "~Searching", "searching:(pos=1)" }, 1647 { "~searching OR terms", "( Zsearch:(pos=1) OR Zfind:(pos=1) OR Zlocate:(pos=1) OR Zterm:(pos=2))" },1648 { "~search OR terms", "( Zsearch:(pos=1) OR find:(pos=1) OR Zterm:(pos=2))" },1649 { "~search +terms", "(Zterm:(pos=2) AND_MAYBE (Zsearch:(pos=1) ORfind:(pos=1)))" },1650 { "~search -terms", "((Zsearch:(pos=1) ORfind:(pos=1)) AND_NOT Zterm:(pos=2))" },1651 { "+~search terms", "((Zsearch:(pos=1) ORfind:(pos=1)) AND_MAYBE Zterm:(pos=2))" },1652 { "-~search terms", "(Zterm:(pos=2) AND_NOT (Zsearch:(pos=1) ORfind:(pos=1)))" },1653 { "~search terms", "( Zsearch:(pos=1) OR find:(pos=1) OR Zterm:(pos=2))" },1664 { "~searching OR terms", "((Zsearch:(pos=1) SYNONYM Zfind:(pos=1) SYNONYM Zlocate:(pos=1)) OR Zterm:(pos=2))" }, 1665 { "~search OR terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) OR Zterm:(pos=2))" }, 1666 { "~search +terms", "(Zterm:(pos=2) AND_MAYBE (Zsearch:(pos=1) SYNONYM find:(pos=1)))" }, 1667 { "~search -terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) AND_NOT Zterm:(pos=2))" }, 1668 { "+~search terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) AND_MAYBE Zterm:(pos=2))" }, 1669 { 
"-~search terms", "(Zterm:(pos=2) AND_NOT (Zsearch:(pos=1) SYNONYM find:(pos=1)))" }, 1670 { "~search terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) OR Zterm:(pos=2))" }, 1654 1671 // FIXME: should look for multi-term synonym... 1655 1672 { "~\"search terms\"", "(search:(pos=1) PHRASE 2 terms:(pos=2))" }, 1656 1673 { NULL, NULL } -
xapian-core/tests/Makefile.am
116 116 api_db.cc \ 117 117 api_generated.cc \ 118 118 api_nodb.cc \ 119 api_opsynonym.cc \ 119 120 api_percentages.cc \ 120 121 api_posdb.cc \ 121 122 api_query.cc \ -
xapian-core/include/xapian/query.h
119 119 OP_VALUE_GE, 120 120 121 121 /** Filter by a less-than-or-equal test on a document value. */ 122 OP_VALUE_LE 122 OP_VALUE_LE, 123 124 /** Treat a set of queries as synonyms. 125 * 126 * This returns all results which match at least one of the 127 * queries, but weighting as if all the sub-queries are instances 128 * of the same term: so multiple matching terms for a document 129 * increase the wdf value used, and the term frequency is based on 130 * the number of documents which would match an OR of all the 131 * subqueries. 132 * 133 * The term frequency used will usually be an approximation, 134 * because calculating the precise combined term frequency would 135 * be overly expensive. 136 * 137 * Identical to OP_OR, except for the weightings returned. 138 */ 139 OP_SYNONYM 123 140 } op; 124 141 125 142 /** Copy constructor. */ -
xapian-core/include/xapian/weight.h
2 2 * @brief Weighting scheme API. 3 3 */ 4 4 /* Copyright (C) 2007,2008,2009 Olly Betts 5 * Copyright (C) 2009 Lemur Consulting Ltd 5 6 * 6 7 * This program is free software; you can redistribute it and/or 7 8 * modify it under the terms of the GNU General Public License as … … 212 213 const std::string & term, Xapian::termcount wqf_, 213 214 double factor); 214 215 216 /** @private @internal Initialise this object to calculate weights for a 217 * synonym. 218 * 219 * @param stats Source of statistics. 220 * @param query_len_ Query length. 221 * @param factor Any scaling factor (e.g. from OP_SCALE_WEIGHT). 222 * @param termfreq The termfreq to use. 223 */ 224 void init_(const Internal & stats, Xapian::termcount query_len_, 225 double factor, Xapian::doccount termfreq); 226 215 227 /** @private @internal Initialise this object to calculate the extra weight 216 228 * component. 217 229 * … … 230 242 return stats_needed & DOC_LENGTH; 231 243 } 232 244 245 /** @private @internal Return true if the WDF is needed. 246 * 247 * If this method returns true, then the WDF will be fetched and passed to 248 * @a get_sumpart(). Otherwise 0 may be passed for the wdf. 249 */ 250 bool get_sumpart_needs_wdf_() const { 251 return stats_needed & WDF; 252 } 253 233 254 protected: 234 255 /// Only allow subclasses to copy us. 235 256 Weight(const Weight &); … … 372 393 need_stat(TERMFREQ); 373 394 need_stat(RELTERMFREQ); 374 395 need_stat(WDF_MAX); 396 need_stat(WDF); 375 397 if (param_k2 != 0 || (param_k1 != 0 && param_b != 0)) { 376 398 need_stat(DOC_LENGTH_MIN); 377 399 need_stat(AVERAGE_LENGTH); … … 390 412 need_stat(TERMFREQ); 391 413 need_stat(RELTERMFREQ); 392 414 need_stat(WDF_MAX); 415 need_stat(WDF); 393 416 need_stat(DOC_LENGTH_MIN); 394 417 need_stat(AVERAGE_LENGTH); 395 418 need_stat(DOC_LENGTH); … … 452 475 need_stat(RELTERMFREQ); 453 476 need_stat(DOC_LENGTH_MIN); 454 477 need_stat(WDF_MAX); 478 need_stat(WDF); 455 479 } 456 480 457 481 std::string name() const; -
xapian-core/api/omqueryinternal.cc
65 65 case Xapian::Query::OP_VALUE_RANGE: 66 66 case Xapian::Query::OP_VALUE_GE: 67 67 case Xapian::Query::OP_VALUE_LE: 68 case Xapian::Query::OP_SYNONYM: 68 69 return 0; 69 70 case Xapian::Query::OP_SCALE_WEIGHT: 70 71 return 1; … … 100 101 case Xapian::Query::OP_NEAR: 101 102 case Xapian::Query::OP_PHRASE: 102 103 case Xapian::Query::OP_ELITE_SET: 104 case Xapian::Query::OP_SYNONYM: 103 105 return UINT_MAX; 104 106 default: 105 107 Assert(false); … … 221 223 result += "."; 222 224 result += str_parameter; // serialise_double(get_dbl_parameter()); 223 225 break; 226 case Xapian::Query::OP_SYNONYM: 227 result += "="; 228 break; 224 229 } 225 230 } 226 231 return result; … … 251 256 case Xapian::Query::OP_VALUE_GE: name = "VALUE_GE"; break; 252 257 case Xapian::Query::OP_VALUE_LE: name = "VALUE_LE"; break; 253 258 case Xapian::Query::OP_SCALE_WEIGHT: name = "SCALE_WEIGHT"; break; 259 case Xapian::Query::OP_SYNONYM: name = "SYNONYM"; break; 254 260 } 255 261 return name; 256 262 } … … 584 590 return qint_from_vector(Xapian::Query::OP_SCALE_WEIGHT, 585 591 subqs, 0, param); 586 592 } 587 default: 593 case '=': { 594 return qint_from_vector(Xapian::Query::OP_SYNONYM, subqs); 595 } 596 default: 588 597 LOGLINE(UNKNOWN, "Can't parse remainder `" << p - 1 << "'"); 589 598 throw Xapian::InvalidArgumentError("Invalid query string"); 590 599 } … … 809 818 case OP_ELITE_SET: 810 819 case OP_OR: 811 820 case OP_XOR: 821 case OP_SYNONYM: 812 822 // Doing an "OR" type operation - if we've got any MatchNothing 813 823 // subnodes, drop them; except that we mustn't become an empty 814 824 // node due to this, so we never drop a MatchNothing subnode … … 900 910 } 901 911 } 902 912 break; 903 case OP_OR: case OP_AND: case OP_XOR: 913 case OP_OR: case OP_AND: case OP_XOR: case OP_SYNONYM: 904 914 // Remove duplicates if we can. 
905 915 if (subqs.size() > 1) collapse_subqs(); 906 916 break; … … 944 954 void 945 955 Xapian::Query::Internal::collapse_subqs() 946 956 { 947 Assert(op == OP_OR || op == OP_AND || op == OP_XOR );957 Assert(op == OP_OR || op == OP_AND || op == OP_XOR || op == OP_SYNONYM); 948 958 typedef set<Xapian::Query::Internal *, SortPosName> subqtable; 949 959 subqtable sqtab; 950 960 … … 1038 1048 Assert(!is_leaf(op)); 1039 1049 if (subq == 0) { 1040 1050 subqs.push_back(0); 1041 } else if (op == subq->op && (op == OP_AND || op == OP_OR || op == OP_XOR )) {1051 } else if (op == subq->op && (op == OP_AND || op == OP_OR || op == OP_XOR || op == OP_SYNONYM)) { 1042 1052 // Distribute the subquery. 1043 1053 for (subquery_list::const_iterator i = subq->subqs.begin(); 1044 1054 i != subq->subqs.end(); i++) { … … 1055 1065 Assert(!is_leaf(op)); 1056 1066 if (subq == 0) { 1057 1067 subqs.push_back(0); 1058 } else if (op == subq->op && (op == OP_AND || op == OP_OR || op == OP_XOR )) {1068 } else if (op == subq->op && (op == OP_AND || op == OP_OR || op == OP_XOR || op == OP_SYNONYM)) { 1059 1069 // Distribute the subquery. 1060 1070 for (subquery_list::const_iterator i = subq->subqs.begin(); 1061 1071 i != subq->subqs.end(); i++) { -
xapian-bindings/python/smoketest2.py
213 213 qp.set_stemming_strategy(qp.STEM_SOME) 214 214 qp.set_stemmer(xapian.Stem('en')) 215 215 expect_query(qp.parse_query("foo o", qp.FLAG_PARTIAL), 216 "(Zfoo:(pos=1) AND ( out:(pos=2) OR outsid:(pos=2) OR Zo:(pos=2)))")216 "(Zfoo:(pos=1) AND ((out:(pos=2) SYNONYM outsid:(pos=2)) OR Zo:(pos=2)))") 217 217 218 218 expect_query(qp.parse_query("foo outside", qp.FLAG_PARTIAL), 219 219 "(Zfoo:(pos=1) AND Zoutsid:(pos=2))") -
xapian-bindings/python/smoketest3.py
153 153 154 154 # Feature test for Document.values 155 155 count = 0 156 for term in doc.values():156 for term in list(doc.values()): 157 157 count += 1 158 158 expect(count, 0, "Unexpected number of entries in doc.values") 159 159 … … 213 213 qp.set_stemming_strategy(qp.STEM_SOME) 214 214 qp.set_stemmer(xapian.Stem('en')) 215 215 expect_query(qp.parse_query("foo o", qp.FLAG_PARTIAL), 216 "(Zfoo:(pos=1) AND ( out:(pos=2) OR outsid:(pos=2) OR Zo:(pos=2)))")216 "(Zfoo:(pos=1) AND ((out:(pos=2) SYNONYM outsid:(pos=2)) OR Zo:(pos=2)))") 217 217 218 218 expect_query(qp.parse_query("foo outside", qp.FLAG_PARTIAL), 219 219 "(Zfoo:(pos=1) AND Zoutsid:(pos=2))")