Ticket #50: opsynonym_changes_12471_12472.patch
File opsynonym_changes_12471_12472.patch, 59.3 KB (added by , 16 years ago) |
---|
-
xapian-maintainer-tools/win32msvc/win32_matcher.mak
34 34 $(INTDIR)\queryoptimiser.obj\ 35 35 $(INTDIR)\rset.obj\ 36 36 $(INTDIR)\selectpostlist.obj\ 37 $(INTDIR)\synonympostlist.obj\ 37 38 $(INTDIR)\valuerangepostlist.obj\ 38 39 $(INTDIR)\valuegepostlist.obj\ 39 40 $(INTDIR)\xorpostlist.obj\ … … 60 61 $(INTDIR)\queryoptimiser.cc\ 61 62 $(INTDIR)\rset.cc\ 62 63 $(INTDIR)\selectpostlist.cc\ 64 $(INTDIR)\synonympostlist.cc\ 63 65 $(INTDIR)\valuerangepostlist.cc\ 64 66 $(INTDIR)\valuegepostlist.cc\ 65 67 $(INTDIR)\xorpostlist.cc\ -
xapian-core/queryparser/queryparser.lemony
2 2 /* queryparser.lemony: build a Xapian::Query object from a user query string. 3 3 * 4 4 * Copyright (C) 2004,2005,2006,2007,2008 Olly Betts 5 * Copyright (C) 2007,2008,2009 Lemur Consulting Ltd 5 6 * 6 7 * This program is free software; you can redistribute it and/or 7 8 * modify it under the terms of the GNU General Public License as … … 160 161 161 162 Query * as_wildcarded_query(State * state) const; 162 163 164 /** Build a query for a term at the very end of a query when FLAG_PARTIAL 165 * is in use. 166 * 167 * This query should match documents containing terms which start with the 168 * characters seen, but should match exact matches higher (since the user 169 * might have finished typing - we simply don't know). 170 */ 163 171 Query * as_partial_query(State * state_) const; 164 172 165 173 Query get_query() const; … … 279 287 end = db.synonyms_end(term); 280 288 } 281 289 while (syn != end) { 282 q = Query(Query::OP_ OR, q, Query(*syn, 1, pos));290 q = Query(Query::OP_SYNONYM, q, Query(*syn, 1, pos)); 283 291 ++syn; 284 292 } 285 293 } … … 345 353 } 346 354 } 347 355 delete this; 348 return new Query(Query::OP_ OR, subqs.begin(), subqs.end());356 return new Query(Query::OP_SYNONYM, subqs.begin(), subqs.end()); 349 357 } 350 358 351 359 Query * 352 360 Term::as_partial_query(State * state_) const 353 361 { 354 362 Database db = state_->get_database(); 355 vector<Query> subqs; 363 vector<Query> subqs_partial; // A synonym of all the partial terms. 364 vector<Query> subqs_full; // A synonym of all the full terms. 
356 365 list<string>::const_iterator piter; 357 366 for (piter = prefixes.begin(); piter != prefixes.end(); ++piter) { 358 367 string root = *piter; 359 368 root += name; 360 369 TermIterator t = db.allterms_begin(root); 361 370 while (t != db.allterms_end(root)) { 362 subqs .push_back(Query(*t, 1, pos));371 subqs_partial.push_back(Query(*t, 1, pos)); 363 372 ++t; 364 373 } 365 374 // Add the term, as it would normally be handled, as an alternative. 366 subqs .push_back(Query(make_term(*piter), 1, pos));375 subqs_full.push_back(Query(make_term(*piter), 1, pos)); 367 376 } 368 377 delete this; 369 return new Query(Query::OP_OR, subqs.begin(), subqs.end()); 378 return new Query(Query::OP_OR, 379 Query(Query::OP_SYNONYM, 380 subqs_partial.begin(), subqs_partial.end()), 381 Query(Query::OP_SYNONYM, 382 subqs_full.begin(), subqs_full.end())); 370 383 } 371 384 372 385 inline bool … … 1168 1181 subqs2.push_back(Query(*syn, 1, pos)); 1169 1182 ++syn; 1170 1183 } 1171 Query q_synonym_terms(Query::OP_ OR, subqs2.begin(), subqs2.end());1184 Query q_synonym_terms(Query::OP_SYNONYM, subqs2.begin(), subqs2.end()); 1172 1185 subqs2.clear(); 1173 subqs.push_back(Query(Query::OP_ OR,1186 subqs.push_back(Query(Query::OP_SYNONYM, 1174 1187 q_original_terms, q_synonym_terms)); 1175 1188 } 1176 1189 } else { -
xapian-core/matcher/Makefile.mk
18 18 matcher/queryoptimiser.h\ 19 19 matcher/remotesubmatch.h\ 20 20 matcher/selectpostlist.h\ 21 matcher/synonympostlist.h\ 21 22 matcher/valuegepostlist.h\ 22 23 matcher/valuerangepostlist.h\ 23 24 matcher/xorpostlist.h … … 54 55 matcher/queryoptimiser.cc\ 55 56 matcher/rset.cc\ 56 57 matcher/selectpostlist.cc\ 58 matcher/synonympostlist.cc\ 57 59 matcher/valuegepostlist.cc\ 58 60 matcher/valuerangepostlist.cc\ 59 61 matcher/xorpostlist.cc -
xapian-core/matcher/branchpostlist.cc
1 1 /** @file branchpostlist.cc 2 2 * @brief Virtual base class for branched types of postlist. 3 3 */ 4 /* Copyright (C) 2007 Lemur Consulting Ltd 5 * Copyright (C) 2007 Olly Betts 4 /* Copyright (C) 2007 Olly Betts 6 5 * 7 6 * This program is free software; you can redistribute it and/or 8 7 * modify it under the terms of the GNU General Public License as … … 29 28 delete l; 30 29 delete r; 31 30 } 32 33 Xapian::termcount34 BranchPostList::get_wdf() const35 {36 return l->get_wdf() + r->get_wdf();37 } -
xapian-core/matcher/branchpostlist.h
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2003,2004,2007 Olly Betts 6 * Copyright 2007 Lemur Consulting Ltd7 6 * 8 7 * This program is free software; you can redistribute it and/or 9 8 * modify it under the terms of the GNU General Public License as … … 72 71 : l(l_), r(r_), matcher(matcher_) {} 73 72 74 73 virtual ~BranchPostList(); 75 76 /** get_wdf() for branch postlists returns the sum of the wdfs of the77 * sub postlists. The wdf isn't really meaningful in many situations,78 * but if the lists are being combined as a synonym we want the sum of79 * the wdfs, so we do that in general.80 */81 virtual Xapian::termcount get_wdf() const;82 74 }; 83 75 84 76 // Helper functions - call next/skip_to on a postlist and handle any -
xapian-core/matcher/andpostlist.h
2 2 * 3 3 * Copyright 2002 Ananova Ltd 4 4 * Copyright 2003,2004,2009 Olly Betts 5 * Copyright 2009 Lemur Consulting Ltd 5 6 * 6 7 * This program is free software; you can redistribute it and/or 7 8 * modify it under the terms of the GNU General Public License as … … 70 71 MultiMatch *matcher_, 71 72 Xapian::doccount dbsize_, 72 73 bool replacement = false); 74 75 /** get_wdf() for AND postlists returns the sum of the wdfs of the sub 76 * postlists - this is desirable when the AND is part of a synonym. 77 */ 78 Xapian::termcount get_wdf() const; 73 79 }; 74 80 75 81 #endif /* OM_HGUARD_ANDPOSTLIST_H */ -
xapian-core/matcher/multimatch.cc
790 790 791 791 LOGVALUE(MATCH, denom); 792 792 LOGVALUE(MATCH, percent_scale); 793 Assert(percent_scale <= denom); 794 denom *= greatest_wt; 795 Assert(denom > 0); 796 percent_scale /= denom; 793 AssertRel(percent_scale,<=,denom); 794 if (denom == 0) { 795 // This happens if the top-level operator is OP_SYNONYM. 796 percent_scale = 1.0 / greatest_wt; 797 } else { 798 denom *= greatest_wt; 799 AssertRel(denom,>,0); 800 percent_scale /= denom; 801 } 797 802 } else { 798 803 // If all the terms match, the 2 sums of weights cancel 799 804 percent_scale = 1.0 / greatest_wt; -
xapian-core/matcher/localmatch.cc
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009 Olly Betts 6 * Copyright 2007 Lemur Consulting Ltd6 * Copyright 2007,2008,2009 Lemur Consulting Ltd 7 7 * 8 8 * This program is free software; you can redistribute it and/or 9 9 * modify it under the terms of the GNU General Public License as … … 31 31 #include "omdebug.h" 32 32 #include "omqueryinternal.h" 33 33 #include "queryoptimiser.h" 34 #include "synonympostlist.h" 34 35 #include "weightinternal.h" 35 36 36 37 #include <cfloat> … … 111 112 } 112 113 113 114 PostList * 115 LocalSubMatch::make_synonym_postlist(PostList * or_pl, MultiMatch * matcher, 116 double factor) 117 { 118 DEBUGCALL(MATCH, PostList *, "LocalSubMatch::make_synonym_postlist", 119 "[or_pl], [matcher], " << factor); 120 LOGVALUE(MATCH, or_pl->get_termfreq_est()); 121 AutoPtr<SynonymPostList> res(new SynonymPostList(or_pl, matcher)); 122 AutoPtr<Xapian::Weight> wt(wt_factory->clone_()); 123 124 // FIXME - calculate the reltermfreq to use and pass it in? 125 wt->init_(*stats, qlen, factor, or_pl->get_termfreq_est()); 126 127 res->set_weight(wt.release()); 128 RETURN(res.release()); 129 } 130 131 PostList * 114 132 LocalSubMatch::postlist_from_op_leaf_query(const Xapian::Query::Internal *query, 115 133 double factor) 116 134 { … … 132 150 Xapian::doccount tf = stats->get_termfreq(query->tname); 133 151 Xapian::weight weight = boolean ? 
0 : wt->get_maxpart(); 134 152 Xapian::MSet::Internal::TermFreqAndWeight info(tf, weight); 153 LOGLINE(MATCH, "Setting term_info[" << query->tname << "] to (" << tf << ", " << weight << ")"); 135 154 term_info.insert(make_pair(query->tname, info)); 136 155 } else if (!boolean) { 137 156 i->second.termweight += wt->get_maxpart(); 157 AssertEq(stats->get_termfreq(query->tname), i->second.termfreq); 158 LOGLINE(MATCH, "Increasing term_info[" << query->tname << "] to (" << i->second.termfreq << ", " << i->second.termweight << ")"); 138 159 } 139 160 140 161 LeafPostList * pl = db->open_post_list(query->tname); -
xapian-core/matcher/localmatch.h
2 2 * @brief SubMatch class for a local database. 3 3 */ 4 4 /* Copyright (C) 2006,2007,2009 Olly Betts 5 * Copyright (C) 2007 Lemur Consulting Ltd 5 6 * 6 7 * This program is free software; you can redistribute it and/or modify 7 8 * it under the terms of the GNU General Public License as published by … … 82 83 PostList * get_postlist_and_term_info(MultiMatch *matcher, 83 84 std::map<string, Xapian::MSet::Internal::TermFreqAndWeight> *termfreqandwts); 84 85 86 /** Convert a postlist into a synonym postlist. 87 */ 88 PostList * make_synonym_postlist(PostList * or_pl, MultiMatch * matcher, 89 double factor); 90 85 91 /** Convert an OP_LEAF query to a PostList. 86 92 * 87 93 * This is called by QueryOptimiser when it reaches an OP_LEAF query. -
xapian-core/matcher/xorpostlist.h
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2003,2004,2009 Olly Betts 6 * Copyright 2009 Lemur Consulting Ltd 6 7 * 7 8 * This program is free software; you can redistribute it and/or 8 9 * modify it under the terms of the GNU General Public License as … … 69 70 PostList * right_, 70 71 MultiMatch * matcher_, 71 72 Xapian::doccount dbsize_); 73 74 /** get_wdf() for XOR postlists returns the wdf of the sub postlist 75 * which is at the current document. 76 */ 77 virtual Xapian::termcount get_wdf() const; 72 78 }; 73 79 74 80 #endif /* OM_HGUARD_XORPOSTLIST_H */ -
xapian-core/matcher/synonympostlist.h
1 /** @file synonympostlist.h 2 * @brief Combine subqueries, weighting as if they are synonyms 3 */ 4 /* Copyright 2007,2009 Lemur Consulting Ltd 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 19 */ 20 21 #ifndef XAPIAN_INCLUDED_SYNONYMPOSTLIST_H 22 #define XAPIAN_INCLUDED_SYNONYMPOSTLIST_H 23 24 #include "multimatch.h" 25 #include "postlist.h" 26 27 /** A postlist comprising several postlists SYNONYMed together. 28 * 29 * This postlist returns all postings in the OR of the sub postlists, but 30 * returns weights as if they represented a single term. The term frequency 31 * portion of the weight is approximated. 32 */ 33 class SynonymPostList : public PostList { 34 /** The subtree, which starts as an OR of all the sub-postlists being 35 * joined with Synonym, but may decay into something else. 36 */ 37 PostList * subtree; 38 39 /** The object which is using this postlist to perform a match. 40 * 41 * This object needs to be notified when the tree changes such that the 42 * maximum weights need to be recalculated. 43 */ 44 MultiMatch *matcher; 45 46 /** Weighting object used for calculating the synonym weights. 47 */ 48 const Xapian::Weight * wt; 49 50 /** Flag indicating whether the weighting object needs the doclength. 
51 */ 52 bool want_doclength; 53 54 /** Flag indicating whether the weighting object needs the wdf. 55 */ 56 bool want_wdf; 57 58 public: 59 SynonymPostList(PostList *subtree_, MultiMatch * matcher_); 60 61 ~SynonymPostList(); 62 63 /** Set the weight object to be used for the synonym postlist. 64 * 65 * Ownership of the weight object passes to the synonym postlist - the 66 * caller must not delete it after use. 67 */ 68 void set_weight(const Xapian::Weight * wt_); 69 70 PostList *next(Xapian::weight w_min); 71 PostList *skip_to(Xapian::docid did, Xapian::weight w_min); 72 73 Xapian::weight get_weight() const; 74 Xapian::weight get_maxweight() const; 75 Xapian::weight recalc_maxweight(); 76 77 // The following methods just call through to the subtree. 78 Xapian::termcount get_wdf() const; 79 Xapian::doccount get_termfreq_min() const; 80 Xapian::doccount get_termfreq_est() const; 81 Xapian::doccount get_termfreq_max() const; 82 Xapian::docid get_docid() const; 83 Xapian::termcount get_doclength() const; 84 PositionList * read_position_list(); 85 PositionList * open_position_list() const; 86 bool at_end() const; 87 88 std::string get_description() const; 89 }; 90 91 #endif /* XAPIAN_INCLUDED_SYNONYMPOSTLIST_H */ -
xapian-core/matcher/andmaybepostlist.h
Property changes on: xapian-core/matcher/synonympostlist.h ___________________________________________________________________ Added: svn:eol-style + native
6 6 * Copyright 1999,2000,2001 BrightStation PLC 7 7 * Copyright 2002 Ananova Ltd 8 8 * Copyright 2003,2004,2009 Olly Betts 9 * Copyright 2009 Lemur Consulting Ltd 9 10 * 10 11 * This program is free software; you can redistribute it and/or 11 12 * modify it under the terms of the GNU General Public License as … … 103 104 lmax = l->get_maxweight(); 104 105 rmax = r->get_maxweight(); 105 106 } 107 108 /** get_wdf() for ANDMAYBE postlists returns the sum of the wdfs of the 109 * sub postlists which are at the current document - this is desirable 110 * when the ANDMAYBE is part of a synonym. 111 */ 112 Xapian::termcount get_wdf() const; 106 113 }; 107 114 108 115 #endif /* OM_HGUARD_ANDMAYBEPOSTLIST_H */ -
xapian-core/matcher/orpostlist.h
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2003,2004,2009 Olly Betts 6 * Copyright 2009 Lemur Consulting Ltd 6 7 * 7 8 * This program is free software; you can redistribute it and/or 8 9 * modify it under the terms of the GNU General Public License as … … 67 68 PostList * right_, 68 69 MultiMatch * matcher_, 69 70 Xapian::doccount dbsize_); 71 72 /** get_wdf() for OR postlists returns the sum of the wdfs of the 73 * sub postlists which are at the current document - this is desirable 74 * when the OR is part of a synonym. 75 */ 76 virtual Xapian::termcount get_wdf() const; 70 77 }; 71 78 72 79 #endif /* OM_HGUARD_ORPOSTLIST_H */ -
xapian-core/matcher/andnotpostlist.cc
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2003,2004,2007,2009 Olly Betts 6 * Copyright 2009 Lemur Consulting Ltd 6 7 * 7 8 * This program is free software; you can redistribute it and/or 8 9 * modify it under the terms of the GNU General Public License as … … 175 176 DEBUGCALL(MATCH, Xapian::termcount, "AndNotPostList::get_doclength", ""); 176 177 RETURN(l->get_doclength()); 177 178 } 179 180 Xapian::termcount 181 AndNotPostList::get_wdf() const 182 { 183 DEBUGCALL(MATCH, Xapian::termcount, "AndNotPostList::get_wdf", ""); 184 RETURN(l->get_wdf()); 185 } -
xapian-core/matcher/andnotpostlist.h
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2003,2004,2009 Olly Betts 6 * Copyright 2009 Lemur Consulting Ltd 6 7 * 7 8 * This program is free software; you can redistribute it and/or 8 9 * modify it under the terms of the GNU General Public License as … … 69 70 Xapian::weight w_min, 70 71 Xapian::docid lh, 71 72 Xapian::docid rh); 73 74 /// get_wdf() for ANDNOT postlists returns the wdf of the left hand side. 75 Xapian::termcount get_wdf() const; 72 76 }; 73 77 74 78 #endif /* OM_HGUARD_ANDNOTPOSTLIST_H */ -
xapian-core/matcher/queryoptimiser.cc
82 82 case Xapian::Query::OP_ELITE_SET: 83 83 RETURN(do_or_like(query, factor)); 84 84 85 case Xapian::Query::OP_SYNONYM: 86 RETURN(do_synonym(query, factor)); 87 85 88 case Xapian::Query::OP_AND_NOT: { 86 89 AssertEq(query->subqs.size(), 2); 87 90 PostList * l = do_subquery(query->subqs[0], factor); … … 304 307 // for AND-like operations. 305 308 Xapian::Query::Internal::op_t op = query->op; 306 309 Assert(op == Xapian::Query::OP_ELITE_SET || op == Xapian::Query::OP_OR || 307 op == Xapian::Query::OP_XOR );310 op == Xapian::Query::OP_XOR || op == Xapian::Query::OP_SYNONYM); 308 311 309 312 const Xapian::Query::Internal::subquery_list &queries = query->subqs; 310 313 AssertRel(queries.size(), >=, 2); … … 382 385 ComparePostListTermFreqAscending()); 383 386 } 384 387 } 388 389 PostList * 390 QueryOptimiser::do_synonym(const Xapian::Query::Internal *query, double factor) 391 { 392 DEBUGCALL(MATCH, PostList *, "QueryOptimiser::do_synonym", 393 query << ", " << factor); 394 395 if (factor == 0.0) { 396 // If we have a factor of 0, we don't care about the weights, so 397 // we're just like a normal OR query. 398 RETURN(do_or_like(query, 0.0)); 399 } 400 401 AssertEq(query->wqf, 0); // FIXME - should we be doing something with the wqf? 402 403 // We build an OP_OR tree for OP_SYNONYM and then wrap it in a 404 // SynonymPostList, which supplies the weights. 405 RETURN(localsubmatch.make_synonym_postlist(do_or_like(query, 0.0), 406 matcher, factor)); 407 } -
xapian-core/matcher/queryoptimiser.h
2 2 * @brief Convert a Xapian::Query::Internal tree into an optimal PostList tree. 3 3 */ 4 4 /* Copyright (C) 2007,2008,2009 Olly Betts 5 * Copyright (C) 2008 Lemur Consulting Ltd 5 6 * 6 7 * This program is free software; you can redistribute it and/or 7 8 * modify it under the terms of the GNU General Public License as … … 88 89 */ 89 90 PostList * do_or_like(const Xapian::Query::Internal *query, double factor); 90 91 92 /** Optimise a synonym Xapian::Query::Internal subtree into a PostList 93 * 94 * @param query The subtree to optimise. 95 * @param factor How much to scale weights for this subtree by. 96 * 97 * @return A PostList subtree. 98 */ 99 PostList * do_synonym(const Xapian::Query::Internal *query, double factor); 100 91 101 public: 92 102 QueryOptimiser(const Xapian::Database::Internal & db_, 93 103 LocalSubMatch & localsubmatch_, -
xapian-core/matcher/andpostlist.cc
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2003,2004,2007,2008,2009 Olly Betts 6 * Copyright 2007 Lemur Consulting Ltd6 * Copyright 2007,2009 Lemur Consulting Ltd 7 7 * 8 8 * This program is free software; you can redistribute it and/or 9 9 * modify it under the terms of the GNU General Public License as … … 203 203 AssertEq(doclength, r->get_doclength()); 204 204 RETURN(doclength); 205 205 } 206 207 Xapian::termcount 208 AndPostList::get_wdf() const 209 { 210 DEBUGCALL(MATCH, Xapian::termcount, "AndPostList::get_wdf", ""); 211 RETURN(l->get_wdf() + r->get_wdf()); 212 } -
xapian-core/matcher/xorpostlist.cc
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2003,2004,2007,2008,2009 Olly Betts 6 * Copyright 2009 Lemur Consulting Ltd 6 7 * 7 8 * This program is free software; you can redistribute it and/or 8 9 * modify it under the terms of the GNU General Public License as … … 294 295 Assert(lhead > rhead); 295 296 return r->get_doclength(); 296 297 } 298 299 Xapian::termcount 300 XorPostList::get_wdf() const 301 { 302 DEBUGCALL(MATCH, Xapian::termcount, "XorPostList::get_wdf", ""); 303 if (lhead < rhead) RETURN(l->get_wdf()); 304 RETURN(r->get_wdf()); 305 } -
xapian-core/matcher/synonympostlist.cc
1 /** @file synonympostlist.cc 2 * @brief Combine subqueries, weighting as if they are synonyms 3 */ 4 /* Copyright 2007,2009 Lemur Consulting Ltd 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License as 8 * published by the Free Software Foundation; either version 2 of the 9 * License, or (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 19 * USA 20 */ 21 22 #include <config.h> 23 24 #include "synonympostlist.h" 25 26 #include "branchpostlist.h" 27 #include "debuglog.h" 28 29 SynonymPostList::SynonymPostList(PostList *subtree_, 30 MultiMatch * matcher_) 31 : subtree(subtree_), 32 matcher(matcher_), 33 wt(NULL), 34 want_doclength(false), 35 want_wdf(false) 36 { 37 } 38 39 SynonymPostList::~SynonymPostList() 40 { 41 delete wt; 42 delete subtree; 43 } 44 45 void 46 SynonymPostList::set_weight(const Xapian::Weight * wt_) 47 { 48 delete wt; 49 wt = wt_; 50 want_doclength = wt_->get_sumpart_needs_doclength_(); 51 want_wdf = wt->get_sumpart_needs_wdf_(); 52 } 53 54 PostList * 55 SynonymPostList::next(Xapian::weight w_min) 56 { 57 LOGCALL(MATCH, PostList *, "SynonymPostList::next", w_min); 58 (void)w_min; 59 next_handling_prune(subtree, 0, matcher); 60 RETURN(NULL); 61 } 62 63 PostList * 64 SynonymPostList::skip_to(Xapian::docid did, Xapian::weight w_min) 65 { 66 LOGCALL(MATCH, PostList *, "SynonymPostList::skip_to", did << ", " << w_min); 67 (void)w_min; 68 skip_to_handling_prune(subtree, did, 0, matcher); 69 RETURN(NULL); 70 } 71 72 
Xapian::weight 73 SynonymPostList::get_weight() const 74 { 75 76 // The wdf returned can be higher than the doclength. In particular, this 77 // can currently occur if the query contains a term more than once; the wdf 78 // of each occurrence is added up. 79 // 80 // However, it's reasonable for weighting algorithms to optimise by 81 // assuming that get_wdf() will always return no more than get_doclength(), 82 // since the doclength is the sum of the wdfs. 83 // 84 // Therefore, we simply clamp the wdf value to the doclength, to ensure 85 // that this is true. Note that this requires the doclength to be 86 // calculated even if the weight object doesn't want it. 87 88 if (want_wdf) { 89 Xapian::termcount wdf = get_wdf(); 90 Xapian::termcount doclen = get_doclength(); 91 if (wdf > doclen) wdf = doclen; 92 return wt->get_sumpart(wdf, doclen); 93 } 94 return wt->get_sumpart(0, want_doclength ? get_doclength() : 0); 95 } 96 97 Xapian::weight 98 SynonymPostList::get_maxweight() const 99 { 100 return wt->get_maxpart(); 101 } 102 103 Xapian::weight 104 SynonymPostList::recalc_maxweight() 105 { 106 return SynonymPostList::get_maxweight(); 107 } 108 109 Xapian::termcount 110 SynonymPostList::get_wdf() const { 111 return subtree->get_wdf(); 112 } 113 114 Xapian::doccount 115 SynonymPostList::get_termfreq_min() const { 116 return subtree->get_termfreq_min(); 117 } 118 119 Xapian::doccount 120 SynonymPostList::get_termfreq_est() const { 121 return subtree->get_termfreq_est(); 122 } 123 124 Xapian::doccount 125 SynonymPostList::get_termfreq_max() const { 126 return subtree->get_termfreq_max(); 127 } 128 129 Xapian::docid 130 SynonymPostList::get_docid() const { 131 return subtree->get_docid(); 132 } 133 134 Xapian::termcount 135 SynonymPostList::get_doclength() const { 136 return subtree->get_doclength(); 137 } 138 139 PositionList * 140 SynonymPostList::read_position_list() { 141 return subtree->read_position_list(); 142 } 143 144 PositionList * 145 
SynonymPostList::open_position_list() const { 146 return subtree->open_position_list(); 147 } 148 149 bool 150 SynonymPostList::at_end() const { 151 return subtree->at_end(); 152 } 153 154 std::string 155 SynonymPostList::get_description() const 156 { 157 return "(Synonym " + subtree->get_description() + ")"; 158 } -
xapian-core/matcher/orpostlist.cc
Property changes on: xapian-core/matcher/synonympostlist.cc ___________________________________________________________________ Added: svn:eol-style + native
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2001,2002 Ananova Ltd 5 5 * Copyright 2003,2004,2007,2008,2009 Olly Betts 6 * Copyright 2009 Lemur Consulting Ltd 6 7 * 7 8 * This program is free software; you can redistribute it and/or 8 9 * modify it under the terms of the GNU General Public License as … … 258 259 259 260 RETURN(doclength); 260 261 } 262 263 Xapian::termcount 264 OrPostList::get_wdf() const 265 { 266 DEBUGCALL(MATCH, Xapian::termcount, "OrPostList::get_wdf", ""); 267 if (lhead < rhead) RETURN(l->get_wdf()); 268 if (lhead > rhead) RETURN(r->get_wdf()); 269 RETURN(l->get_wdf() + r->get_wdf()); 270 } -
xapian-core/matcher/andmaybepostlist.cc
3 3 * Copyright 1999,2000,2001 BrightStation PLC 4 4 * Copyright 2002 Ananova Ltd 5 5 * Copyright 2003,2004,2005,2008,2009 Olly Betts 6 * Copyright 2009 Lemur Consulting Ltd 6 7 * 7 8 * This program is free software; you can redistribute it and/or 8 9 * modify it under the terms of the GNU General Public License as … … 169 170 if (lhead == rhead) AssertEq(l->get_doclength(), r->get_doclength()); 170 171 RETURN(l->get_doclength()); 171 172 } 173 174 Xapian::termcount 175 AndMaybePostList::get_wdf() const 176 { 177 DEBUGCALL(MATCH, Xapian::termcount, "AndMaybePostList::get_wdf", ""); 178 if (lhead == rhead) RETURN(l->get_wdf() + r->get_wdf()); 179 RETURN(l->get_wdf()); 180 } -
xapian-core/weight/weight.cc
73 73 init(factor); 74 74 } 75 75 76 void 77 Weight::init_(const Internal & stats, Xapian::termcount query_length, 78 double factor, Xapian::doccount termfreq) 79 { 80 // Synonym case. 81 collection_size_ = stats.collection_size; 82 rset_size_ = stats.rset_size; 83 average_length_ = stats.get_average_length(); 84 doclength_upper_bound_ = stats.db.get_doclength_upper_bound(); 85 doclength_lower_bound_ = stats.db.get_doclength_lower_bound(); 86 // For a synonym, the doclength is an upper bound on the wdf. 87 // FIXME: foo OP_SYNONYM foo could exceed this, but we probably need to 88 // handle repeated terms better somehow. 89 wdf_upper_bound_ = stats.db.get_doclength_upper_bound(); 90 termfreq_ = termfreq; 91 reltermfreq_ = 0; 92 query_length_ = query_length; 93 wqf_ = 1; 94 init(factor); 95 } 96 76 97 Weight::~Weight() { } 77 98 78 99 } -
xapian-core/tests/api_db.cc
1457 1457 return true; 1458 1458 } 1459 1459 1460 // Check a synonym search 1461 DEFINE_TESTCASE(synonym1, backend) { 1462 Xapian::Database db(get_database("etext")); 1463 Xapian::doccount lots = 214; 1464 1465 // Make a list of lists of subqueries, which are going to be joined 1466 // together as a synonym. 1467 vector<vector<Xapian::Query> > subqueries_list; 1468 1469 vector<Xapian::Query> subqueries; 1470 subqueries.push_back(Xapian::Query("date")); 1471 subqueries_list.push_back(subqueries); 1472 1473 // Two terms, which co-occur in some documents. 1474 subqueries.clear(); 1475 subqueries.push_back(Xapian::Query("sky")); 1476 subqueries.push_back(Xapian::Query("date")); 1477 subqueries_list.push_back(subqueries); 1478 1479 // Two terms which are entirely disjoint, and where the maximum weight 1480 // doesn't occur in the first or second match. 1481 subqueries.clear(); 1482 subqueries.push_back(Xapian::Query("gutenberg")); 1483 subqueries.push_back(Xapian::Query("blockhead")); 1484 subqueries_list.push_back(subqueries); 1485 1486 subqueries.clear(); 1487 subqueries.push_back(Xapian::Query("date")); 1488 subqueries.push_back(Xapian::Query(Xapian::Query::OP_OR, 1489 Xapian::Query("sky"), 1490 Xapian::Query("glove"))); 1491 subqueries_list.push_back(subqueries); 1492 1493 subqueries.clear(); 1494 subqueries.push_back(Xapian::Query("sky")); 1495 subqueries.push_back(Xapian::Query("date")); 1496 subqueries.push_back(Xapian::Query("stein")); 1497 subqueries.push_back(Xapian::Query("ally")); 1498 subqueries_list.push_back(subqueries); 1499 1500 subqueries.clear(); 1501 subqueries.push_back(Xapian::Query("attitud")); 1502 subqueries.push_back(Xapian::Query(Xapian::Query::OP_PHRASE, 1503 Xapian::Query("german"), 1504 Xapian::Query("adventur"))); 1505 subqueries_list.push_back(subqueries); 1506 1507 for (vector<vector<Xapian::Query> >::const_iterator 1508 qlist = subqueries_list.begin(); 1509 qlist != subqueries_list.end(); ++qlist) 1510 { 1511 // Run two queries, one 
joining the subqueries with OR and one joining them 1512 // with SYNONYM. 1513 Xapian::Enquire enquire(db); 1514 1515 // Do the search with OR 1516 Xapian::Query orquery(Xapian::Query(Xapian::Query::OP_OR, qlist->begin(), qlist->end())); 1517 enquire.set_query(orquery); 1518 Xapian::MSet ormset = enquire.get_mset(0, lots); 1519 1520 // Do the search with synonym, getting all the results. 1521 Xapian::Query synquery(Xapian::Query::OP_SYNONYM, qlist->begin(), qlist->end()); 1522 enquire.set_query(synquery); 1523 Xapian::MSet mset = enquire.get_mset(0, lots); 1524 1525 // Check that the queries return some results. 1526 TEST_NOT_EQUAL(mset.size(), 0); 1527 // Check that the queries return the same number of results. 1528 TEST_EQUAL(mset.size(), ormset.size()); 1529 map<Xapian::docid, Xapian::weight> values_or; 1530 map<Xapian::docid, Xapian::weight> values_synonym; 1531 for (Xapian::doccount i = 0; i < mset.size(); ++i) { 1532 values_or[*ormset[i]] = ormset[i].get_weight(); 1533 values_synonym[*mset[i]] = mset[i].get_weight(); 1534 } 1535 TEST_EQUAL(values_or.size(), values_synonym.size()); 1536 1537 /* Check that most of the weights for items in the "or" mset are 1538 * different from those in the "synonym" mset. */ 1539 int same_weight = 0; 1540 int different_weight = 0; 1541 for (map<Xapian::docid, Xapian::weight>::const_iterator 1542 j = values_or.begin(); 1543 j != values_or.end(); ++j) 1544 { 1545 Xapian::docid did = j->first; 1546 // Check that all the results in the or tree make it to the synonym tree. 1547 TEST(values_synonym.find(did) != values_synonym.end()); 1548 if (values_or[did] == values_synonym[did]) { 1549 same_weight += 1; 1550 } else { 1551 different_weight += 1; 1552 } 1553 } 1554 if (qlist->size() == 1) { 1555 // Had a single term - check that all the weights were the same. 1556 TEST_EQUAL(different_weight, 0); 1557 TEST_NOT_EQUAL(same_weight, 0); 1558 } else { 1559 // Check that most of the weights differ. 
1560 TEST_NOT_EQUAL(different_weight, 0); 1561 TEST_REL(same_weight, <, different_weight); 1562 } 1563 1564 // Do the search with synonym, but just get the top result. 1565 // (Regression test - the OR subquery in the synonym postlist tree used 1566 // to shortcut incorrectly, and return the wrong result here). 1567 Xapian::MSet mset_top = enquire.get_mset(0, 1); 1568 TEST_EQUAL(mset_top.size(), 1); 1569 TEST(mset_range_is_same(mset_top, 0, mset, 0, 1)); 1570 } 1571 return true; 1572 } 1573 1574 // Regression test - test a synonym search with a MultiAndPostlist. 1575 DEFINE_TESTCASE(synonym2, backend) { 1576 Xapian::Query query; 1577 vector<Xapian::Query> subqueries; 1578 subqueries.push_back(Xapian::Query("file")); 1579 subqueries.push_back(Xapian::Query("the")); 1580 subqueries.push_back(Xapian::Query("next")); 1581 subqueries.push_back(Xapian::Query("reader")); 1582 query = Xapian::Query(Xapian::Query::OP_AND, subqueries.begin(), subqueries.end()); 1583 subqueries.clear(); 1584 subqueries.push_back(query); 1585 subqueries.push_back(Xapian::Query("gutenberg")); 1586 query = Xapian::Query(Xapian::Query::OP_SYNONYM, subqueries.begin(), subqueries.end()); 1587 1588 tout << query.get_description() << endl; 1589 1590 Xapian::Database db(get_database("etext")); 1591 Xapian::Enquire enquire(db); 1592 enquire.set_query(query); 1593 Xapian::MSet mset = enquire.get_mset(0, 10); 1594 tout << mset.get_description() << endl; 1595 1596 // Regression test that OP_SCALE_WEIGHT works with OP_SYNONYM 1597 double maxposs = mset.get_max_possible(); 1598 query = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 10.0); 1599 enquire.set_query(query); 1600 mset = enquire.get_mset(0, 10); 1601 double maxposs2 = mset.get_max_possible(); 1602 1603 TEST_EQUAL_DOUBLE(maxposs * 10.0, maxposs2); 1604 1605 return true; 1606 } 1607 1460 1608 // tests that specifying a nonexistent input file throws an exception. 
1461 1609 DEFINE_TESTCASE(flintdatabaseopeningerror1, flint) { 1462 1610 mkdir(".flint", 0755); -
xapian-core/tests/queryparsertest.cc
1 1 /* queryparsertest.cc: Tests of Xapian::QueryParser 2 2 * 3 3 * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009 Olly Betts 4 * Copyright (C) 2007,2009 Lemur Consulting Ltd 4 5 * 5 6 * This program is free software; you can redistribute it and/or 6 7 * modify it under the terms of the GNU General Public License as … … 786 787 Xapian::Query qobj = qp.parse_query("ab*", Xapian::QueryParser::FLAG_WILDCARD); 787 788 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(abc:(pos=1))"); 788 789 qobj = qp.parse_query("muscle*", Xapian::QueryParser::FLAG_WILDCARD); 789 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscle:(pos=1) ORmusclebound:(pos=1)))");790 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscle:(pos=1) SYNONYM musclebound:(pos=1)))"); 790 791 qobj = qp.parse_query("meat*", Xapian::QueryParser::FLAG_WILDCARD); 791 792 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query()"); 792 793 qobj = qp.parse_query("musc*", Xapian::QueryParser::FLAG_WILDCARD); 793 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscat:(pos=1) OR muscle:(pos=1) OR musclebound:(pos=1) ORmuscular:(pos=1)))");794 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscat:(pos=1) SYNONYM muscle:(pos=1) SYNONYM musclebound:(pos=1) SYNONYM muscular:(pos=1)))"); 794 795 qobj = qp.parse_query("mutt*", Xapian::QueryParser::FLAG_WILDCARD); 795 796 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(mutton:(pos=1))"); 796 797 // Regression test (we weren't lowercasing terms before checking if they … … 879 880 qp.add_prefix("author", "A"); 880 881 Xapian::Query qobj; 881 882 qobj = qp.parse_query("author:h*", Xapian::QueryParser::FLAG_WILDCARD); 882 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((Aheinlein:(pos=1) ORAhuxley:(pos=1)))");883 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((Aheinlein:(pos=1) SYNONYM Ahuxley:(pos=1)))"); 883 884 qobj = qp.parse_query("author:h* test", 
Xapian::QueryParser::FLAG_WILDCARD); 884 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( Aheinlein:(pos=1) OR Ahuxley:(pos=1) OR test:(pos=2)))");885 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((Aheinlein:(pos=1) SYNONYM Ahuxley:(pos=1)) OR test:(pos=2)))"); 885 886 return true; 886 887 } 887 888 … … 907 908 doc.add_term("XTcowl"); 908 909 doc.add_term("XTcox"); 909 910 doc.add_term("ZXTcow"); 911 doc.add_term("XONEpartial"); 912 doc.add_term("XONEpartial2"); 913 doc.add_term("XTWOpartial3"); 914 doc.add_term("XTWOpartial4"); 910 915 db.add_document(doc); 911 916 Xapian::QueryParser qp; 912 917 qp.set_database(db); … … 922 927 qobj = qp.parse_query("ab", Xapian::QueryParser::FLAG_PARTIAL); 923 928 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((abc:(pos=1) OR Zab:(pos=1)))"); 924 929 qobj = qp.parse_query("muscle", Xapian::QueryParser::FLAG_PARTIAL); 925 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( muscle:(pos=1) OR musclebound:(pos=1) OR Zmuscl:(pos=1)))");930 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((muscle:(pos=1) SYNONYM musclebound:(pos=1)) OR Zmuscl:(pos=1)))"); 926 931 qobj = qp.parse_query("meat", Xapian::QueryParser::FLAG_PARTIAL); 927 932 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(Zmeat:(pos=1))"); 928 933 qobj = qp.parse_query("musc", Xapian::QueryParser::FLAG_PARTIAL); 929 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( muscat:(pos=1) OR muscle:(pos=1) OR musclebound:(pos=1) OR muscular:(pos=1) OR Zmusc:(pos=1)))");934 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((muscat:(pos=1) SYNONYM muscle:(pos=1) SYNONYM musclebound:(pos=1) SYNONYM muscular:(pos=1)) OR Zmusc:(pos=1)))"); 930 935 qobj = qp.parse_query("mutt", Xapian::QueryParser::FLAG_PARTIAL); 931 936 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((mutton:(pos=1) OR Zmutt:(pos=1)))"); 932 937 qobj = qp.parse_query("abc musc", Xapian::QueryParser::FLAG_PARTIAL); 933 
TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((Zabc:(pos=1) OR muscat:(pos=2) OR muscle:(pos=2) OR musclebound:(pos=2) OR muscular:(pos=2) OR Zmusc:(pos=2)))");938 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((Zabc:(pos=1) OR (muscat:(pos=2) SYNONYM muscle:(pos=2) SYNONYM musclebound:(pos=2) SYNONYM muscular:(pos=2)) OR Zmusc:(pos=2)))"); 934 939 qobj = qp.parse_query("a* mutt", Xapian::QueryParser::FLAG_PARTIAL | Xapian::QueryParser::FLAG_WILDCARD); 935 940 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((abc:(pos=1) OR mutton:(pos=2) OR Zmutt:(pos=2)))"); 936 941 937 942 // Check behaviour with stemmed terms, and stem strategy STEM_SOME. 938 943 qobj = qp.parse_query("o", Xapian::QueryParser::FLAG_PARTIAL); 939 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( out:(pos=1) OR outside:(pos=1) OR Zo:(pos=1)))");944 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR Zo:(pos=1)))"); 940 945 qobj = qp.parse_query("ou", Xapian::QueryParser::FLAG_PARTIAL); 941 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( out:(pos=1) OR outside:(pos=1) OR Zou:(pos=1)))");946 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR Zou:(pos=1)))"); 942 947 qobj = qp.parse_query("out", Xapian::QueryParser::FLAG_PARTIAL); 943 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( out:(pos=1) OR outside:(pos=1) OR Zout:(pos=1)))");948 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR Zout:(pos=1)))"); 944 949 qobj = qp.parse_query("outs", Xapian::QueryParser::FLAG_PARTIAL); 945 950 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((outside:(pos=1) OR Zout:(pos=1)))"); 946 951 qobj = qp.parse_query("outsi", Xapian::QueryParser::FLAG_PARTIAL); … … 952 957 953 958 // Check behaviour with capitalised terms, and stem strategy STEM_SOME. 
954 959 qobj = qp.parse_query("Out", Xapian::QueryParser::FLAG_PARTIAL); 955 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( out:(pos=1,wqf=2) OR outside:(pos=1)))");960 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR out:(pos=1)))"); 956 961 qobj = qp.parse_query("Outs", Xapian::QueryParser::FLAG_PARTIAL); 957 962 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((outside:(pos=1) OR outs:(pos=1)))"); 958 963 qobj = qp.parse_query("Outside", Xapian::QueryParser::FLAG_PARTIAL); … … 961 966 // And now with stemming strategy STEM_ALL. 962 967 qp.set_stemming_strategy(Xapian::QueryParser::STEM_ALL); 963 968 qobj = qp.parse_query("Out", Xapian::QueryParser::FLAG_PARTIAL); 964 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( out:(pos=1,wqf=2) OR outside:(pos=1)))");969 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR out:(pos=1)))"); 965 970 qobj = qp.parse_query("Outs", Xapian::QueryParser::FLAG_PARTIAL); 966 971 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((outside:(pos=1) OR out:(pos=1)))"); 967 972 qobj = qp.parse_query("Outside", Xapian::QueryParser::FLAG_PARTIAL); … … 970 975 // Check handling of a case with a prefix. 
971 976 qp.set_stemming_strategy(Xapian::QueryParser::STEM_SOME); 972 977 qobj = qp.parse_query("title:cow", Xapian::QueryParser::FLAG_PARTIAL); 973 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( XTcowl:(pos=1) OR XTcows:(pos=1) OR ZXTcow:(pos=1)))");978 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((XTcowl:(pos=1) SYNONYM XTcows:(pos=1)) OR ZXTcow:(pos=1)))"); 974 979 qobj = qp.parse_query("title:cows", Xapian::QueryParser::FLAG_PARTIAL); 975 980 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((XTcows:(pos=1) OR ZXTcow:(pos=1)))"); 976 981 qobj = qp.parse_query("title:Cow", Xapian::QueryParser::FLAG_PARTIAL); 977 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( XTcowl:(pos=1) OR XTcows:(pos=1) OR XTcow:(pos=1)))");982 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((XTcowl:(pos=1) SYNONYM XTcows:(pos=1)) OR XTcow:(pos=1)))"); 978 983 qobj = qp.parse_query("title:Cows", Xapian::QueryParser::FLAG_PARTIAL); 979 984 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(XTcows:(pos=1,wqf=2))"); 980 985 … … 982 987 // inflate the wqf of the "parsed as normal" version of a partial term 983 988 // by multiplying it by the number of prefixes mapped to. 984 989 qobj = qp.parse_query("double:vision", Xapian::QueryParser::FLAG_PARTIAL); 985 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((ZXONEvision:(pos=1) OR ZXTWOvision:(pos=1)))"); 990 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((ZXONEvision:(pos=1) SYNONYM ZXTWOvision:(pos=1)))"); 991 992 // Test handling of FLAG_PARTIAL when there's more than one prefix. 
993 qobj = qp.parse_query("double:part", Xapian::QueryParser::FLAG_PARTIAL); 994 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((XONEpartial:(pos=1) SYNONYM XONEpartial2:(pos=1) SYNONYM XTWOpartial3:(pos=1) SYNONYM XTWOpartial4:(pos=1)) OR (ZXONEpart:(pos=1) SYNONYM ZXTWOpart:(pos=1))))"); 995 996 // Test handling of FLAG_PARTIAL when there's more than one prefix, without 997 // stemming. 998 qp.set_stemming_strategy(Xapian::QueryParser::STEM_NONE); 999 qobj = qp.parse_query("double:part", Xapian::QueryParser::FLAG_PARTIAL); 1000 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((XONEpartial:(pos=1) SYNONYM XONEpartial2:(pos=1) SYNONYM XTWOpartial3:(pos=1) SYNONYM XTWOpartial4:(pos=1)) OR (XONEpart:(pos=1) SYNONYM XTWOpart:(pos=1))))"); 1001 qobj = qp.parse_query("double:partial", Xapian::QueryParser::FLAG_PARTIAL); 1002 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((XONEpartial:(pos=1) SYNONYM XONEpartial2:(pos=1) SYNONYM XTWOpartial3:(pos=1) SYNONYM XTWOpartial4:(pos=1)) OR (XONEpartial:(pos=1) SYNONYM XTWOpartial:(pos=1))))"); 986 1003 987 1004 return true; 988 1005 } … … 1547 1564 } 1548 1565 1549 1566 static test test_synonym_queries[] = { 1550 { "searching", "(Zsearch:(pos=1) OR Zfind:(pos=1) ORZlocate:(pos=1))" },1551 { "search", "(Zsearch:(pos=1) ORfind:(pos=1))" },1552 { "Search", "(search:(pos=1) ORfind:(pos=1))" },1567 { "searching", "(Zsearch:(pos=1) SYNONYM Zfind:(pos=1) SYNONYM Zlocate:(pos=1))" }, 1568 { "search", "(Zsearch:(pos=1) SYNONYM find:(pos=1))" }, 1569 { "Search", "(search:(pos=1) SYNONYM find:(pos=1))" }, 1553 1570 { "Searching", "searching:(pos=1)" }, 1554 { "searching OR terms", "( Zsearch:(pos=1) OR Zfind:(pos=1) OR Zlocate:(pos=1) OR Zterm:(pos=2))" },1555 { "search OR terms", "( Zsearch:(pos=1) OR find:(pos=1) OR Zterm:(pos=2))" },1556 { "search +terms", "(Zterm:(pos=2) AND_MAYBE (Zsearch:(pos=1) ORfind:(pos=1)))" },1557 { "search -terms", "((Zsearch:(pos=1) ORfind:(pos=1)) AND_NOT Zterm:(pos=2))" 
},1558 { "+search terms", "((Zsearch:(pos=1) ORfind:(pos=1)) AND_MAYBE Zterm:(pos=2))" },1559 { "-search terms", "(Zterm:(pos=2) AND_NOT (Zsearch:(pos=1) ORfind:(pos=1)))" },1560 { "search terms", "( Zsearch:(pos=1) OR find:(pos=1) OR Zterm:(pos=2))" },1571 { "searching OR terms", "((Zsearch:(pos=1) SYNONYM Zfind:(pos=1) SYNONYM Zlocate:(pos=1)) OR Zterm:(pos=2))" }, 1572 { "search OR terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) OR Zterm:(pos=2))" }, 1573 { "search +terms", "(Zterm:(pos=2) AND_MAYBE (Zsearch:(pos=1) SYNONYM find:(pos=1)))" }, 1574 { "search -terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) AND_NOT Zterm:(pos=2))" }, 1575 { "+search terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) AND_MAYBE Zterm:(pos=2))" }, 1576 { "-search terms", "(Zterm:(pos=2) AND_NOT (Zsearch:(pos=1) SYNONYM find:(pos=1)))" }, 1577 { "search terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) OR Zterm:(pos=2))" }, 1561 1578 // Shouldn't trigger synonyms: 1562 1579 { "\"search terms\"", "(search:(pos=1) PHRASE 2 terms:(pos=2))" }, 1563 1580 { NULL, NULL } … … 1597 1614 1598 1615 static test test_multi_synonym_queries[] = { 1599 1616 { "sun OR tan OR cream", "(Zsun:(pos=1) OR Ztan:(pos=2) OR Zcream:(pos=3))" }, 1600 { "sun tan", "( Zsun:(pos=1) OR Ztan:(pos=2) ORbathe:(pos=1))" },1601 { "sun tan cream", "( Zsun:(pos=1) OR Ztan:(pos=2) OR Zcream:(pos=3) ORlotion:(pos=1))" },1602 { "beach sun tan holiday", "(Zbeach:(pos=1) OR Zsun:(pos=2) OR Ztan:(pos=3) OR bathe:(pos=2) OR Zholiday:(pos=4))" },1603 { "sun tan sun tan cream", "( Zsun:(pos=1) OR Ztan:(pos=2) OR bathe:(pos=1) OR Zsun:(pos=3) OR Ztan:(pos=4) OR Zcream:(pos=5) OR lotion:(pos=3))" },1604 { "single", "(Zsingl:(pos=1) ORrecord:(pos=1))" },1617 { "sun tan", "((Zsun:(pos=1) OR Ztan:(pos=2)) SYNONYM bathe:(pos=1))" }, 1618 { "sun tan cream", "((Zsun:(pos=1) OR Ztan:(pos=2) OR Zcream:(pos=3)) SYNONYM lotion:(pos=1))" }, 1619 { "beach sun tan holiday", "(Zbeach:(pos=1) OR ((Zsun:(pos=2) OR Ztan:(pos=3)) SYNONYM 
bathe:(pos=2)) OR Zholiday:(pos=4))" }, 1620 { "sun tan sun tan cream", "(((Zsun:(pos=1) OR Ztan:(pos=2)) SYNONYM bathe:(pos=1)) OR ((Zsun:(pos=3) OR Ztan:(pos=4) OR Zcream:(pos=5)) SYNONYM lotion:(pos=3)))" }, 1621 { "single", "(Zsingl:(pos=1) SYNONYM record:(pos=1))" }, 1605 1622 { NULL, NULL } 1606 1623 }; 1607 1624 … … 1640 1657 1641 1658 static test test_synonym_op_queries[] = { 1642 1659 { "searching", "Zsearch:(pos=1)" }, 1643 { "~searching", "(Zsearch:(pos=1) OR Zfind:(pos=1) ORZlocate:(pos=1))" },1644 { "~search", "(Zsearch:(pos=1) ORfind:(pos=1))" },1645 { "~Search", "(search:(pos=1) ORfind:(pos=1))" },1660 { "~searching", "(Zsearch:(pos=1) SYNONYM Zfind:(pos=1) SYNONYM Zlocate:(pos=1))" }, 1661 { "~search", "(Zsearch:(pos=1) SYNONYM find:(pos=1))" }, 1662 { "~Search", "(search:(pos=1) SYNONYM find:(pos=1))" }, 1646 1663 { "~Searching", "searching:(pos=1)" }, 1647 { "~searching OR terms", "( Zsearch:(pos=1) OR Zfind:(pos=1) OR Zlocate:(pos=1) OR Zterm:(pos=2))" },1648 { "~search OR terms", "( Zsearch:(pos=1) OR find:(pos=1) OR Zterm:(pos=2))" },1649 { "~search +terms", "(Zterm:(pos=2) AND_MAYBE (Zsearch:(pos=1) ORfind:(pos=1)))" },1650 { "~search -terms", "((Zsearch:(pos=1) ORfind:(pos=1)) AND_NOT Zterm:(pos=2))" },1651 { "+~search terms", "((Zsearch:(pos=1) ORfind:(pos=1)) AND_MAYBE Zterm:(pos=2))" },1652 { "-~search terms", "(Zterm:(pos=2) AND_NOT (Zsearch:(pos=1) ORfind:(pos=1)))" },1653 { "~search terms", "( Zsearch:(pos=1) OR find:(pos=1) OR Zterm:(pos=2))" },1664 { "~searching OR terms", "((Zsearch:(pos=1) SYNONYM Zfind:(pos=1) SYNONYM Zlocate:(pos=1)) OR Zterm:(pos=2))" }, 1665 { "~search OR terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) OR Zterm:(pos=2))" }, 1666 { "~search +terms", "(Zterm:(pos=2) AND_MAYBE (Zsearch:(pos=1) SYNONYM find:(pos=1)))" }, 1667 { "~search -terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) AND_NOT Zterm:(pos=2))" }, 1668 { "+~search terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) AND_MAYBE Zterm:(pos=2))" }, 1669 { 
"-~search terms", "(Zterm:(pos=2) AND_NOT (Zsearch:(pos=1) SYNONYM find:(pos=1)))" }, 1670 { "~search terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) OR Zterm:(pos=2))" }, 1654 1671 // FIXME: should look for multi-term synonym... 1655 1672 { "~\"search terms\"", "(search:(pos=1) PHRASE 2 terms:(pos=2))" }, 1656 1673 { NULL, NULL } -
xapian-core/include/xapian/query.h
119 119 OP_VALUE_GE, 120 120 121 121 /** Filter by a less-than-or-equal test on a document value. */ 122 OP_VALUE_LE 122 OP_VALUE_LE, 123 124 /** Treat a set of queries as synonyms. 125 * 126 * This returns all results which match at least one of the 127 * queries, but weights them as if all the sub-queries are instances 128 * of the same term: so multiple matching terms for a document 129 * increase the wdf value used, and the term frequency is based on 130 * the number of documents which would match an OR of all the 131 * subqueries. 132 * 133 * The term frequency used will usually be an approximation, 134 * because calculating the precise combined term frequency would 135 * be overly expensive. 136 * 137 * Identical to OP_OR, except for the weightings returned. 138 */ 139 OP_SYNONYM 123 140 } op; 124 141 125 142 /** Copy constructor. */ -
xapian-core/include/xapian/weight.h
2 2 * @brief Weighting scheme API. 3 3 */ 4 4 /* Copyright (C) 2007,2008,2009 Olly Betts 5 * Copyright (C) 2009 Lemur Consulting Ltd 5 6 * 6 7 * This program is free software; you can redistribute it and/or 7 8 * modify it under the terms of the GNU General Public License as … … 212 213 const std::string & term, Xapian::termcount wqf_, 213 214 double factor); 214 215 216 /** @private @internal Initialise this object to calculate weights for a 217 * synonym. 218 * 219 * @param stats Source of statistics. 220 * @param query_len_ Query length. 221 * @param factor Any scaling factor (e.g. from OP_SCALE_WEIGHT). 222 * @param termfreq The termfreq to use. 223 */ 224 void init_(const Internal & stats, Xapian::termcount query_len_, 225 double factor, Xapian::doccount termfreq); 226 215 227 /** @private @internal Initialise this object to calculate the extra weight 216 228 * component. 217 229 * … … 230 242 return stats_needed & DOC_LENGTH; 231 243 } 232 244 245 /** @private @internal Return true if the WDF is needed. 246 * 247 * If this method returns true, then the WDF will be fetched and passed to 248 * @a get_sumpart(). Otherwise 0 may be passed for the wdf. 249 */ 250 bool get_sumpart_needs_wdf_() const { 251 return stats_needed & WDF; 252 } 253 233 254 protected: 234 255 /// Only allow subclasses to copy us. 235 256 Weight(const Weight &); -
xapian-core/api/omqueryinternal.cc
65 65 case Xapian::Query::OP_VALUE_RANGE: 66 66 case Xapian::Query::OP_VALUE_GE: 67 67 case Xapian::Query::OP_VALUE_LE: 68 case Xapian::Query::OP_SYNONYM: 68 69 return 0; 69 70 case Xapian::Query::OP_SCALE_WEIGHT: 70 71 return 1; … … 100 101 case Xapian::Query::OP_NEAR: 101 102 case Xapian::Query::OP_PHRASE: 102 103 case Xapian::Query::OP_ELITE_SET: 104 case Xapian::Query::OP_SYNONYM: 103 105 return UINT_MAX; 104 106 default: 105 107 Assert(false); … … 221 223 result += "."; 222 224 result += str_parameter; // serialise_double(get_dbl_parameter()); 223 225 break; 226 case Xapian::Query::OP_SYNONYM: 227 result += "="; 228 break; 224 229 } 225 230 } 226 231 return result; … … 251 256 case Xapian::Query::OP_VALUE_GE: name = "VALUE_GE"; break; 252 257 case Xapian::Query::OP_VALUE_LE: name = "VALUE_LE"; break; 253 258 case Xapian::Query::OP_SCALE_WEIGHT: name = "SCALE_WEIGHT"; break; 259 case Xapian::Query::OP_SYNONYM: name = "SYNONYM"; break; 254 260 } 255 261 return name; 256 262 } … … 584 590 return qint_from_vector(Xapian::Query::OP_SCALE_WEIGHT, 585 591 subqs, 0, param); 586 592 } 587 default: 593 case '=': { 594 return qint_from_vector(Xapian::Query::OP_SYNONYM, subqs); 595 } 596 default: 588 597 LOGLINE(UNKNOWN, "Can't parse remainder `" << p - 1 << "'"); 589 598 throw Xapian::InvalidArgumentError("Invalid query string"); 590 599 } … … 809 818 case OP_ELITE_SET: 810 819 case OP_OR: 811 820 case OP_XOR: 821 case OP_SYNONYM: 812 822 // Doing an "OR" type operation - if we've got any MatchNothing 813 823 // subnodes, drop them; except that we mustn't become an empty 814 824 // node due to this, so we never drop a MatchNothing subnode … … 900 910 } 901 911 } 902 912 break; 903 case OP_OR: case OP_AND: case OP_XOR: 913 case OP_OR: case OP_AND: case OP_XOR: case OP_SYNONYM: 904 914 // Remove duplicates if we can. 
905 915 if (subqs.size() > 1) collapse_subqs(); 906 916 break; … … 944 954 void 945 955 Xapian::Query::Internal::collapse_subqs() 946 956 { 947 Assert(op == OP_OR || op == OP_AND || op == OP_XOR );957 Assert(op == OP_OR || op == OP_AND || op == OP_XOR || op == OP_SYNONYM); 948 958 typedef set<Xapian::Query::Internal *, SortPosName> subqtable; 949 959 subqtable sqtab; 950 960 … … 1038 1048 Assert(!is_leaf(op)); 1039 1049 if (subq == 0) { 1040 1050 subqs.push_back(0); 1041 } else if (op == subq->op && (op == OP_AND || op == OP_OR || op == OP_XOR )) {1051 } else if (op == subq->op && (op == OP_AND || op == OP_OR || op == OP_XOR || op == OP_SYNONYM)) { 1042 1052 // Distribute the subquery. 1043 1053 for (subquery_list::const_iterator i = subq->subqs.begin(); 1044 1054 i != subq->subqs.end(); i++) { -
xapian-bindings/python/smoketest2.py
213 213 qp.set_stemming_strategy(qp.STEM_SOME) 214 214 qp.set_stemmer(xapian.Stem('en')) 215 215 expect_query(qp.parse_query("foo o", qp.FLAG_PARTIAL), 216 "(Zfoo:(pos=1) AND (out:(pos=2) OR outsid:(pos=2) OR Zo:(pos=2)))") 216 "(Zfoo:(pos=1) AND ((out:(pos=2) SYNONYM outsid:(pos=2)) OR Zo:(pos=2)))") 217 217 218 218 expect_query(qp.parse_query("foo outside", qp.FLAG_PARTIAL), 219 219 "(Zfoo:(pos=1) AND Zoutsid:(pos=2))") -
xapian-bindings/python/smoketest3.py
153 153 154 154 # Feature test for Document.values 155 155 count = 0 156 for term in doc.values(): 156 for term in list(doc.values()): 157 157 count += 1 158 158 expect(count, 0, "Unexpected number of entries in doc.values") 159 159 … … 213 213 qp.set_stemming_strategy(qp.STEM_SOME) 214 214 qp.set_stemmer(xapian.Stem('en')) 215 215 expect_query(qp.parse_query("foo o", qp.FLAG_PARTIAL), 216 "(Zfoo:(pos=1) AND (out:(pos=2) OR outsid:(pos=2) OR Zo:(pos=2)))") 216 "(Zfoo:(pos=1) AND ((out:(pos=2) SYNONYM outsid:(pos=2)) OR Zo:(pos=2)))") 217 217 218 218 expect_query(qp.parse_query("foo outside", qp.FLAG_PARTIAL), 219 219 "(Zfoo:(pos=1) AND Zoutsid:(pos=2))")