Ticket #50: opsynonym_changes_12434_12435.patch
File opsynonym_changes_12434_12435.patch, 39.7 KB (added by , 16 years ago) |
---|
-
xapian-maintainer-tools/win32msvc/win32_matcher.mak
34 34 $(INTDIR)\queryoptimiser.obj\ 35 35 $(INTDIR)\rset.obj\ 36 36 $(INTDIR)\selectpostlist.obj\ 37 $(INTDIR)\synonympostlist.obj\ 37 38 $(INTDIR)\valuerangepostlist.obj\ 38 39 $(INTDIR)\valuegepostlist.obj\ 39 40 $(INTDIR)\xorpostlist.obj\ … … 60 61 $(INTDIR)\queryoptimiser.cc\ 61 62 $(INTDIR)\rset.cc\ 62 63 $(INTDIR)\selectpostlist.cc\ 64 $(INTDIR)\synonympostlist.cc\ 63 65 $(INTDIR)\valuerangepostlist.cc\ 64 66 $(INTDIR)\valuegepostlist.cc\ 65 67 $(INTDIR)\xorpostlist.cc\ -
xapian-core/queryparser/queryparser.lemony
279 279 end = db.synonyms_end(term); 280 280 } 281 281 while (syn != end) { 282 q = Query(Query::OP_ OR, q, Query(*syn, 1, pos));282 q = Query(Query::OP_SYNONYM, q, Query(*syn, 1, pos)); 283 283 ++syn; 284 284 } 285 285 } … … 345 345 } 346 346 } 347 347 delete this; 348 return new Query(Query::OP_ OR, subqs.begin(), subqs.end());348 return new Query(Query::OP_SYNONYM, subqs.begin(), subqs.end()); 349 349 } 350 350 351 351 Query * … … 357 357 for (piter = prefixes.begin(); piter != prefixes.end(); ++piter) { 358 358 string root = *piter; 359 359 root += name; 360 vector<Query> subqs2; 360 361 TermIterator t = db.allterms_begin(root); 361 362 while (t != db.allterms_end(root)) { 362 subqs .push_back(Query(*t, 1, pos));363 subqs2.push_back(Query(*t, 1, pos)); 363 364 ++t; 364 365 } 366 subqs.push_back(Query(Query::OP_SYNONYM, subqs2.begin(), subqs2.end())); 365 367 // Add the term, as it would normally be handled, as an alternative. 366 368 subqs.push_back(Query(make_term(*piter), 1, pos)); 367 369 } … … 1168 1170 subqs2.push_back(Query(*syn, 1, pos)); 1169 1171 ++syn; 1170 1172 } 1171 Query q_synonym_terms(Query::OP_ OR, subqs2.begin(), subqs2.end());1173 Query q_synonym_terms(Query::OP_SYNONYM, subqs2.begin(), subqs2.end()); 1172 1174 subqs2.clear(); 1173 subqs.push_back(Query(Query::OP_ OR,1175 subqs.push_back(Query(Query::OP_SYNONYM, 1174 1176 q_original_terms, q_synonym_terms)); 1175 1177 } 1176 1178 } else { -
xapian-core/matcher/Makefile.mk
18 18 matcher/queryoptimiser.h\ 19 19 matcher/remotesubmatch.h\ 20 20 matcher/selectpostlist.h\ 21 matcher/synonympostlist.h\ 21 22 matcher/valuegepostlist.h\ 22 23 matcher/valuerangepostlist.h\ 23 24 matcher/xorpostlist.h … … 54 55 matcher/queryoptimiser.cc\ 55 56 matcher/rset.cc\ 56 57 matcher/selectpostlist.cc\ 58 matcher/synonympostlist.cc\ 57 59 matcher/valuegepostlist.cc\ 58 60 matcher/valuerangepostlist.cc\ 59 61 matcher/xorpostlist.cc -
xapian-core/matcher/multimatch.cc
791 791 LOGVALUE(MATCH, denom); 792 792 LOGVALUE(MATCH, percent_scale); 793 793 Assert(percent_scale <= denom); 794 denom *= greatest_wt; 795 Assert(denom > 0); 796 percent_scale /= denom; 794 if (denom == 0) { 795 percent_scale = 1.0 / greatest_wt; 796 } else { 797 denom *= greatest_wt; 798 Assert(denom > 0); 799 percent_scale /= denom; 800 } 797 801 } else { 798 802 // If all the terms match, the 2 sums of weights cancel 799 803 percent_scale = 1.0 / greatest_wt; -
xapian-core/matcher/localmatch.cc
31 31 #include "omdebug.h" 32 32 #include "omqueryinternal.h" 33 33 #include "queryoptimiser.h" 34 #include "synonympostlist.h" 34 35 #include "weightinternal.h" 35 36 36 37 #include <cfloat> … … 111 112 } 112 113 113 114 PostList * 115 LocalSubMatch::make_synonym_postlist(PostList * or_pl, MultiMatch * matcher, 116 double factor) 117 { 118 DEBUGCALL(MATCH, PostList *, "LocalSubMatch::make_synonym_postlist", 119 "[or_pl], [matcher], " << factor); 120 LOGVALUE(MATCH, or_pl->get_termfreq_est()); 121 AutoPtr<SynonymPostList> res(new SynonymPostList(or_pl, matcher)); 122 AutoPtr<Xapian::Weight> wt(wt_factory->clone_()); 123 124 // FIXME - calculate the reltermfreq to use and pass it in? 125 wt->init_(*stats, qlen, factor, or_pl->get_termfreq_est()); 126 127 res->set_weight(wt.release()); 128 RETURN(res.release()); 129 } 130 131 PostList * 114 132 LocalSubMatch::postlist_from_op_leaf_query(const Xapian::Query::Internal *query, 115 133 double factor) 116 134 { … … 132 150 Xapian::doccount tf = stats->get_termfreq(query->tname); 133 151 Xapian::weight weight = boolean ? 0 : wt->get_maxpart(); 134 152 Xapian::MSet::Internal::TermFreqAndWeight info(tf, weight); 153 LOGLINE(MATCH, "Setting term_info[" << query->tname << "] to (" << tf << ", " << weight << ")"); 135 154 term_info.insert(make_pair(query->tname, info)); 136 155 } else if (!boolean) { 137 156 i->second.termweight += wt->get_maxpart(); 157 AssertEq(stats->get_termfreq(query->tname), i->second.termfreq); 158 LOGLINE(MATCH, "Increasing term_info[" << query->tname << "] to (" << i->second.termfreq << ", " << i->second.termweight << ")"); 138 159 } 139 160 140 161 LeafPostList * pl = db->open_post_list(query->tname); -
xapian-core/matcher/localmatch.h
82 82 PostList * get_postlist_and_term_info(MultiMatch *matcher, 83 83 std::map<string, Xapian::MSet::Internal::TermFreqAndWeight> *termfreqandwts); 84 84 85 /** Convert a postlist into a synonym postlist. 86 */ 87 PostList * make_synonym_postlist(PostList * or_pl, MultiMatch * matcher, 88 double factor); 89 85 90 /** Convert an OP_LEAF query to a PostList. 86 91 * 87 92 * This is called by QueryOptimiser when it reaches an OP_LEAF query. -
xapian-core/matcher/synonympostlist.h
1 /** @file synonympostlist.h 2 * @brief Combine subqueries, weighting as if they are synonyms 3 */ 4 /* Copyright 2007 Lemur Consulting Ltd 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 19 */ 20 21 #ifndef XAPIAN_INCLUDED_SYNONYMPOSTLIST_H 22 #define XAPIAN_INCLUDED_SYNONYMPOSTLIST_H 23 24 #include "multimatch.h" 25 #include "postlist.h" 26 27 /** A postlist comprising several postlists SYNONYMed together. 28 * 29 * This postlist returns all postings in the OR of the sub postlists, but 30 * returns weights as if they represented a single term. The term frequency 31 * portion of the weight is approximated. 32 */ 33 class SynonymPostList : public PostList { 34 /** The subtree, which starts as an OR of all the sub-postlists being 35 * joined with Synonym, but may decay into something else. 36 */ 37 PostList * subtree; 38 39 /** The object which is using this postlist to perform a match. 40 * 41 * This object needs to be notified when the tree changes such that the 42 * maximum weights need to be recalculated. 43 */ 44 MultiMatch *matcher; 45 46 /** Weighting object used for calculating the synonym weights. 47 */ 48 const Xapian::Weight * wt; 49 50 /** Flag indicating whether the weighting object needs the doclength. 51 */ 52 bool want_doclength; 53 54 public: 55 SynonymPostList(PostList *subtree_, MultiMatch * matcher_); 56 57 ~SynonymPostList(); 58 59 /** Set the weight object to be used for the synonym postlist. 60 * 61 * Ownership of the weight object passes to the synonym postlist - the 62 * caller must not delete it after use. 63 */ 64 void set_weight(const Xapian::Weight * wt_); 65 66 PostList *next(Xapian::weight w_min); 67 PostList *skip_to(Xapian::docid did, Xapian::weight w_min); 68 69 Xapian::weight get_weight() const; 70 Xapian::weight get_maxweight() const; 71 Xapian::weight recalc_maxweight(); 72 73 // The following methods just call through to the subtree. 74 Xapian::termcount get_wdf() const; 75 Xapian::doccount get_termfreq_min() const; 76 Xapian::doccount get_termfreq_est() const; 77 Xapian::doccount get_termfreq_max() const; 78 Xapian::docid get_docid() const; 79 Xapian::termcount get_doclength() const; 80 PositionList * read_position_list(); 81 PositionList * open_position_list() const; 82 bool at_end() const; 83 84 std::string get_description() const; 85 }; 86 87 #endif /* XAPIAN_INCLUDED_SYNONYMPOSTLIST_H */ -
xapian-core/matcher/queryoptimiser.cc
Property changes on: xapian-core/matcher/synonympostlist.h ___________________________________________________________________ Added: svn:eol-style + native
122 122 RETURN(do_subquery(query->subqs[0], sub_factor)); 123 123 } 124 124 125 case Xapian::Query::OP_SYNONYM: { 126 RETURN(do_synonym(query, factor)); 127 } 128 125 129 default: 126 130 Assert(false); 127 131 RETURN(NULL); … … 304 308 // for AND-like operations. 305 309 Xapian::Query::Internal::op_t op = query->op; 306 310 Assert(op == Xapian::Query::OP_ELITE_SET || op == Xapian::Query::OP_OR || 307 op == Xapian::Query::OP_XOR); 311 op == Xapian::Query::OP_XOR || op == Xapian::Query::OP_SYNONYM); 312 313 // We build an OR tree for OP_SYNONYM. (The resulting tree will then be 314 // passed into a SynonymPostList, from which the weightings will come.) 315 if (op == Xapian::Query::OP_SYNONYM) { 316 op = Xapian::Query::OP_OR; 317 } 308 318 309 319 const Xapian::Query::Internal::subquery_list &queries = query->subqs; 310 320 AssertRel(queries.size(), >=, 2); … … 382 392 ComparePostListTermFreqAscending()); 383 393 } 384 394 } 395 396 PostList * 397 QueryOptimiser::do_synonym(const Xapian::Query::Internal *query, double factor) 398 { 399 DEBUGCALL(MATCH, PostList *, "QueryOptimiser::do_synonym", 400 query << ", " << factor); 401 402 if (factor == 0.0) { 403 // If we have a factor of 0, we don't care about the weights, so 404 // we're just like a normal OR query. 405 RETURN(do_or_like(query, 0.0)); 406 } 407 408 AssertEq(query->wqf, 0); // FIXME - should we be doing something with the wqf? 409 410 RETURN(localsubmatch.make_synonym_postlist(do_or_like(query, 0.0), 411 matcher, factor)); 412 } -
xapian-core/matcher/queryoptimiser.h
88 88 */ 89 89 PostList * do_or_like(const Xapian::Query::Internal *query, double factor); 90 90 91 /** Optimise a synonym Xapian::Query::Internal subtree into a PostList 92 * 93 * @param query The subtree to optimise. 94 * @param factor How much to scale weights for this subtree by. 95 * 96 * @return A PostList subtree. 97 */ 98 PostList * do_synonym(const Xapian::Query::Internal *query, double factor); 99 91 100 public: 92 101 QueryOptimiser(const Xapian::Database::Internal & db_, 93 102 LocalSubMatch & localsubmatch_, -
xapian-core/matcher/synonympostlist.cc
1 /* synonympostlist.cc: Combine subqueries, weighting as if they are synonyms 2 * 3 * Copyright 2007 Lemur Consulting Ltd 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation; either version 2 of the 8 * License, or (at your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, write to the Free Software 17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 18 * USA 19 */ 20 21 #include <config.h> 22 23 #include "synonympostlist.h" 24 25 #include "branchpostlist.h" 26 #include "debuglog.h" 27 28 SynonymPostList::SynonymPostList(PostList *subtree_, 29 MultiMatch * matcher_) 30 : subtree(subtree_), 31 matcher(matcher_), 32 wt(NULL), 33 want_doclength(false) 34 { 35 } 36 37 SynonymPostList::~SynonymPostList() 38 { 39 delete wt; 40 delete subtree; 41 } 42 43 void 44 SynonymPostList::set_weight(const Xapian::Weight * wt_) 45 { 46 delete wt; 47 wt = wt_; 48 want_doclength = wt_->get_sumpart_needs_doclength_(); 49 } 50 51 PostList * 52 SynonymPostList::next(Xapian::weight w_min) 53 { 54 LOGCALL(MATCH, PostList *, "SynonymPostList::next", w_min); 55 next_handling_prune(subtree, w_min, matcher); 56 RETURN(NULL); 57 } 58 59 PostList * 60 SynonymPostList::skip_to(Xapian::docid did, Xapian::weight w_min) 61 { 62 LOGCALL(MATCH, PostList *, "SynonymPostList::skip_to", did << ", " << w_min); 63 skip_to_handling_prune(subtree, did, w_min, matcher); 64 RETURN(NULL); 65 } 66 67 Xapian::weight 68 SynonymPostList::get_weight() const 69 { 70 return wt->get_sumpart(get_wdf(), want_doclength ? get_doclength() : 0); 71 } 72 73 Xapian::weight 74 SynonymPostList::get_maxweight() const 75 { 76 return wt->get_maxpart(); 77 } 78 79 Xapian::weight 80 SynonymPostList::recalc_maxweight() 81 { 82 return SynonymPostList::get_maxweight(); 83 } 84 85 Xapian::termcount 86 SynonymPostList::get_wdf() const { 87 return subtree->get_wdf(); 88 } 89 90 Xapian::doccount 91 SynonymPostList::get_termfreq_min() const { 92 return subtree->get_termfreq_min(); 93 } 94 95 Xapian::doccount 96 SynonymPostList::get_termfreq_est() const { 97 return subtree->get_termfreq_est(); 98 } 99 100 Xapian::doccount 101 SynonymPostList::get_termfreq_max() const { 102 return subtree->get_termfreq_max(); 103 } 104 105 Xapian::docid 106 SynonymPostList::get_docid() const { 107 return subtree->get_docid(); 108 } 109 110 Xapian::termcount 111 SynonymPostList::get_doclength() const { 112 return subtree->get_doclength(); 113 } 114 115 PositionList * 116 SynonymPostList::read_position_list() { 117 return subtree->read_position_list(); 118 } 119 120 PositionList * 121 SynonymPostList::open_position_list() const { 122 return subtree->open_position_list(); 123 } 124 125 bool 126 SynonymPostList::at_end() const { 127 return subtree->at_end(); 128 } 129 130 std::string 131 SynonymPostList::get_description() const 132 { 133 return "(Synonym " + subtree->get_description() + ")"; 134 } -
xapian-core/weight/weight.cc
Property changes on: xapian-core/matcher/synonympostlist.cc ___________________________________________________________________ Added: svn:eol-style + native
73 73 init(factor); 74 74 } 75 75 76 void 77 Weight::init_(const Internal & stats, Xapian::termcount query_length, 78 double factor, Xapian::doccount termfreq) 79 { 80 // Synonym case. 81 collection_size_ = stats.collection_size; 82 rset_size_ = stats.rset_size; 83 average_length_ = stats.get_average_length(); 84 doclength_upper_bound_ = stats.db.get_doclength_upper_bound(); 85 doclength_lower_bound_ = stats.db.get_doclength_lower_bound(); 86 // For a synonym, the doclength is an upper bound on the wdf. 87 // FIXME: foo OP_SYNONYM foo could exceed this, but we probably need to 88 // handle repeated terms better somehow. 89 wdf_upper_bound_ = stats.db.get_doclength_upper_bound(); 90 termfreq_ = termfreq; 91 reltermfreq_ = 0; 92 query_length_ = query_length; 93 wqf_ = 1; 94 init(factor); 95 } 96 76 97 Weight::~Weight() { } 77 98 78 99 } -
xapian-core/tests/api_db.cc
1457 1457 return true; 1458 1458 } 1459 1459 1460 // Check a synonym search 1461 DEFINE_TESTCASE(synonym1, backend) { 1462 Xapian::Database db(get_database("etext")); 1463 Xapian::doccount lots = 214; 1464 vector<vector<Xapian::Query> > subqueries_list; 1465 1466 vector<Xapian::Query> subqueries; 1467 subqueries.push_back(Xapian::Query("date")); 1468 subqueries_list.push_back(subqueries); 1469 1470 subqueries.clear(); 1471 subqueries.push_back(Xapian::Query("sky")); 1472 subqueries.push_back(Xapian::Query("date")); 1473 subqueries_list.push_back(subqueries); 1474 1475 subqueries.clear(); 1476 subqueries.push_back(Xapian::Query("date")); 1477 subqueries.push_back(Xapian::Query(Xapian::Query::OP_OR, 1478 Xapian::Query("sky"), 1479 Xapian::Query("glove"))); 1480 subqueries_list.push_back(subqueries); 1481 1482 subqueries.clear(); 1483 subqueries.push_back(Xapian::Query("sky")); 1484 subqueries.push_back(Xapian::Query("date")); 1485 subqueries.push_back(Xapian::Query("stein")); 1486 subqueries.push_back(Xapian::Query("ally")); 1487 subqueries_list.push_back(subqueries); 1488 1489 subqueries.clear(); 1490 subqueries.push_back(Xapian::Query("sky")); 1491 subqueries.push_back(Xapian::Query(Xapian::Query::OP_PHRASE, 1492 Xapian::Query("date"), 1493 Xapian::Query("stein"))); 1494 subqueries_list.push_back(subqueries); 1495 1496 for (vector<vector<Xapian::Query> >::const_iterator 1497 qlist = subqueries_list.begin(); 1498 qlist != subqueries_list.end(); ++qlist) 1499 { 1500 // Run two queries, one joining the subqueries with OR and one joining them 1501 // with SYNONYM. 1502 Xapian::Enquire enquire(db); 1503 enquire.set_query(Xapian::Query(Xapian::Query::OP_OR, qlist->begin(), qlist->end())); 1504 Xapian::MSet ormset = enquire.get_mset(0, lots); 1505 Xapian::Query synquery(Xapian::Query::OP_SYNONYM, qlist->begin(), qlist->end()); 1506 tout << synquery << "\n"; 1507 enquire.set_query(synquery); 1508 Xapian::MSet mset = enquire.get_mset(0, lots); 1509 1510 // Check that the queries return some results. 1511 TEST_NOT_EQUAL(mset.size(), 0); 1512 // Check that the queries return the same number of results. 1513 TEST_EQUAL(mset.size(), ormset.size()); 1514 map<Xapian::docid, Xapian::weight> values_or; 1515 map<Xapian::docid, Xapian::weight> values_synonym; 1516 for (Xapian::doccount i = 0; i < mset.size(); ++i) { 1517 values_or[*ormset[i]] = ormset[i].get_weight(); 1518 values_synonym[*mset[i]] = mset[i].get_weight(); 1519 } 1520 TEST_EQUAL(values_or.size(), values_synonym.size()); 1521 1522 /* Check that the most of the weights for items in the "or" mset are 1523 * different from those in the "synonym" mset. */ 1524 int same_weight = 0; 1525 int different_weight = 0; 1526 for (map<Xapian::docid, Xapian::weight>::const_iterator 1527 j = values_or.begin(); 1528 j != values_or.end(); ++j) 1529 { 1530 Xapian::docid did = j->first; 1531 // Check that all the results in the or tree make it to the synonym tree. 1532 TEST(values_synonym.find(did) != values_synonym.end()); 1533 if (values_or[did] == values_synonym[did]) { 1534 same_weight += 1; 1535 } else { 1536 different_weight += 1; 1537 } 1538 } 1539 if (qlist->size() == 1) { 1540 // Had a single term - check that all the weights were the same. 1541 TEST_EQUAL(different_weight, 0); 1542 TEST_NOT_EQUAL(same_weight, 0); 1543 } else { 1544 // Check that most of the weights differ. 1545 TEST_NOT_EQUAL(different_weight, 0); 1546 TEST_REL(same_weight, <, different_weight); 1547 } 1548 } 1549 return true; 1550 } 1551 1552 // Regression test - test a synonym search with a MultiAndPostlist. 1553 DEFINE_TESTCASE(synonym2, backend) { 1554 Xapian::Query query; 1555 vector<Xapian::Query> subqueries; 1556 subqueries.push_back(Xapian::Query("file")); 1557 subqueries.push_back(Xapian::Query("the")); 1558 subqueries.push_back(Xapian::Query("next")); 1559 subqueries.push_back(Xapian::Query("reader")); 1560 query = Xapian::Query(Xapian::Query::OP_AND, subqueries.begin(), subqueries.end()); 1561 subqueries.clear(); 1562 subqueries.push_back(query); 1563 subqueries.push_back(Xapian::Query("gutenberg")); 1564 query = Xapian::Query(Xapian::Query::OP_SYNONYM, subqueries.begin(), subqueries.end()); 1565 1566 tout << query.get_description() << endl; 1567 1568 Xapian::Database db(get_database("etext")); 1569 Xapian::Enquire enquire(db); 1570 enquire.set_query(query); 1571 Xapian::MSet mset = enquire.get_mset(0, 10); 1572 tout << mset.get_description() << endl; 1573 1574 // Regression test that OP_SCALE_WEIGHT works with OP_SYNONYM 1575 double maxposs = mset.get_max_possible(); 1576 query = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 10.0); 1577 enquire.set_query(query); 1578 mset = enquire.get_mset(0, 10); 1579 double maxposs2 = mset.get_max_possible(); 1580 1581 TEST_EQUAL_DOUBLE(maxposs * 10.0, maxposs2); 1582 1583 return true; 1584 } 1585 1460 1586 // tests that specifying a nonexistent input file throws an exception. 1461 1587 DEFINE_TESTCASE(flintdatabaseopeningerror1, flint) { 1462 1588 mkdir(".flint", 0755); -
xapian-core/tests/queryparsertest.cc
786 786 Xapian::Query qobj = qp.parse_query("ab*", Xapian::QueryParser::FLAG_WILDCARD); 787 787 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(abc:(pos=1))"); 788 788 qobj = qp.parse_query("muscle*", Xapian::QueryParser::FLAG_WILDCARD); 789 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscle:(pos=1) ORmusclebound:(pos=1)))");789 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscle:(pos=1) SYNONYM musclebound:(pos=1)))"); 790 790 qobj = qp.parse_query("meat*", Xapian::QueryParser::FLAG_WILDCARD); 791 791 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query()"); 792 792 qobj = qp.parse_query("musc*", Xapian::QueryParser::FLAG_WILDCARD); 793 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscat:(pos=1) OR muscle:(pos=1) OR musclebound:(pos=1) ORmuscular:(pos=1)))");793 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscat:(pos=1) SYNONYM muscle:(pos=1) SYNONYM musclebound:(pos=1) SYNONYM muscular:(pos=1)))"); 794 794 qobj = qp.parse_query("mutt*", Xapian::QueryParser::FLAG_WILDCARD); 795 795 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(mutton:(pos=1))"); 796 796 // Regression test (we weren't lowercasing terms before checking if they … … 879 879 qp.add_prefix("author", "A"); 880 880 Xapian::Query qobj; 881 881 qobj = qp.parse_query("author:h*", Xapian::QueryParser::FLAG_WILDCARD); 882 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((Aheinlein:(pos=1) ORAhuxley:(pos=1)))");882 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((Aheinlein:(pos=1) SYNONYM Ahuxley:(pos=1)))"); 883 883 qobj = qp.parse_query("author:h* test", Xapian::QueryParser::FLAG_WILDCARD); 884 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( Aheinlein:(pos=1) OR Ahuxley:(pos=1) OR test:(pos=2)))");884 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((Aheinlein:(pos=1) SYNONYM Ahuxley:(pos=1)) OR test:(pos=2)))"); 885 885 return true; 886 886 } 887 887 … … 922 922 qobj = qp.parse_query("ab", Xapian::QueryParser::FLAG_PARTIAL); 923 923 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((abc:(pos=1) OR Zab:(pos=1)))"); 924 924 qobj = qp.parse_query("muscle", Xapian::QueryParser::FLAG_PARTIAL); 925 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( muscle:(pos=1) OR musclebound:(pos=1) OR Zmuscl:(pos=1)))");925 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((muscle:(pos=1) SYNONYM musclebound:(pos=1)) OR Zmuscl:(pos=1)))"); 926 926 qobj = qp.parse_query("meat", Xapian::QueryParser::FLAG_PARTIAL); 927 927 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(Zmeat:(pos=1))"); 928 928 qobj = qp.parse_query("musc", Xapian::QueryParser::FLAG_PARTIAL); 929 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( muscat:(pos=1) OR muscle:(pos=1) OR musclebound:(pos=1) OR muscular:(pos=1) OR Zmusc:(pos=1)))");929 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((muscat:(pos=1) SYNONYM muscle:(pos=1) SYNONYM musclebound:(pos=1) SYNONYM muscular:(pos=1)) OR Zmusc:(pos=1)))"); 930 930 qobj = qp.parse_query("mutt", Xapian::QueryParser::FLAG_PARTIAL); 931 931 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((mutton:(pos=1) OR Zmutt:(pos=1)))"); 932 932 qobj = qp.parse_query("abc musc", Xapian::QueryParser::FLAG_PARTIAL); 933 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((Zabc:(pos=1) OR muscat:(pos=2) OR muscle:(pos=2) OR musclebound:(pos=2) OR muscular:(pos=2) OR Zmusc:(pos=2)))");933 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((Zabc:(pos=1) OR (muscat:(pos=2) SYNONYM muscle:(pos=2) SYNONYM musclebound:(pos=2) SYNONYM muscular:(pos=2)) OR Zmusc:(pos=2)))"); 934 934 qobj = qp.parse_query("a* mutt", Xapian::QueryParser::FLAG_PARTIAL | Xapian::QueryParser::FLAG_WILDCARD); 935 935 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((abc:(pos=1) OR mutton:(pos=2) OR Zmutt:(pos=2)))"); 936 936 937 937 // Check behaviour with stemmed terms, and stem strategy STEM_SOME. 938 938 qobj = qp.parse_query("o", Xapian::QueryParser::FLAG_PARTIAL); 939 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( out:(pos=1) OR outside:(pos=1) OR Zo:(pos=1)))");939 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR Zo:(pos=1)))"); 940 940 qobj = qp.parse_query("ou", Xapian::QueryParser::FLAG_PARTIAL); 941 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( out:(pos=1) OR outside:(pos=1) OR Zou:(pos=1)))");941 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR Zou:(pos=1)))"); 942 942 qobj = qp.parse_query("out", Xapian::QueryParser::FLAG_PARTIAL); 943 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( out:(pos=1) OR outside:(pos=1) OR Zout:(pos=1)))");943 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR Zout:(pos=1)))"); 944 944 qobj = qp.parse_query("outs", Xapian::QueryParser::FLAG_PARTIAL); 945 945 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((outside:(pos=1) OR Zout:(pos=1)))"); 946 946 qobj = qp.parse_query("outsi", Xapian::QueryParser::FLAG_PARTIAL); … … 952 952 953 953 // Check behaviour with capitalised terms, and stem strategy STEM_SOME. 954 954 qobj = qp.parse_query("Out", Xapian::QueryParser::FLAG_PARTIAL); 955 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( out:(pos=1,wqf=2) OR outside:(pos=1)))");955 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR out:(pos=1)))"); 956 956 qobj = qp.parse_query("Outs", Xapian::QueryParser::FLAG_PARTIAL); 957 957 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((outside:(pos=1) OR outs:(pos=1)))"); 958 958 qobj = qp.parse_query("Outside", Xapian::QueryParser::FLAG_PARTIAL); … … 961 961 // And now with stemming strategy STEM_ALL. 962 962 qp.set_stemming_strategy(Xapian::QueryParser::STEM_ALL); 963 963 qobj = qp.parse_query("Out", Xapian::QueryParser::FLAG_PARTIAL); 964 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( out:(pos=1,wqf=2) OR outside:(pos=1)))");964 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR out:(pos=1)))"); 965 965 qobj = qp.parse_query("Outs", Xapian::QueryParser::FLAG_PARTIAL); 966 966 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((outside:(pos=1) OR out:(pos=1)))"); 967 967 qobj = qp.parse_query("Outside", Xapian::QueryParser::FLAG_PARTIAL); … … 970 970 // Check handling of a case with a prefix. 971 971 qp.set_stemming_strategy(Xapian::QueryParser::STEM_SOME); 972 972 qobj = qp.parse_query("title:cow", Xapian::QueryParser::FLAG_PARTIAL); 973 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( XTcowl:(pos=1) OR XTcows:(pos=1) OR ZXTcow:(pos=1)))");973 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((XTcowl:(pos=1) SYNONYM XTcows:(pos=1)) OR ZXTcow:(pos=1)))"); 974 974 qobj = qp.parse_query("title:cows", Xapian::QueryParser::FLAG_PARTIAL); 975 975 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((XTcows:(pos=1) OR ZXTcow:(pos=1)))"); 976 976 qobj = qp.parse_query("title:Cow", Xapian::QueryParser::FLAG_PARTIAL); 977 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(( XTcowl:(pos=1) OR XTcows:(pos=1) OR XTcow:(pos=1)))");977 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((XTcowl:(pos=1) SYNONYM XTcows:(pos=1)) OR XTcow:(pos=1)))"); 978 978 qobj = qp.parse_query("title:Cows", Xapian::QueryParser::FLAG_PARTIAL); 979 979 TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(XTcows:(pos=1,wqf=2))"); 980 980 … … 1547 1547 } 1548 1548 1549 1549 static test test_synonym_queries[] = { 1550 { "searching", "(Zsearch:(pos=1) OR Zfind:(pos=1) ORZlocate:(pos=1))" },1551 { "search", "(Zsearch:(pos=1) ORfind:(pos=1))" },1552 { "Search", "(search:(pos=1) ORfind:(pos=1))" },1550 { "searching", "(Zsearch:(pos=1) SYNONYM Zfind:(pos=1) SYNONYM Zlocate:(pos=1))" }, 1551 { "search", "(Zsearch:(pos=1) SYNONYM find:(pos=1))" }, 1552 { "Search", "(search:(pos=1) SYNONYM find:(pos=1))" }, 1553 1553 { "Searching", "searching:(pos=1)" }, 1554 { "searching OR terms", "( Zsearch:(pos=1) OR Zfind:(pos=1) OR Zlocate:(pos=1) OR Zterm:(pos=2))" },1555 { "search OR terms", "( Zsearch:(pos=1) OR find:(pos=1) OR Zterm:(pos=2))" },1556 { "search +terms", "(Zterm:(pos=2) AND_MAYBE (Zsearch:(pos=1) ORfind:(pos=1)))" },1557 { "search -terms", "((Zsearch:(pos=1) ORfind:(pos=1)) AND_NOT Zterm:(pos=2))" },1558 { "+search terms", "((Zsearch:(pos=1) ORfind:(pos=1)) AND_MAYBE Zterm:(pos=2))" },1559 { "-search terms", "(Zterm:(pos=2) AND_NOT (Zsearch:(pos=1) ORfind:(pos=1)))" },1560 { "search terms", "( Zsearch:(pos=1) OR find:(pos=1) OR Zterm:(pos=2))" },1554 { "searching OR terms", "((Zsearch:(pos=1) SYNONYM Zfind:(pos=1) SYNONYM Zlocate:(pos=1)) OR Zterm:(pos=2))" }, 1555 { "search OR terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) OR Zterm:(pos=2))" }, 1556 { "search +terms", "(Zterm:(pos=2) AND_MAYBE (Zsearch:(pos=1) SYNONYM find:(pos=1)))" }, 1557 { "search -terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) AND_NOT Zterm:(pos=2))" }, 1558 { "+search terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) AND_MAYBE Zterm:(pos=2))" }, 1559 { "-search terms", "(Zterm:(pos=2) AND_NOT (Zsearch:(pos=1) SYNONYM find:(pos=1)))" }, 1560 { "search terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) OR Zterm:(pos=2))" }, 1561 1561 // Shouldn't trigger synonyms: 1562 1562 { "\"search terms\"", "(search:(pos=1) PHRASE 2 terms:(pos=2))" }, 1563 1563 { NULL, NULL } … … 1597 1597 1598 1598 static test test_multi_synonym_queries[] = { 1599 1599 { "sun OR tan OR cream", "(Zsun:(pos=1) OR Ztan:(pos=2) OR Zcream:(pos=3))" }, 1600 { "sun tan", "( Zsun:(pos=1) OR Ztan:(pos=2) ORbathe:(pos=1))" },1601 { "sun tan cream", "( Zsun:(pos=1) OR Ztan:(pos=2) OR Zcream:(pos=3) ORlotion:(pos=1))" },1602 { "beach sun tan holiday", "(Zbeach:(pos=1) OR Zsun:(pos=2) OR Ztan:(pos=3) OR bathe:(pos=2) OR Zholiday:(pos=4))" },1603 { "sun tan sun tan cream", "( Zsun:(pos=1) OR Ztan:(pos=2) OR bathe:(pos=1) OR Zsun:(pos=3) OR Ztan:(pos=4) OR Zcream:(pos=5) OR lotion:(pos=3))" },1604 { "single", "(Zsingl:(pos=1) ORrecord:(pos=1))" },1600 { "sun tan", "((Zsun:(pos=1) OR Ztan:(pos=2)) SYNONYM bathe:(pos=1))" }, 1601 { "sun tan cream", "((Zsun:(pos=1) OR Ztan:(pos=2) OR Zcream:(pos=3)) SYNONYM lotion:(pos=1))" }, 1602 { "beach sun tan holiday", "(Zbeach:(pos=1) OR ((Zsun:(pos=2) OR Ztan:(pos=3)) SYNONYM bathe:(pos=2)) OR Zholiday:(pos=4))" }, 1603 { "sun tan sun tan cream", "(((Zsun:(pos=1) OR Ztan:(pos=2)) SYNONYM bathe:(pos=1)) OR ((Zsun:(pos=3) OR Ztan:(pos=4) OR Zcream:(pos=5)) SYNONYM lotion:(pos=3)))" }, 1604 { "single", "(Zsingl:(pos=1) SYNONYM record:(pos=1))" }, 1605 1605 { NULL, NULL } 1606 1606 }; 1607 1607 … … 1640 1640 1641 1641 static test test_synonym_op_queries[] = { 1642 1642 { "searching", "Zsearch:(pos=1)" }, 1643 { "~searching", "(Zsearch:(pos=1) OR Zfind:(pos=1) ORZlocate:(pos=1))" },1644 { "~search", "(Zsearch:(pos=1) ORfind:(pos=1))" },1645 { "~Search", "(search:(pos=1) ORfind:(pos=1))" },1643 { "~searching", "(Zsearch:(pos=1) SYNONYM Zfind:(pos=1) SYNONYM Zlocate:(pos=1))" }, 1644 { "~search", "(Zsearch:(pos=1) SYNONYM find:(pos=1))" }, 1645 { "~Search", "(search:(pos=1) SYNONYM find:(pos=1))" }, 1646 1646 { "~Searching", "searching:(pos=1)" }, 1647 { "~searching OR terms", "( Zsearch:(pos=1) OR Zfind:(pos=1) OR Zlocate:(pos=1) OR Zterm:(pos=2))" },1648 { "~search OR terms", "( Zsearch:(pos=1) OR find:(pos=1) OR Zterm:(pos=2))" },1649 { "~search +terms", "(Zterm:(pos=2) AND_MAYBE (Zsearch:(pos=1) ORfind:(pos=1)))" },1650 { "~search -terms", "((Zsearch:(pos=1) ORfind:(pos=1)) AND_NOT Zterm:(pos=2))" },1651 { "+~search terms", "((Zsearch:(pos=1) ORfind:(pos=1)) AND_MAYBE Zterm:(pos=2))" },1652 { "-~search terms", "(Zterm:(pos=2) AND_NOT (Zsearch:(pos=1) ORfind:(pos=1)))" },1653 { "~search terms", "( Zsearch:(pos=1) OR find:(pos=1) OR Zterm:(pos=2))" },1647 { "~searching OR terms", "((Zsearch:(pos=1) SYNONYM Zfind:(pos=1) SYNONYM Zlocate:(pos=1)) OR Zterm:(pos=2))" }, 1648 { "~search OR terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) OR Zterm:(pos=2))" }, 1649 { "~search +terms", "(Zterm:(pos=2) AND_MAYBE (Zsearch:(pos=1) SYNONYM find:(pos=1)))" }, 1650 { "~search -terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) AND_NOT Zterm:(pos=2))" }, 1651 { "+~search terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) AND_MAYBE Zterm:(pos=2))" }, 1652 { "-~search terms", "(Zterm:(pos=2) AND_NOT (Zsearch:(pos=1) SYNONYM find:(pos=1)))" }, 1653 { "~search terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) OR Zterm:(pos=2))" }, 1654 1654 // FIXME: should look for multi-term synonym... 1655 1655 { "~\"search terms\"", "(search:(pos=1) PHRASE 2 terms:(pos=2))" }, 1656 1656 { NULL, NULL } -
xapian-core/include/xapian/query.h
119 119 OP_VALUE_GE, 120 120 121 121 /** Filter by a less-than-or-equal test on a document value. */ 122 OP_VALUE_LE 122 OP_VALUE_LE, 123 124 /** Treat a set of queries as synonyms. 125 * 126 * This returns all results which match at least one of the 127 * queries, but weighting as if all the sub-queries are instances 128 * of the same term: so multiple matching terms for a document 129 * increase the wdf value used, and the term frequency is based on 130 * the number of documents which would match an OR of all the 131 * subqueries. 132 * 133 * The term frequency used will usually be an approximation, 134 * because calculating the precise combined term frequency would 135 * be overly expensive. 136 * 137 * Identical to OP_OR, except for the weightings returned. 138 */ 139 OP_SYNONYM 123 140 } op; 124 141 125 142 /** Copy constructor. */ -
xapian-core/include/xapian/weight.h
212 212 const std::string & term, Xapian::termcount wqf_, 213 213 double factor); 214 214 215 /** @private @internal Initialise this object to calculate weights for a 216 * synonym. 217 * 218 * @param stats Source of statistics. 219 * @param query_len_ Query length. 220 * @param factor Any scaling factor (e.g. from OP_SCALE_WEIGHT). 221 * @param termfreq The termfreq to use. 222 */ 223 void init_(const Internal & stats, Xapian::termcount query_len_, 224 double factor, Xapian::doccount termfreq); 225 215 226 /** @private @internal Initialise this object to calculate the extra weight 216 227 * component. 217 228 * -
xapian-core/api/omqueryinternal.cc
65 65 case Xapian::Query::OP_VALUE_RANGE: 66 66 case Xapian::Query::OP_VALUE_GE: 67 67 case Xapian::Query::OP_VALUE_LE: 68 case Xapian::Query::OP_SYNONYM: 68 69 return 0; 69 70 case Xapian::Query::OP_SCALE_WEIGHT: 70 71 return 1; … … 100 101 case Xapian::Query::OP_NEAR: 101 102 case Xapian::Query::OP_PHRASE: 102 103 case Xapian::Query::OP_ELITE_SET: 104 case Xapian::Query::OP_SYNONYM: 103 105 return UINT_MAX; 104 106 default: 105 107 Assert(false); … … 221 223 result += "."; 222 224 result += str_parameter; // serialise_double(get_dbl_parameter()); 223 225 break; 226 case Xapian::Query::OP_SYNONYM: 227 result += "="; 228 break; 224 229 } 225 230 } 226 231 return result; … … 251 256 case Xapian::Query::OP_VALUE_GE: name = "VALUE_GE"; break; 252 257 case Xapian::Query::OP_VALUE_LE: name = "VALUE_LE"; break; 253 258 case Xapian::Query::OP_SCALE_WEIGHT: name = "SCALE_WEIGHT"; break; 259 case Xapian::Query::OP_SYNONYM: name = "SYNONYM"; break; 254 260 } 255 261 return name; 256 262 } … … 584 590 return qint_from_vector(Xapian::Query::OP_SCALE_WEIGHT, 585 591 subqs, 0, param); 586 592 } 587 default: 593 case '=': { 594 return qint_from_vector(Xapian::Query::OP_SYNONYM, subqs); 595 } 596 default: 588 597 LOGLINE(UNKNOWN, "Can't parse remainder `" << p - 1 << "'"); 589 598 throw Xapian::InvalidArgumentError("Invalid query string"); 590 599 } … … 809 818 case OP_ELITE_SET: 810 819 case OP_OR: 811 820 case OP_XOR: 821 case OP_SYNONYM: 812 822 // Doing an "OR" type operation - if we've got any MatchNothing 813 823 // subnodes, drop them; except that we mustn't become an empty 814 824 // node due to this, so we never drop a MatchNothing subnode … … 900 910 } 901 911 } 902 912 break; 903 case OP_OR: case OP_AND: case OP_XOR: 913 case OP_OR: case OP_AND: case OP_XOR: case OP_SYNONYM: 904 914 // Remove duplicates if we can. 905 915 if (subqs.size() > 1) collapse_subqs(); 906 916 break; … … 944 954 void 945 955 Xapian::Query::Internal::collapse_subqs() 946 956 { 947 Assert(op == OP_OR || op == OP_AND || op == OP_XOR );957 Assert(op == OP_OR || op == OP_AND || op == OP_XOR || op == OP_SYNONYM); 948 958 typedef set<Xapian::Query::Internal *, SortPosName> subqtable; 949 959 subqtable sqtab; 950 960 … … 1038 1048 Assert(!is_leaf(op)); 1039 1049 if (subq == 0) { 1040 1050 subqs.push_back(0); 1041 } else if (op == subq->op && (op == OP_AND || op == OP_OR || op == OP_XOR )) {1051 } else if (op == subq->op && (op == OP_AND || op == OP_OR || op == OP_XOR || op == OP_SYNONYM)) { 1042 1052 // Distribute the subquery. 1043 1053 for (subquery_list::const_iterator i = subq->subqs.begin(); 1044 1054 i != subq->subqs.end(); i++) {