Index: matcher/Makefile.mk
===================================================================
--- matcher/Makefile.mk	(revision 8957)
+++ matcher/Makefile.mk	(working copy)
@@ -15,6 +15,7 @@
 	matcher/phrasepostlist.h\
 	matcher/remotesubmatch.h\
 	matcher/selectpostlist.h\
+	matcher/synonympostlist.h\
 	matcher/valuerangepostlist.h\
 	matcher/xorpostlist.h
 
@@ -47,6 +48,7 @@
 	matcher/phrasepostlist.cc\
 	matcher/rset.cc\
 	matcher/selectpostlist.cc\
+	matcher/synonympostlist.cc\
 	matcher/stats.cc\
 	matcher/tradweight.cc\
 	matcher/valuerangepostlist.cc\
Index: matcher/localmatch.cc
===================================================================
--- matcher/localmatch.cc	(revision 8957)
+++ matcher/localmatch.cc	(working copy)
@@ -3,6 +3,7 @@
  * Copyright 1999,2000,2001 BrightStation PLC
  * Copyright 2002 Ananova Ltd
  * Copyright 2002,2003,2004,2005,2006,2007 Olly Betts
+ * Copyright 2007 Lemur Consulting Ltd
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
@@ -38,6 +39,7 @@
 #include "mergepostlist.h"
 #include "extraweightpostlist.h"
 #include "valuerangepostlist.h"
+#include "synonympostlist.h"
 
 #include "omqueryinternal.h"
 
@@ -262,6 +264,28 @@
     }
 }
 
+// Build one postlist per subquery, appending each to "result" in order.
+void
+LocalSubMatch::postlists_from_queries(std::vector<PostList *> &result,
+				      const Xapian::Query::Internal::subquery_list &queries,
+				      MultiMatch * matcher, bool is_bool)
+{
+    Assert(queries.size() >= 2);
+
+    // Reserve space for the new postlists up front.
+    result.reserve(queries.size());
+    typedef Xapian::Query::Internal::subquery_list::const_iterator subq_iter;
+    for (subq_iter sq = queries.begin(); sq != queries.end(); ++sq) {
+	PostList * pl = postlist_from_query(*sq, matcher, is_bool);
+	result.push_back(pl);
+	DEBUGLINE(MATCH, "Made postlist for " << (*sq)->get_description() <<
+		  ": termfreq is: (min, est, max) = (" <<
+		  pl->get_termfreq_min() << ", " <<
+		  pl->get_termfreq_est() << ", " <<
+		  pl->get_termfreq_max() << ")");
+    }
+}
+
 // Make a postlist from the subqueries of a query objects.
 // Operation must be either AND, OR, XOR, PHRASE, NEAR, or ELITE_SET.
 // Optimise query by building tree carefully.
@@ -270,27 +294,17 @@
 	const Xapian::Query::Internal *query, MultiMatch *matcher, bool is_bool)
 {
     DEBUGCALL(MATCH, PostList *, "LocalSubMatch::postlist_from_queries", op << ", " << query << ", " << matcher << ", " << is_bool);
-    Assert(op == Xapian::Query::OP_OR || op == Xapian::Query::OP_AND ||
+    Assert(op == Xapian::Query::OP_OR ||
+	   op == Xapian::Query::OP_AND ||
 	   op == Xapian::Query::OP_XOR ||
-	   op == Xapian::Query::OP_NEAR || op == Xapian::Query::OP_PHRASE ||
+	   op == Xapian::Query::OP_NEAR ||
+	   op == Xapian::Query::OP_PHRASE ||
 	   op == Xapian::Query::OP_ELITE_SET);
-    const Xapian::Query::Internal::subquery_list &queries = query->subqs;
-    Assert(queries.size() >= 2);
 
     // Open a postlist for each query, and store these postlists in a vector.
     std::vector<PostList *> postlists;
-    postlists.reserve(queries.size());
+    postlists_from_queries(postlists, query->subqs, matcher, is_bool);
 
-    Xapian::Query::Internal::subquery_list::const_iterator q;
-    for (q = queries.begin(); q != queries.end(); q++) {
-	postlists.push_back(postlist_from_query(*q, matcher, is_bool));
-	DEBUGLINE(MATCH, "Made postlist for " << (*q)->get_description() <<
-		  ": termfreq is: (min, est, max) = (" <<
-		  postlists.back()->get_termfreq_min() << ", " <<
-		  postlists.back()->get_termfreq_est() << ", " <<
-		  postlists.back()->get_termfreq_max() << ")");
-    }
-
     // Build tree
     switch (op) {
 	case Xapian::Query::OP_XOR:
@@ -427,6 +441,31 @@
 	    pl->set_termweight(wt);
 	    RETURN(pl);
 	}
+	case Xapian::Query::OP_SYNONYM:
+	{
+	    if (is_bool) {
+		// An OR postlist matches the same documents as a synonym
+		// postlist, but without the cost of summing wdf values and
+		// calculating a combined weight, so use one instead when the
+		// weights aren't wanted.
+		RETURN(postlist_from_queries(Xapian::Query::OP_OR, query, matcher, is_bool));
+	    } else {
+		AutoPtr<Xapian::Weight> wt;
+		// There's no single term here, so use a wqf of 1 and an empty
+		// term name when creating the weight object.
+		wt = wt_factory->create(&statssource, qlen, 1, "");
+
+		std::vector<PostList *> postlists;
+		postlists_from_queries(postlists, query->subqs, matcher, is_bool);
+
+		// build_or_tree empties "postlists", but SynonymPostList uses
+		// the individual postlists to sum wdf values, so keep a copy.
+		// FIXME: find a cleaner way (could these dangle after pruning?)
+		std::vector<PostList *> postlists_orig = postlists;
+		PostList *res = build_or_tree(postlists, matcher);
+		RETURN(new SynonymPostList(res, postlists_orig, matcher, wt.release()));
+	    }
+	}
 	case Xapian::Query::OP_PHRASE:
 	case Xapian::Query::OP_NEAR:
 	    // If no positional information in this sub-database, change the
Index: matcher/localmatch.h
===================================================================
--- matcher/localmatch.h	(revision 8957)
+++ matcher/localmatch.h	(working copy)
@@ -76,6 +76,16 @@
     PostList * build_xor_tree(std::vector<PostList *> &postlists,
 			      MultiMatch *matcher);
 
+
+    /** Append one postlist per subquery in @a queries to @a result.
+     *
+     *  Each is built by postlist_from_query() with @a matcher and @a is_bool.
+     */
+    void postlists_from_queries(std::vector<PostList *> &result,
+				const Xapian::Query::Internal::subquery_list &queries,
+				MultiMatch *matcher,
+				bool is_bool);
+
     /** Convert the sub-queries of a Query into an optimised PostList tree.
      *
      *  We take the sub-queries from @a query, but use @op instead of
Index: matcher/synonympostlist.h
===================================================================
--- matcher/synonympostlist.h	(revision 0)
+++ matcher/synonympostlist.h	(revision 0)
@@ -0,0 +1,128 @@
+/* synonympostlist.h: Combine subqueries, weighting as if they are synonyms
+ *
+ * Copyright 2007 Lemur Consulting Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ */
+
+#ifndef XAPIAN_INCLUDED_SYNONYMPOSTLIST_H
+#define XAPIAN_INCLUDED_SYNONYMPOSTLIST_H
+
+#include "multimatch.h"
+#include "postlist.h"
+#include <vector>
+
+/** A postlist comprising several postlists SYNONYMed together.
+ *
+ *  This postlist returns all postings in the OR of the sub postlists, but
+ *  returns weights as if they represented a single term.  The term frequency
+ *  portion of the weight is approximated.
+ */
+class SynonymPostList : public PostList {
+    private:
+	/// The OR tree over the sub-postlists.  Owned by this object.
+	PostList * subtree;
+
+	/** The individual sub-postlists, used by get_wdf() to sum wdf values.
+	 *
+	 *  These pointers are not deleted by this class.
+	 *
+	 *  NOTE(review): presumably these are the branches of @a subtree; if
+	 *  the tree deletes a branch while pruning, the entry here would
+	 *  dangle - confirm against build_or_tree() and the OR postlist.
+	 */
+	std::vector<PostList *> terms;
+
+	/** The object which is using this postlist to perform
+	 *  a match.  This object needs to be notified when the
+	 *  tree changes such that the maximum weights need to be
+	 *  recalculated.
+	 */
+	MultiMatch *matcher;
+
+	/// Weight object for the combined "term".  Owned by this object.
+	const Xapian::Weight * wt;
+
+	/// Cached value of wt->get_sumpart_needs_doclength().
+	bool want_doclength;
+
+    public:
+	/** Construct a synonym postlist.
+	 *
+	 *  @param subtree_  Postlist supplying the matching documents;
+	 *		     ownership passes to this object.
+	 *  @param terms_    Sub-postlists whose wdf values get_wdf() sums.
+	 *  @param matcher_  Matcher passed on when advancing @a subtree_.
+	 *  @param wt_	     Weight object to use (must not be NULL);
+	 *		     ownership passes to this object.
+	 */
+	SynonymPostList(PostList *subtree_,
+			const std::vector<PostList *> & terms_,
+			MultiMatch * matcher_,
+			const Xapian::Weight * wt_)
+		: subtree(subtree_),
+		  terms(terms_),
+		  matcher(matcher_),
+		  wt(wt_),
+		  want_doclength(wt_->get_sumpart_needs_doclength())
+	{
+	}
+
+	/// Delete the subtree and the weight object, which this object owns.
+	~SynonymPostList() {
+	    delete subtree;
+	    delete wt;
+	}
+
+	PostList *next(Xapian::weight w_min);
+	PostList *skip_to(Xapian::docid did, Xapian::weight w_min);
+
+	Xapian::weight get_weight() const;
+	Xapian::weight get_maxweight() const;
+	Xapian::weight recalc_maxweight();
+	Xapian::termcount get_wdf() const;
+
+	// The following methods just call through to the subtree.
+	Xapian::doccount get_termfreq_min() const {
+	    return subtree->get_termfreq_min();
+	}
+	Xapian::doccount get_termfreq_est() const {
+	    return subtree->get_termfreq_est();
+	}
+	Xapian::doccount get_termfreq_max() const {
+	    return subtree->get_termfreq_max();
+	}
+	Xapian::docid get_docid() const {
+	    return subtree->get_docid();
+	}
+	Xapian::doclength get_doclength() const {
+	    return subtree->get_doclength();
+	}
+	PositionList * read_position_list() {
+	    return subtree->read_position_list();
+	}
+	PositionList * open_position_list() const {
+	    return subtree->open_position_list();
+	}
+	bool at_end() const {
+	    return subtree->at_end();
+	}
+
+	std::string get_description() const {
+	    return "(Synonym " + subtree->get_description() + ")";
+	}
+};
+
+#endif /* XAPIAN_INCLUDED_SYNONYMPOSTLIST_H */

Property changes on: matcher/synonympostlist.h
___________________________________________________________________
Name: svn:eol-style
   + native

Index: matcher/synonympostlist.cc
===================================================================
--- matcher/synonympostlist.cc	(revision 0)
+++ matcher/synonympostlist.cc	(revision 0)
@@ -0,0 +1,105 @@
+/* synonympostlist.cc: Combine subqueries, weighting as if they are synonyms
+ *
+ * Copyright 2007 Lemur Consulting Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+
+#include <config.h>
+
+#include "synonympostlist.h"
+#include "branchpostlist.h"
+#include "omassert.h"
+#include "omdebug.h"
+
+/** Move the subtree on to its next document.
+ *
+ *  @param w_min  Minimum weight wanted from this postlist.  It is not
+ *		  passed to the subtree: our weight is calculated from the
+ *		  summed wdf by a separate weight object, so the subtree's
+ *		  own weights can't be used to decide what may be skipped.
+ *
+ *  @return NULL always.
+ */
+PostList *
+SynonymPostList::next(Xapian::weight w_min)
+{
+    DEBUGCALL(MATCH, PostList *, "SynonymPostList::next", w_min);
+    // Avoid an unused parameter warning if DEBUGCALL expands to nothing.
+    (void)w_min;
+    next_handling_prune(subtree, 0, matcher);
+    RETURN(NULL);
+}
+
+/** Skip the subtree forward to document id @a did.
+ *
+ *  As for next(), @a w_min is deliberately not passed to the subtree.
+ *
+ *  @return NULL always.
+ */
+PostList *
+SynonymPostList::skip_to(Xapian::docid did, Xapian::weight w_min)
+{
+    DEBUGCALL(MATCH, PostList *, "SynonymPostList::skip_to", did << ", " << w_min);
+    // Avoid an unused parameter warning if DEBUGCALL expands to nothing.
+    (void)w_min;
+    skip_to_handling_prune(subtree, did, 0, matcher);
+    RETURN(NULL);
+}
+
+/** Return the weight for the current document.
+ *
+ *  This is calculated by the weight object from the summed wdf, plus the
+ *  document length if the weighting scheme asked for it (otherwise 0 is
+ *  passed instead).
+ */
+Xapian::weight
+SynonymPostList::get_weight() const
+{
+    return wt->get_sumpart(get_wdf(), want_doclength ? get_doclength() : 0);
+}
+
+/// Return the maximum weight the weight object can give (get_maxpart()).
+Xapian::weight
+SynonymPostList::get_maxweight() const
+{
+    return wt->get_maxpart();
+}
+
+/// Nothing is cached here, so this is just the same as get_maxweight().
+Xapian::weight
+SynonymPostList::recalc_maxweight()
+{
+    return SynonymPostList::get_maxweight();
+}
+
+/** Return the combined wdf for the current document.
+ *
+ *  This is the sum of the wdf values of those sub-postlists which are
+ *  positioned on the same document as the subtree.
+ */
+Xapian::termcount
+SynonymPostList::get_wdf() const
+{
+    const Xapian::docid did = get_docid();
+    Xapian::termcount wdf = 0;
+    std::vector<PostList *>::const_iterator i;
+    for (i = terms.begin(); i != terms.end(); ++i) {
+	if ((*i)->get_docid() == did)
+	    wdf += (*i)->get_wdf();
+    }
+    return wdf;
+}
+

Property changes on: matcher/synonympostlist.cc
___________________________________________________________________
Name: svn:eol-style
   + native

Index: tests/api_db.cc
===================================================================
--- tests/api_db.cc	(revision 8957)
+++ tests/api_db.cc	(working copy)
@@ -1129,6 +1129,34 @@
     return true;
 }
 
+// Check that OP_SYNONYM matches the same number of documents as OP_OR.
+static bool test_synonym1()
+{
+    Xapian::Database db(get_database("etext"));
+    Xapian::Enquire enquire(db);
+    enquire.set_query(Xapian::Query(Xapian::Query::OP_OR,
+				    Xapian::Query("date"),
+				    Xapian::Query("sky")));
+    Xapian::doccount lots = 214;
+    Xapian::MSet ormset = enquire.get_mset(0, lots);
+
+    enquire.set_query(Xapian::Query(Xapian::Query::OP_SYNONYM,
+				    Xapian::Query("date"),
+				    Xapian::Query("sky")));
+    Xapian::MSet mset = enquire.get_mset(0, lots);
+
+    TEST_NOT_EQUAL(mset.size(), 0);
+    TEST_EQUAL(mset.size(), ormset.size());
+    for (Xapian::doccount i = 0; i < mset.size(); ++i) {
+	// The weights (and so possibly the ranking) differ from OR, so just
+	// show both result lists.  Xapian::docid is unsigned, hence %u.
+	printf("%u,\t%f,\t%u,\t%f\n",
+	       *mset[i], mset[i].get_weight(),
+	       *ormset[i], ormset[i].get_weight());
+    }
+    return true;
+}
+
 // tests that specifying a nonexistent input file throws an exception.
 static bool test_quartzdatabaseopeningerror1()
 {
@@ -1707,6 +1735,7 @@
     // with that, and testing it there doesn't actually improve the test
     // coverage really.
     {"consistency1",	   test_consistency1},
+    {"synonym1",           test_synonym1},
     // Would work with remote if we registered the weighting scheme.
     // FIXME: do this so we also test that functionality...
     {"userweight1",	   test_userweight1},
@@ -1731,6 +1760,7 @@
     {"keepalive1",	   test_keepalive1},
     {"termstats",	   test_termstats},
     {"sortvalue1",	   test_sortvalue1},
+    {"synonym1",           test_synonym1},
     {"sortrel1",	   test_sortrel1},
     {"netstats1",	   test_netstats1},
     {0, 0}
Index: include/xapian/query.h
===================================================================
--- include/xapian/query.h	(revision 8957)
+++ include/xapian/query.h	(working copy)
@@ -4,7 +4,7 @@
 /* Copyright 1999,2000,2001 BrightStation PLC
  * Copyright 2002 Ananova Ltd
  * Copyright 2003,2004,2005,2006,2007 Olly Betts
- * Copyright 2006 Lemur Consulting Ltd
+ * Copyright 2006,2007 Lemur Consulting Ltd
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
@@ -96,6 +96,23 @@
 	    /** Filter by a range test on a document value. */
 	    OP_VALUE_RANGE,
 
+	    /** Treat a set of queries as synonyms.
+	     *
+	     *  This returns all results which match at least one of the
+	     *  queries, but weighting as if all the sub-queries are instances
+	     *  of the same term: so multiple matching terms for a document
+	     *  increase the wdf value used, and the term frequency is based on
+	     *  the number of documents which would match an OR of all the
+	     *  subqueries.
+	     *
+	     *  The term frequency used will usually be an approximation,
+	     *  because calculating the precise combined term frequency would
+	     *  be overly expensive.
+	     *
+	     *  Identical to OP_OR, except for the weightings returned.
+	     */
+	    OP_SYNONYM,
+
 	    /** Select an elite set from the subqueries, and perform
 	     *  a query with these combined as an OR query.
 	     */
Index: api/omqueryinternal.cc
===================================================================
--- api/omqueryinternal.cc	(revision 8957)
+++ api/omqueryinternal.cc	(working copy)
@@ -3,7 +3,7 @@
  * Copyright 1999,2000,2001 BrightStation PLC
  * Copyright 2002 Ananova Ltd
  * Copyright 2002,2003,2004,2005,2006,2007 Olly Betts
- * Copyright 2006 Lemur Consulting Ltd
+ * Copyright 2006,2007 Lemur Consulting Ltd
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
@@ -57,6 +57,7 @@
 	case Xapian::Query::OP_PHRASE:
 	case Xapian::Query::OP_ELITE_SET:
 	case Xapian::Query::OP_VALUE_RANGE:
+	case Xapian::Query::OP_SYNONYM:
 	    return 0;
 	case Xapian::Query::OP_FILTER:
 	case Xapian::Query::OP_AND_MAYBE:
@@ -85,6 +86,7 @@
 	case Xapian::Query::OP_NEAR:
 	case Xapian::Query::OP_PHRASE:
 	case Xapian::Query::OP_ELITE_SET:
+	case Xapian::Query::OP_SYNONYM:
 	    return UINT_MAX;
 	default:
 	    Assert(false);
@@ -177,6 +179,9 @@
 		result += str_parameter;
 		result += om_tostring(parameter);
 		break;
+	    case Xapian::Query::OP_SYNONYM:
+		result += "=";
+		break;
 	}
     }
     return result;
@@ -202,6 +207,7 @@
 	case Xapian::Query::OP_PHRASE:          name = "PHRASE"; break;
 	case Xapian::Query::OP_ELITE_SET:       name = "ELITE_SET"; break;
 	case Xapian::Query::OP_VALUE_RANGE:     name = "VALUE_RANGE"; break;
+	case Xapian::Query::OP_SYNONYM:         name = "SYNONYM"; break;
     }
     return name;
 }
@@ -451,6 +457,8 @@
 		    return new Xapian::Query::Internal(Xapian::Query::OP_VALUE_RANGE, valno,
 						       start, stop);
 	        }
+		case '=':
+		    return qint_from_vector(Xapian::Query::OP_SYNONYM, subqs);
 	        default:
 		    DEBUGLINE(UNKNOWN, "Can't parse remainder `" << p - 1 << "'");
 		    throw Xapian::InvalidArgumentError("Invalid query string");
@@ -617,6 +625,7 @@
         case OP_ELITE_SET:
         case OP_OR:
         case OP_XOR:
+	case OP_SYNONYM:
             // Doing an "OR" type operation - if we've got any MatchNothing
             // subnodes, drop them; except that we mustn't become an empty
             // node due to this, so we never drop a MatchNothing subnode
@@ -690,7 +699,7 @@
 		}
 	    }
 	    break;
-	case OP_OR: case OP_AND: case OP_XOR:
+	case OP_OR: case OP_AND: case OP_XOR: case OP_SYNONYM:
 	    // Remove duplicates if we can.
 	    if (subqs.size() > 1) collapse_subqs();
 	    break;
@@ -734,7 +743,7 @@
 void
 Xapian::Query::Internal::collapse_subqs()
 {
-    Assert(op == OP_OR || op == OP_AND || op == OP_XOR);
+    Assert(op == OP_OR || op == OP_AND || op == OP_XOR || op == OP_SYNONYM);
     typedef set<Xapian::Query::Internal *, SortPosName> subqtable;
     subqtable sqtab;
 
@@ -809,7 +818,7 @@
     Assert(!is_leaf(op));
     if (subq == 0) {
 	subqs.push_back(0);
-    } else if (op == subq->op && (op == OP_AND || op == OP_OR || op == OP_XOR)) {
+    } else if (op == subq->op && (op == OP_AND || op == OP_OR || op == OP_XOR || op == OP_SYNONYM)) {
 	// Distribute the subquery.
 	for (subquery_list::const_iterator i = subq->subqs.begin();
 	     i != subq->subqs.end(); i++) {
