Ticket #50: opsynonym4.patch

File opsynonym4.patch, 24.1 kB (added by richard, 14 months ago)

Updated implementation patch

  • matcher/Makefile.mk

     
    1717        matcher/remotesubmatch.h\ 
    1818        matcher/scaleweight.h\ 
    1919        matcher/selectpostlist.h\ 
     20        matcher/synonympostlist.h\ 
    2021        matcher/valuerangepostlist.h\ 
    2122        matcher/xorpostlist.h 
    2223 
     
    5354        matcher/scaleweight.cc\ 
    5455        matcher/selectpostlist.cc\ 
    5556        matcher/stats.cc\ 
     57        matcher/synonympostlist.cc\ 
    5658        matcher/tradweight.cc\ 
    5759        matcher/valuerangepostlist.cc\ 
    5860        matcher/weight.cc\ 
  • matcher/multimatch.cc

     
    374374    } 
    375375    Assert(!postlists.empty()); 
    376376 
     377#ifdef XAPIAN_DEBUG_VERBOSE 
     378    { 
     379        DEBUGLINE(MATCH, "termfreqandwts:"); 
     380        map<string, Xapian::MSet::Internal::TermFreqAndWeight>::const_iterator tfwi; 
     381        for (tfwi = termfreqandwts.begin(); tfwi != termfreqandwts.end(); ++tfwi) 
     382        { 
     383            DEBUGLINE(MATCH, "termfreqandwt[" << tfwi->first << "] = " << tfwi->second.termfreq << ", " << tfwi->second.termweight); 
     384        } 
     385    } 
     386#endif 
     387 
    377388    // Get a single combined postlist 
    378389    PostList *pl; 
    379390    if (postlists.size() == 1) { 
     
    794805                DEBUGLINE(MATCH, "denom = " << denom << " percent_scale = " << percent_scale); 
    795806                Assert(percent_scale <= denom); 
    796807                denom *= greatest_wt; 
     808                if (denom == 0) { 
     809                    percent_scale = 1.0 / greatest_wt; 
     810                } else { 
    797811                Assert(denom > 0); 
    798812                percent_scale /= denom; 
     813                } 
    799814            } else { 
    800815                // If all the terms match, the 2 sums of weights cancel 
    801816                percent_scale = 1.0 / greatest_wt; 
  • matcher/localmatch.cc

     
    3232#include "omqueryinternal.h" 
    3333#include "queryoptimiser.h" 
    3434#include "scaleweight.h" 
     35#include "stats.h" 
     36#include "synonympostlist.h" 
    3537#include "weightinternal.h" 
    36 #include "stats.h" 
    3738 
    3839#include <cfloat> 
    3940#include <cmath> 
     
    114115} 
    115116 
    116117PostList * 
     118LocalSubMatch::make_synonym_postlist(PostList * or_pl, MultiMatch * matcher) 
     119{ 
     120    DEBUGCALL(MATCH, PostList *, "LocalSubMatch::make_synonym_postlist", 
     121              "[or_pl]"); 
     122    DEBUGLINE(MATCH, "or_pl->get_termfreq() = " << or_pl->get_termfreq_est()); 
     123    AutoPtr<SynonymPostList> res(new SynonymPostList(or_pl, matcher)); 
     124    AutoPtr<Xapian::Weight> wt; 
     125 
     126    // FIXME:1.1: create the Xapian::Weight::Internal directly, and hold it in 
     127    // an AutoPtr until supplying it to wt_factory->create() in case of an 
     128    // exception. 
     129    Xapian::Weight::Internal * wt_internal(stats->create_weight_internal()); 
     130    wt_internal->termfreq = or_pl->get_termfreq_est(); 
     131    wt_internal->reltermfreq = 0; // FIXME - calculate this. 
     132    wt = wt_factory->create(wt_internal, qlen, 1, ""); 
     133 
     134    res->set_weight(wt.release()); 
     135    RETURN(res.release()); 
     136} 
     137 
     138PostList * 
    117139LocalSubMatch::postlist_from_op_leaf_query(const Xapian::Query::Internal *query, 
    118140                                           double factor) 
    119141{ 
    120142    DEBUGCALL(MATCH, PostList *, "LocalSubMatch::postlist_from_op_leaf_query", 
    121               query << ", " << factor); 
     143              query->get_description() << ", " << factor); 
    122144    Assert(query); 
    123145    AssertEq(query->op, Xapian::Query::Internal::OP_LEAF); 
    124146    Assert(query->subqs.empty()); 
     
    142164        Xapian::doccount tf = stats->get_termfreq(query->tname); 
    143165        Xapian::weight weight = boolean ? 0 : wt->get_maxpart(); 
    144166        Xapian::MSet::Internal::TermFreqAndWeight info(tf, weight); 
     167        DEBUGLINE(MATCH, "Setting term_info[" << query->tname << "] to (" << tf << ", " << weight << ")"); 
    145168        term_info.insert(make_pair(query->tname, info)); 
    146169    } else if (!boolean) { 
    147170        i->second.termweight += wt->get_maxpart(); 
     171        AssertEq(stats->get_termfreq(query->tname), i->second.termfreq); 
     172        DEBUGLINE(MATCH, "Increasing term_info[" << query->tname << "] to (" << i->second.termfreq << ", " << i->second.termweight << ")"); 
    148173    } 
    149174 
    150175    LeafPostList * pl = db->open_post_list(query->tname); 
  • matcher/localmatch.h

     
    8686    PostList * get_postlist_and_term_info(MultiMatch *matcher, 
    8787        std::map<string, Xapian::MSet::Internal::TermFreqAndWeight> *termfreqandwts); 
    8888 
     89    /** Convert a postlist into a synonym postlist. 
     90     */ 
     91    PostList * make_synonym_postlist(PostList * or_pl, MultiMatch * matcher); 
     92 
    8993    /** Convert an OP_LEAF query to a PostList. 
    9094     * 
    9195     *  This is called by QueryOptimiser when it reaches an OP_LEAF query. 
  • matcher/synonympostlist.h

     
     1/* synonympostlist.h: Combine subqueries, weighting as if they are synonyms 
     2 * 
     3 * Copyright 2007 Lemur Consulting Ltd 
     4 * 
     5 * This program is free software; you can redistribute it and/or modify 
     6 * it under the terms of the GNU General Public License as published by 
     7 * the Free Software Foundation; either version 2 of the License, or 
     8 * (at your option) any later version. 
     9 * 
     10 * This program is distributed in the hope that it will be useful, 
     11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 
     12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
     13 * GNU General Public License for more details. 
     14 * 
     15 * You should have received a copy of the GNU General Public License 
     16 * along with this program; if not, write to the Free Software 
     17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA 
     18 */ 
     19 
     20#ifndef XAPIAN_INCLUDED_SYNONYMPOSTLIST_H 
     21#define XAPIAN_INCLUDED_SYNONYMPOSTLIST_H 
     22 
     23#include "multimatch.h" 
     24#include "postlist.h" 
     25#include "stats.h" 
     26#include <vector> 
     27 
     28/** A postlist comprising several postlists SYNONYMed together. 
     29 * 
     30 *  This postlist returns all postings in the OR of the sub postlists, but 
     31 *  returns weights as if they represented a single term.  The term frequency 
     32 *  portion of the weight is approximated. 
     33 */ 
     34class SynonymPostList : public PostList { 
     35    private: 
     36        /** The subtree (owned, and deleted by the destructor).  It starts as an 
     37         *  OR of all the sub-postlists joined with Synonym, but may decay. 
     38         */ 
     39        PostList * subtree; 
     40 
     41        /** The object which is using this postlist to perform 
     42         *  a match.  This object needs to be notified when the 
     43         *  tree changes such that the maximum weights need to be 
     44         *  recalculated. 
     45         */ 
     46        MultiMatch *matcher; 
     47 
     48        /** Weighting object used for calculating the synonym weights (owned). 
     49         */ 
     50        const Xapian::Weight * wt; 
     51 
     52        /** Whether wt needs the doclength; cached by set_weight(). 
     53         */ 
     54        bool want_doclength; 
     55 
     56    public: 
     57        SynonymPostList(PostList *subtree_, MultiMatch * matcher_); // takes ownership of subtree_ 
     58        ~SynonymPostList(); // deletes wt and subtree 
     59 
     60        /** Set the weight object to be used for the synonym postlist. 
     61         * 
     62         *  Ownership of the weight object passes to the synonym postlist - the 
     63         *  caller must not delete it after use. 
     64         */ 
     65        void set_weight(const Xapian::Weight * wt_); 
     66        // Both of these move the subtree on and return NULL. 
     67        PostList *next(Xapian::weight w_min); 
     68        PostList *skip_to(Xapian::docid did, Xapian::weight w_min); 
     69        // Weights are calculated by wt rather than taken from the subtree. 
     70        Xapian::weight get_weight() const; 
     71        Xapian::weight get_maxweight() const; 
     72        Xapian::weight recalc_maxweight(); 
     73 
     74        // The following methods just call through to the subtree. 
     75        Xapian::termcount get_wdf() const; 
     76        Xapian::doccount get_termfreq_min() const; 
     77        Xapian::doccount get_termfreq_est() const; 
     78        Xapian::doccount get_termfreq_max() const; 
     79        Xapian::docid get_docid() const; 
     80        Xapian::doclength get_doclength() const; 
     81        PositionList * read_position_list(); 
     82        PositionList * open_position_list() const; 
     83        bool at_end() const; 
     84 
     85        std::string get_description() const; 
     86}; 
     87 
     88#endif /* XAPIAN_INCLUDED_SYNONYMPOSTLIST_H */ 
  • matcher/queryoptimiser.cc

    Property changes on: matcher/synonympostlist.h
    ___________________________________________________________________
    Name: svn:eol-style
       + native
    
     
    5151QueryOptimiser::do_subquery(const Xapian::Query::Internal * query, double factor) 
    5252{ 
    5353    DEBUGCALL(MATCH, PostList *, "QueryOptimiser::do_subquery", 
    54               query << ", " << factor); 
     54              query->get_description() << ", " << factor); 
    5555 
    5656    // Handle QueryMatchNothing. 
    5757    if (!query) RETURN(new EmptyPostList()); 
     
    9999            RETURN(do_subquery(query->subqs[0], sub_factor)); 
    100100        } 
    101101 
     102        case Xapian::Query::OP_SYNONYM: { 
     103            RETURN(do_synonym(query, factor)); 
     104        } 
     105 
    102106        default: 
    103107            Assert(false); 
    104108            RETURN(NULL); 
    105109    } 
    106110} 
    107111 
     112PostList * 
     113QueryOptimiser::do_leaf(const Xapian::Query::Internal * query, double factor) { 
     114    return localsubmatch.postlist_from_op_leaf_query(query, factor); // the LocalSubMatch builds leaf postlists 
     115} 
     116 
    108117struct PosFilter { 
    109118    PosFilter(Xapian::Query::Internal::op_t op_, size_t begin_, size_t end_, 
    110119              Xapian::termcount window_) 
     
    122131QueryOptimiser::do_and_like(const Xapian::Query::Internal *query, double factor) 
    123132{ 
    124133    DEBUGCALL(MATCH, PostList *, "QueryOptimiser::do_and_like", 
    125               query << ", " << factor); 
     134              query->get_description() << ", " << factor); 
    126135 
    127136    list<PosFilter> pos_filters; 
    128137    vector<PostList *> plists; 
     
    172181                            list<PosFilter> & pos_filters) 
    173182{ 
    174183    DEBUGCALL(MATCH, void, "QueryOptimiser::do_and_like", 
    175               query << ", " << factor << ", [and_plists], [pos_filters]"); 
     184              query->get_description() << ", " << 
     185              factor << ", [and_plists], [pos_filters],"); 
    176186 
    177187    Xapian::Query::Internal::op_t op = query->op; 
    178188    Assert(is_and_like(op)); 
     
    251261QueryOptimiser::do_or_like(const Xapian::Query::Internal *query, double factor) 
    252262{ 
    253263    DEBUGCALL(MATCH, PostList *, "QueryOptimiser::do_or_like", 
    254               query << ", " << factor); 
     264              query->get_description() << ", " << factor); 
    255265 
    256266    // FIXME: we could optimise by merging OP_ELITE_SET and OP_OR like we do 
    257267    // for AND-like operations. 
    258268    Xapian::Query::Internal::op_t op = query->op; 
    259269    Assert(op == Xapian::Query::OP_ELITE_SET || op == Xapian::Query::OP_OR || 
    260            op == Xapian::Query::OP_XOR); 
     270           op == Xapian::Query::OP_XOR || op == Xapian::Query::OP_SYNONYM); 
    261271 
     272    // We build an OR tree for OP_SYNONYM.  (The resulting tree will then be 
     273    // passed into a SynonymPostList, from which the weightings will come.) 
     274    if (op == Xapian::Query::OP_SYNONYM) { 
     275        op = Xapian::Query::OP_OR; 
     276    } 
     277 
    262278    const Xapian::Query::Internal::subquery_list &queries = query->subqs; 
    263279    AssertRel(queries.size(), >=, 2); 
    264280 
     
    333349                  ComparePostListTermFreqAscending()); 
    334350    } 
    335351} 
     352 
     353PostList * 
     354QueryOptimiser::do_synonym(const Xapian::Query::Internal *query, double factor) 
     355{ 
     356    DEBUGCALL(MATCH, PostList *, "QueryOptimiser::do_synonym", 
     357              query->get_description() << ", " << factor); 
     358 
     359    if (factor == 0.0) { 
     360        // If we have a factor of 0, we don't care about the weights, so 
     361        // we're just like a normal OR query. 
     362        RETURN(do_or_like(query, 0.0)); 
     363    } 
     364 
     365    AssertEq(query->wqf, 0); // FIXME - should we be doing something with the wqf? 
     366 
     367    // Build a postlist tree which we'll use to get the frequencies. 
     368    AutoPtr<PostList> freq_pl(do_or_like(query, 0.0)); 
     369 
     370    RETURN(localsubmatch.make_synonym_postlist(do_or_like(query, 0.0), 
     371                                               matcher)); 
     372} 
  • matcher/queryoptimiser.h

     
    6363     * 
    6464     *  @return         A PostList. 
    6565     */ 
    66     PostList * do_leaf(const Xapian::Query::Internal * query, double factor) { 
    67         return localsubmatch.postlist_from_op_leaf_query(query, factor); 
    68     } 
     66    PostList * do_leaf(const Xapian::Query::Internal * query, double factor); 
    6967 
    7068    /** Optimise an AND-like Xapian::Query::Internal subtree into a PostList 
    7169     *  subtree. 
     
    10199     */ 
    102100    PostList * do_or_like(const Xapian::Query::Internal *query, double factor); 
    103101 
     102    /** Optimise a synonym Xapian::Query::Internal subtree into a PostList 
     103     * 
     104     *  @param query    The subtree to optimise. 
     105     *  @param factor   How much to scale weights for this subtree by. 
     106     * 
     107     *  @return         A PostList subtree. 
     108     */ 
     109    PostList * do_synonym(const Xapian::Query::Internal *query, double factor); 
     110 
    104111  public: 
    105112    QueryOptimiser(const Xapian::Database::Internal & db_, 
    106113                   LocalSubMatch & localsubmatch_, 
  • matcher/synonympostlist.cc

     
     1/* synonympostlist.cc: Combine subqueries, weighting as if they are synonyms 
     2 * 
     3 * Copyright 2007 Lemur Consulting Ltd 
     4 * 
     5 * This program is free software; you can redistribute it and/or 
     6 * modify it under the terms of the GNU General Public License as 
     7 * published by the Free Software Foundation; either version 2 of the 
     8 * License, or (at your option) any later version. 
     9 * 
     10 * This program is distributed in the hope that it will be useful, 
     11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 
     12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
     13 * GNU General Public License for more details. 
     14 * 
     15 * You should have received a copy of the GNU General Public License 
     16 * along with this program; if not, write to the Free Software 
     17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 
     18 * USA 
     19 */ 
     20 
     21#include <config.h> 
     22 
     23#include "synonympostlist.h" 
     24#include "branchpostlist.h" 
     25#include "omassert.h" 
     26#include "omdebug.h" 
     27 
     28SynonymPostList::SynonymPostList(PostList *subtree_, 
     29                                 MultiMatch * matcher_) 
     30        : subtree(subtree_), 
     31          matcher(matcher_), 
     32          wt(NULL), 
     33          want_doclength(false) 
     34{ 
     35} 
     36 
     37SynonymPostList::~SynonymPostList() 
     38{ 
     39    delete wt; 
     40    delete subtree; 
     41} 
     42 
     43void 
     44SynonymPostList::set_weight(const Xapian::Weight * wt_) 
     45{ 
     46    delete(wt); 
     47    wt = wt_; 
     48    want_doclength = wt_->get_sumpart_needs_doclength(); 
     49} 
     50 
     51PostList * 
     52SynonymPostList::next(Xapian::weight w_min) 
     53{ 
     54    DEBUGCALL(MATCH, PostList *, "SynonymPostList::next", w_min); 
     55    next_handling_prune(subtree, w_min, matcher); 
     56    RETURN(NULL); 
     57} 
     58 
     59PostList * 
     60SynonymPostList::skip_to(Xapian::docid did, Xapian::weight w_min) 
     61{ 
     62    DEBUGCALL(MATCH, PostList *, "SynonymPostList::skip_to", did << ", " << w_min); 
     63    skip_to_handling_prune(subtree, did, w_min, matcher); 
     64    RETURN(NULL); 
     65} 
     66 
     67Xapian::weight 
     68SynonymPostList::get_weight() const 
     69{ 
     70    return wt->get_sumpart(get_wdf(), want_doclength ? get_doclength() : 0); 
     71} 
     72 
     73Xapian::weight 
     74SynonymPostList::get_maxweight() const 
     75{ 
     76    return wt->get_maxpart(); 
     77} 
     78 
     79Xapian::weight 
     80SynonymPostList::recalc_maxweight() 
     81{ 
     82    return SynonymPostList::get_maxweight(); 
     83} 
     84 
     85Xapian::termcount 
     86SynonymPostList::get_wdf() const { 
     87    return subtree->get_wdf(); 
     88} 
     89 
     90Xapian::doccount  
     91SynonymPostList::get_termfreq_min() const { 
     92    return subtree->get_termfreq_min(); 
     93} 
     94 
     95Xapian::doccount  
     96SynonymPostList::get_termfreq_est() const { 
     97    return subtree->get_termfreq_est(); 
     98} 
     99 
     100Xapian::doccount  
     101SynonymPostList::get_termfreq_max() const { 
     102    return subtree->get_termfreq_max(); 
     103} 
     104 
     105Xapian::docid  
     106SynonymPostList::get_docid() const { 
     107    return subtree->get_docid(); 
     108} 
     109 
     110Xapian::doclength  
     111SynonymPostList::get_doclength() const { 
     112    return subtree->get_doclength(); 
     113} 
     114 
     115PositionList *  
     116SynonymPostList::read_position_list() { 
     117    return subtree->read_position_list(); 
     118} 
     119 
     120PositionList *  
     121SynonymPostList::open_position_list() const { 
     122    return subtree->open_position_list(); 
     123} 
     124 
     125bool  
     126SynonymPostList::at_end() const { 
     127    return subtree->at_end(); 
     128} 
     129 
     130std::string 
     131SynonymPostList::get_description() const 
     132{ 
     133    return "(Synonym " + subtree->get_description() + ")"; 
     134} 
  • tests/api_db.cc

    Property changes on: matcher/synonympostlist.cc
    ___________________________________________________________________
    Name: svn:eol-style
       + native
    
     
    11711171    return true; 
    11721172} 
    11731173 
     1174// Check OP_SYNONYM: same matches as OP_OR, but (usually) different weights. 
     1175DEFINE_TESTCASE(synonym1, backend) { 
     1176    Xapian::Database db(get_database("etext")); 
     1177    Xapian::doccount lots = 214; // maximum number of results asked for from get_mset() 
     1178    vector<vector<Xapian::Query> > subqueries_list; 
     1179    // Case 1: a single term. 
     1180    vector<Xapian::Query> subqueries; 
     1181    subqueries.push_back(Xapian::Query("date")); 
     1182    subqueries_list.push_back(subqueries); 
     1183    // Case 2: two terms. 
     1184    subqueries.clear(); 
     1185    subqueries.push_back(Xapian::Query("sky")); 
     1186    subqueries.push_back(Xapian::Query("date")); 
     1187    subqueries_list.push_back(subqueries); 
     1188    // Case 3: a term and an OR subquery. 
     1189    subqueries.clear(); 
     1190    subqueries.push_back(Xapian::Query("date")); 
     1191    subqueries.push_back(Xapian::Query(Xapian::Query::OP_OR, 
     1192                                       Xapian::Query("sky"), 
     1193                                       Xapian::Query("glove"))); 
     1194    subqueries_list.push_back(subqueries); 
     1195    // Case 4: four terms. 
     1196    subqueries.clear(); 
     1197    subqueries.push_back(Xapian::Query("sky")); 
     1198    subqueries.push_back(Xapian::Query("date")); 
     1199    subqueries.push_back(Xapian::Query("stein")); 
     1200    subqueries.push_back(Xapian::Query("ally")); 
     1201    subqueries_list.push_back(subqueries); 
     1202    // Case 5: a term and a PHRASE subquery. 
     1203    subqueries.clear(); 
     1204    subqueries.push_back(Xapian::Query("sky")); 
     1205    subqueries.push_back(Xapian::Query(Xapian::Query::OP_PHRASE, 
     1206                                       Xapian::Query("date"), 
     1207                                       Xapian::Query("stein"))); 
     1208    subqueries_list.push_back(subqueries); 
     1209 
     1210    for (vector<vector<Xapian::Query> >::const_iterator 
     1211         qlist = subqueries_list.begin(); 
     1212         qlist != subqueries_list.end(); ++qlist) 
     1213    { 
     1214        // Run two queries, one joining the subqueries with OR and one joining them 
     1215        // with SYNONYM. 
     1216        Xapian::Enquire enquire(db); 
     1217        enquire.set_query(Xapian::Query(Xapian::Query::OP_OR, qlist->begin(), qlist->end())); 
     1218        Xapian::MSet ormset = enquire.get_mset(0, lots); 
     1219        Xapian::Query synquery(Xapian::Query::OP_SYNONYM, qlist->begin(), qlist->end()); 
     1220        tout << synquery << "\n"; 
     1221        enquire.set_query(synquery); 
     1222        Xapian::MSet mset = enquire.get_mset(0, lots); 
     1223 
     1224        // Check that the synonym query returns some results. 
     1225        TEST_NOT_EQUAL(mset.size(), 0); 
     1226        // Check that the queries return the same number of results. 
     1227        TEST_EQUAL(mset.size(), ormset.size()); 
     1228        map<Xapian::docid, Xapian::weight> values_or; 
     1229        map<Xapian::docid, Xapian::weight> values_synonym; 
     1230        for (Xapian::doccount i = 0; i < mset.size(); ++i) { 
     1231            values_or[*ormset[i]] = ormset[i].get_weight(); 
     1232            values_synonym[*mset[i]] = mset[i].get_weight(); 
     1233        } 
     1234        TEST_EQUAL(values_or.size(), values_synonym.size()); 
     1235 
     1236        /* Compare the weight of each document under OR and under SYNONYM. 
     1237         * With a single subquery the weights must be equal; otherwise they 
     1238         * are expected to differ.  (It's technically possible that some might 
     1239         * be equal, but unlikely, so for now we just check that none are.  If 
     1240         * this causes problems, we can change to checking that most differ.) */ 
     1241        for (map<Xapian::docid, Xapian::weight>::const_iterator 
     1242             j = values_or.begin(); 
     1243             j != values_or.end(); ++j) 
     1244        { 
     1245            Xapian::docid did = j->first; 
     1246            // Check that all the results in the or tree make it to the synonym tree. 
     1247            TEST(values_synonym.find(did) != values_synonym.end()); 
     1248            if (qlist->size() == 1) { 
     1249                // Check that the weights are the same. 
     1250                TEST_EQUAL(values_or[did], values_synonym[did]); 
     1251            } else { 
     1252                // Check that the weights differ. 
     1253                TEST_NOT_EQUAL(values_or[did], values_synonym[did]); 
     1254            } 
     1255        } 
     1256    } 
     1257    return true; 
     1258} 
     1259 
    11741260// tests that specifying a nonexistent input file throws an exception. 
    11751261DEFINE_TESTCASE(quartzdatabaseopeningerror1, quartz) { 
    11761262    mkdir(".quartz", 0755); 
  • include/xapian/query.h

     
    111111            /** Select an elite set from the subqueries, and perform 
    112112             *  a query with these combined as an OR query. 
    113113             */ 
    114             OP_ELITE_SET 
     114            OP_ELITE_SET, 
     115 
     116            /** Treat a set of queries as synonyms. 
     117             * 
     118             *  This returns all results which match at least one of the 
     119             *  subqueries, but weights them as if all the subqueries were 
     120             *  instances of the same term: multiple matching terms in a 
     121             *  document increase the wdf value used, and the term frequency is 
     122             *  based on the number of documents which would match an OR of all 
     123             *  the subqueries. 
     124             * 
     125             *  The term frequency used will usually be an approximation, 
     126             *  because calculating the precise combined term frequency would 
     127             *  be overly expensive. 
     128             * 
     129             *  Identical to OP_OR, except for the weightings returned. 
     130             */ 
     131            OP_SYNONYM 
    115132        } op; 
    116133 
    117134        /** Copy constructor. */ 
  • common/remoteprotocol.h

     
    3838// 30.3: New MSG_GETMSET which passes check_at_least parameter. 
    3939// 30.4: New query operator OP_SCALE_WEIGHT. 
    4040// 30.5: New MSG_GETMSET which expects MSet's percent_factor to be returned. 
     41// 30.6: Add synonym queries (add operator to the serialised form of queries) 
    4142#define XAPIAN_REMOTE_PROTOCOL_MAJOR_VERSION 30 
    42 #define XAPIAN_REMOTE_PROTOCOL_MINOR_VERSION 5 
     43#define XAPIAN_REMOTE_PROTOCOL_MINOR_VERSION 6 
    4344 
    4445/* When we move to version 31: 
    4546 * + Remove MSG_DELETEDOCUMENT_PRE_30_2 
  • api/omqueryinternal.cc

     
    5959        case Xapian::Query::OP_PHRASE: 
    6060        case Xapian::Query::OP_ELITE_SET: 
    6161        case Xapian::Query::OP_VALUE_RANGE: 
     62        case Xapian::Query::OP_SYNONYM: 
    6263            return 0; 
    6364        case Xapian::Query::OP_SCALE_WEIGHT: 
    6465            return 1; 
     
    9192        case Xapian::Query::OP_NEAR: 
    9293        case Xapian::Query::OP_PHRASE: 
    9394        case Xapian::Query::OP_ELITE_SET: 
     95        case Xapian::Query::OP_SYNONYM: 
    9496            return UINT_MAX; 
    9597        default: 
    9698            Assert(false); 
     
    187189                result += "."; 
    188190                result += str_parameter; // serialise_double(get_dbl_parameter()); 
    189191                break; 
     192            case Xapian::Query::OP_SYNONYM: 
     193                result += "="; 
     194                break; 
    190195        } 
    191196    } 
    192197    return result; 
     
    213218        case Xapian::Query::OP_ELITE_SET:       name = "ELITE_SET"; break; 
    214219        case Xapian::Query::OP_VALUE_RANGE:     name = "VALUE_RANGE"; break; 
    215220        case Xapian::Query::OP_SCALE_WEIGHT:    name = "SCALE_WEIGHT"; break; 
     221        case Xapian::Query::OP_SYNONYM:         name = "SYNONYM"; break; 
    216222    } 
    217223    return name; 
    218224} 
     
    492498                    return qint_from_vector(Xapian::Query::OP_SCALE_WEIGHT, 
    493499                                            subqs, 0, param); 
    494500                } 
    495                 default: 
     501                case '=': { 
     502                    return qint_from_vector(Xapian::Query::OP_SYNONYM, subqs); 
     503                } 
     504                default: 
    496505                    DEBUGLINE(UNKNOWN, "Can't parse remainder `" << p - 1 << "'"); 
    497506                    throw Xapian::InvalidArgumentError("Invalid query string"); 
    498507            } 
     
    662671        case OP_ELITE_SET: 
    663672        case OP_OR: 
    664673        case OP_XOR: 
     674        case OP_SYNONYM: 
    665675            // Doing an "OR" type operation - if we've got any MatchNothing 
    666676            // subnodes, drop them; except that we mustn't become an empty 
    667677            // node due to this, so we never drop a MatchNothing subnode 
     
    746756                } 
    747757            } 
    748758            break; 
    749         case OP_OR: case OP_AND: case OP_XOR: 
     759        case OP_OR: case OP_AND: case OP_XOR: case OP_SYNONYM: 
    750760            // Remove duplicates if we can. 
    751761            if (subqs.size() > 1) collapse_subqs(); 
    752762            break; 
     
    790800void 
    791801Xapian::Query::Internal::collapse_subqs() 
    792802{ 
    793     Assert(op == OP_OR || op == OP_AND || op == OP_XOR); 
     803    Assert(op == OP_OR || op == OP_AND || op == OP_XOR || op == OP_SYNONYM); 
    794804    typedef set<Xapian::Query::Internal *, SortPosName> subqtable; 
    795805    subqtable sqtab; 
    796806 
     
    865875    Assert(!is_leaf(op)); 
    866876    if (subq == 0) { 
    867877        subqs.push_back(0); 
    868     } else if (op == subq->op && (op == OP_AND || op == OP_OR || op == OP_XOR)) { 
     878    } else if (op == subq->op && (op == OP_AND || op == OP_OR || op == OP_XOR || op == OP_SYNONYM)) { 
    869879        // Distribute the subquery. 
    870880        for (subquery_list::const_iterator i = subq->subqs.begin(); 
    871881             i != subq->subqs.end(); i++) {