Ticket #50: opsynonym_changes_12599_12601.patch

File opsynonym_changes_12599_12601.patch, 67.2 KB (added by Richard Boulton, 16 years ago)

Latest patch

  • xapian-maintainer-tools/win32msvc/win32_matcher.mak

     
    3434    $(INTDIR)\queryoptimiser.obj\
    3535    $(INTDIR)\rset.obj\
    3636    $(INTDIR)\selectpostlist.obj\
     37    $(INTDIR)\synonympostlist.obj\
    3738    $(INTDIR)\valuerangepostlist.obj\
    3839    $(INTDIR)\valuegepostlist.obj\
    3940    $(INTDIR)\xorpostlist.obj\
     
    6061    $(INTDIR)\queryoptimiser.cc\
    6162    $(INTDIR)\rset.cc\
    6263    $(INTDIR)\selectpostlist.cc\
     64    $(INTDIR)\synonympostlist.cc\
    6365    $(INTDIR)\valuerangepostlist.cc\
    6466    $(INTDIR)\valuegepostlist.cc\
    6567    $(INTDIR)\xorpostlist.cc\
  • xapian-core/queryparser/queryparser.lemony

     
    22/* queryparser.lemony: build a Xapian::Query object from a user query string.
    33 *
    44 * Copyright (C) 2004,2005,2006,2007,2008 Olly Betts
     5 * Copyright (C) 2007,2008,2009 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or
    78 * modify it under the terms of the GNU General Public License as
     
    287288            end = db.synonyms_end(term);
    288289        }
    289290        while (syn != end) {
    290             q = Query(Query::OP_OR, q, Query(*syn, 1, pos));
     291            q = Query(Query::OP_SYNONYM, q, Query(*syn, 1, pos));
    291292            ++syn;
    292293        }
    293294    }
     
    353354        }
    354355    }
    355356    delete this;
    356     return new Query(Query::OP_OR, subqs.begin(), subqs.end());
     357    return new Query(Query::OP_SYNONYM, subqs.begin(), subqs.end());
    357358}
    358359
    359360Query *
    360361Term::as_partial_query(State * state_) const
    361362{
    362363    Database db = state_->get_database();
    363     vector<Query> subqs;
     364    vector<Query> subqs_partial; // A synonym of all the partial terms.
     365    vector<Query> subqs_full; // A synonym of all the full terms.
    364366    list<string>::const_iterator piter;
    365367    for (piter = prefixes.begin(); piter != prefixes.end(); ++piter) {
    366368        string root = *piter;
    367369        root += name;
    368370        TermIterator t = db.allterms_begin(root);
    369371        while (t != db.allterms_end(root)) {
    370             subqs.push_back(Query(*t, 1, pos));
     372            subqs_partial.push_back(Query(*t, 1, pos));
    371373            ++t;
    372374        }
    373375        // Add the term, as it would normally be handled, as an alternative.
    374         subqs.push_back(Query(make_term(*piter), 1, pos));
     376        subqs_full.push_back(Query(make_term(*piter), 1, pos));
    375377    }
    376378    delete this;
    377     return new Query(Query::OP_OR, subqs.begin(), subqs.end());
     379    return new Query(Query::OP_OR,
     380                     Query(Query::OP_SYNONYM,
     381                           subqs_partial.begin(), subqs_partial.end()),
     382                     Query(Query::OP_SYNONYM,
     383                           subqs_full.begin(), subqs_full.end()));
    378384}
    379385
    380386inline bool
     
    11761182                subqs2.push_back(Query(*syn, 1, pos));
    11771183                ++syn;
    11781184            }
    1179             Query q_synonym_terms(Query::OP_OR, subqs2.begin(), subqs2.end());
     1185            Query q_synonym_terms(Query::OP_SYNONYM, subqs2.begin(), subqs2.end());
    11801186            subqs2.clear();
    1181             subqs.push_back(Query(Query::OP_OR,
     1187            subqs.push_back(Query(Query::OP_SYNONYM,
    11821188                                  q_original_terms, q_synonym_terms));
    11831189        }
    11841190    } else {
  • xapian-core/matcher/extraweightpostlist.h

     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2001 Ananova Ltd
    55 * Copyright 2003,2004,2007,2009 Olly Betts
     6 * Copyright 2009 Lemur Consulting Ltd
    67 *
    78 * This program is free software; you can redistribute it and/or
    89 * modify it under the terms of the GNU General Public License as
  • xapian-core/matcher/Makefile.mk

     
    1818        matcher/queryoptimiser.h\
    1919        matcher/remotesubmatch.h\
    2020        matcher/selectpostlist.h\
     21        matcher/synonympostlist.h\
    2122        matcher/valuegepostlist.h\
    2223        matcher/valuerangepostlist.h\
    2324        matcher/xorpostlist.h
     
    5455        matcher/queryoptimiser.cc\
    5556        matcher/rset.cc\
    5657        matcher/selectpostlist.cc\
     58        matcher/synonympostlist.cc\
    5759        matcher/valuegepostlist.cc\
    5860        matcher/valuerangepostlist.cc\
    5961        matcher/xorpostlist.cc
  • xapian-core/matcher/multimatch.cc

     
    794794
    795795                LOGVALUE(MATCH, denom);
    796796                LOGVALUE(MATCH, percent_scale);
    797                 Assert(percent_scale <= denom);
    798                 denom *= greatest_wt;
    799                 Assert(denom > 0);
    800                 percent_scale /= denom;
     797                AssertRel(percent_scale,<=,denom);
     798                if (denom == 0) {
     799                    // This happens if the top-level operator is OP_SYNONYM.
     800                    percent_scale = 1.0 / greatest_wt;
     801                } else {
     802                    denom *= greatest_wt;
     803                    AssertRel(denom,>,0);
     804                    percent_scale /= denom;
     805                }
    801806            } else {
    802807                // If all the terms match, the 2 sums of weights cancel
    803808                percent_scale = 1.0 / greatest_wt;
  • xapian-core/matcher/localmatch.cc

     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002 Ananova Ltd
    55 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009 Olly Betts
    6  * Copyright 2007 Lemur Consulting Ltd
     6 * Copyright 2007,2008,2009 Lemur Consulting Ltd
    77 *
    88 * This program is free software; you can redistribute it and/or
    99 * modify it under the terms of the GNU General Public License as
     
    3131#include "omdebug.h"
    3232#include "omqueryinternal.h"
    3333#include "queryoptimiser.h"
     34#include "synonympostlist.h"
    3435#include "weightinternal.h"
    3536
    3637#include <cfloat>
     
    111112}
    112113
    113114PostList *
     115LocalSubMatch::make_synonym_postlist(PostList * or_pl, MultiMatch * matcher,
     116                                     double factor)
     117{
     118    DEBUGCALL(MATCH, PostList *, "LocalSubMatch::make_synonym_postlist",
     119              "[or_pl], [matcher], " << factor);
     120    LOGVALUE(MATCH, or_pl->get_termfreq_est());
     121    AutoPtr<SynonymPostList> res(new SynonymPostList(or_pl, matcher));
     122    AutoPtr<Xapian::Weight> wt(wt_factory->clone_());
     123
     124    TermFreqs freqs(or_pl->get_termfreq_est_using_stats(*stats));
     125    wt->init_(*stats, qlen, factor, freqs.termfreq, freqs.reltermfreq);
     126
     127    res->set_weight(wt.release());
     128    RETURN(res.release());
     129}
     130
     131PostList *
    114132LocalSubMatch::postlist_from_op_leaf_query(const Xapian::Query::Internal *query,
    115133                                           double factor)
    116134{
  • xapian-core/matcher/localmatch.h

     
    22 *  @brief SubMatch class for a local database.
    33 */
    44/* Copyright (C) 2006,2007,2009 Olly Betts
     5 * Copyright (C) 2007 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or modify
    78 * it under the terms of the GNU General Public License as published by
     
    8283    PostList * get_postlist_and_term_info(MultiMatch *matcher,
    8384        std::map<string, Xapian::MSet::Internal::TermFreqAndWeight> *termfreqandwts);
    8485
     86    /** Convert a postlist into a synonym postlist.
     87     */
     88    PostList * make_synonym_postlist(PostList * or_pl, MultiMatch * matcher,
     89                                     double factor);
     90
    8591    /** Convert an OP_LEAF query to a PostList.
    8692     *
    8793     *  This is called by QueryOptimiser when it reaches an OP_LEAF query.
  • xapian-core/matcher/msetpostlist.h

     
    22 *  @brief PostList returning entries from an MSet
    33 */
    44/* Copyright (C) 2006,2007,2008,2009 Olly Betts
     5 * Copyright (C) 2009 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or modify
    78 * it under the terms of the GNU General Public License as published by
  • xapian-core/matcher/synonympostlist.h

     
     1/** @file synonympostlist.h
     2 * @brief Combine subqueries, weighting as if they are synonyms
     3 */
     4/* Copyright 2007,2009 Lemur Consulting Ltd
     5 *
     6 * This program is free software; you can redistribute it and/or modify
     7 * it under the terms of the GNU General Public License as published by
     8 * the Free Software Foundation; either version 2 of the License, or
     9 * (at your option) any later version.
     10 *
     11 * This program is distributed in the hope that it will be useful,
     12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14 * GNU General Public License for more details.
     15 *
     16 * You should have received a copy of the GNU General Public License
     17 * along with this program; if not, write to the Free Software
     18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
     19 */
     20
     21#ifndef XAPIAN_INCLUDED_SYNONYMPOSTLIST_H
     22#define XAPIAN_INCLUDED_SYNONYMPOSTLIST_H
     23
     24#include "multimatch.h"
     25#include "postlist.h"
     26
     27/** A postlist comprising several postlists SYNONYMed together.
     28 *
     29 *  This postlist returns all postings in the OR of the sub postlists, but
     30 *  returns weights as if they represented a single term.  The term frequency
     31 *  portion of the weight is approximated.
     32 */
     33class SynonymPostList : public PostList {
     34    /** The subtree, which starts as an OR of all the sub-postlists being
     35     *  joined with Synonym, but may decay into something else.
     36     */
     37    PostList * subtree;
     38
     39    /** The object which is using this postlist to perform a match.
     40     *
     41     *  This object needs to be notified when the tree changes such that the
     42     *  maximum weights need to be recalculated.
     43     */
     44    MultiMatch * matcher;
     45
     46    /// Weighting object used for calculating the synonym weights.
     47    const Xapian::Weight * wt;
     48
     49    /// Flag indicating whether the weighting object needs the doclength.
     50    bool want_doclength;
     51
     52    /// Flag indicating whether the weighting object needs the wdf.
     53    bool want_wdf;
     54
     55    /// Flag indicating if we've called recalc_maxweight on the subtree yet.
     56    bool have_calculated_subtree_maxweights;
     57
     58  public:
     59    SynonymPostList(PostList * subtree_, MultiMatch * matcher_)
     60        : subtree(subtree_), matcher(matcher_), wt(NULL),
     61          want_doclength(false), want_wdf(false),
     62          have_calculated_subtree_maxweights(false) { }
     63
     64    ~SynonymPostList();
     65
     66    /** Set the weight object to be used for the synonym postlist.
     67     *
     68     *  Ownership of the weight object passes to the synonym postlist - the
     69     *  caller must not delete it after use.
     70     */
     71    void set_weight(const Xapian::Weight * wt_);
     72
     73    PostList *next(Xapian::weight w_min);
     74    PostList *skip_to(Xapian::docid did, Xapian::weight w_min);
     75
     76    Xapian::weight get_weight() const;
     77    Xapian::weight get_maxweight() const;
     78    Xapian::weight recalc_maxweight();
     79
     80    // The following methods just call through to the subtree.
     81    Xapian::termcount get_wdf() const;
     82    Xapian::doccount get_termfreq_min() const;
     83    Xapian::doccount get_termfreq_est() const;
     84    Xapian::doccount get_termfreq_max() const;
     85    TermFreqs get_termfreq_est_using_stats(
     86        const Xapian::Weight::Internal & stats) const;
     87    Xapian::docid get_docid() const;
     88    Xapian::termcount get_doclength() const;
     89    bool at_end() const;
     90
     91    std::string get_description() const;
     92};
     93
     94#endif /* XAPIAN_INCLUDED_SYNONYMPOSTLIST_H */
  • xapian-core/matcher/queryoptimiser.cc

    Property changes on: xapian-core/matcher/synonympostlist.h
    ___________________________________________________________________
    Added: svn:eol-style
       + native
    
    
    Property changes on: xapian-core/matcher/collapser.h
    ___________________________________________________________________
    Deleted: svn:mergeinfo
    
     
    8282        case Xapian::Query::OP_ELITE_SET:
    8383            RETURN(do_or_like(query, factor));
    8484
     85        case Xapian::Query::OP_SYNONYM:
     86            RETURN(do_synonym(query, factor));
     87
    8588        case Xapian::Query::OP_AND_NOT: {
    8689            AssertEq(query->subqs.size(), 2);
    8790            PostList * l = do_subquery(query->subqs[0], factor);
     
    304307    // for AND-like operations.
    305308    Xapian::Query::Internal::op_t op = query->op;
    306309    Assert(op == Xapian::Query::OP_ELITE_SET || op == Xapian::Query::OP_OR ||
    307            op == Xapian::Query::OP_XOR);
     310           op == Xapian::Query::OP_XOR || op == Xapian::Query::OP_SYNONYM);
    308311
    309312    const Xapian::Query::Internal::subquery_list &queries = query->subqs;
    310313    AssertRel(queries.size(), >=, 2);
     
    382385                  ComparePostListTermFreqAscending());
    383386    }
    384387}
     388
     389PostList *
     390QueryOptimiser::do_synonym(const Xapian::Query::Internal *query, double factor)
     391{
     392    DEBUGCALL(MATCH, PostList *, "QueryOptimiser::do_synonym",
     393              query << ", " << factor);
     394    if (factor == 0.0) {
     395        // If we have a factor of 0, we don't care about the weights, so
     396        // we're just like a normal OR query.
     397        RETURN(do_or_like(query, 0.0));
     398    }
     399
     400    // We currently assume wqf is 1 for calculating the synonym's weight
     401    // since conceptually the synonym is one "virtual" term.  If we were
     402    // to combine multiple occurrences of the same synonym expansion into
     403    // a single instance with wqf set, we would want to use the wqf.
     404    AssertEq(query->wqf, 0);
     405
     406    // We build an OP_OR tree for OP_SYNONYM and then wrap it in a
     407    // SynonymPostList, which supplies the weights.
     408    RETURN(localsubmatch.make_synonym_postlist(do_or_like(query, 0.0),
     409                                               matcher, factor));
     410}
  • xapian-core/matcher/mergepostlist.h

     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002 Ananova Ltd
    55 * Copyright 2002,2003,2004,2005,2009 Olly Betts
    6  * Copyright 2007 Lemur Consulting Ltd
     6 * Copyright 2007,2009 Lemur Consulting Ltd
    77 *
    88 * This program is free software; you can redistribute it and/or
    99 * modify it under the terms of the GNU General Public License as
  • xapian-core/matcher/queryoptimiser.h

     
    22 * @brief Convert a Xapian::Query::Internal tree into an optimal PostList tree.
    33 */
    44/* Copyright (C) 2007,2008,2009 Olly Betts
     5 * Copyright (C) 2008 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or
    78 * modify it under the terms of the GNU General Public License as
     
    8889     */
    8990    PostList * do_or_like(const Xapian::Query::Internal *query, double factor);
    9091
     92    /** Optimise a synonym Xapian::Query::Internal subtree into a PostList
     93     *
     94     *  @param query    The subtree to optimise.
     95     *  @param factor   How much to scale weights for this subtree by.
     96     *
     97     *  @return         A PostList subtree.
     98     */
     99    PostList * do_synonym(const Xapian::Query::Internal *query, double factor);
     100
    91101  public:
    92102    QueryOptimiser(const Xapian::Database::Internal & db_,
    93103                   LocalSubMatch & localsubmatch_,
  • xapian-core/matcher/msetpostlist.cc

     
    22 *  @brief PostList returning entries from an MSet
    33 */
    44/* Copyright (C) 2006,2007,2009 Olly Betts
     5 * Copyright (C) 2009 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or modify
    78 * it under the terms of the GNU General Public License as published by
  • xapian-core/matcher/synonympostlist.cc

     
     1/** @file synonympostlist.cc
     2 * @brief Combine subqueries, weighting as if they are synonyms
     3 */
     4/* Copyright 2007,2009 Lemur Consulting Ltd
     5 *
     6 * This program is free software; you can redistribute it and/or
     7 * modify it under the terms of the GNU General Public License as
     8 * published by the Free Software Foundation; either version 2 of the
     9 * License, or (at your option) any later version.
     10 *
     11 * This program is distributed in the hope that it will be useful,
     12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14 * GNU General Public License for more details.
     15 *
     16 * You should have received a copy of the GNU General Public License
     17 * along with this program; if not, write to the Free Software
     18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
     19 * USA
     20 */
     21
     22#include <config.h>
     23
     24#include "synonympostlist.h"
     25
     26#include "branchpostlist.h"
     27#include "debuglog.h"
     28
     29SynonymPostList::~SynonymPostList()
     30{
     31    delete wt;
     32    delete subtree;
     33}
     34
     35void
     36SynonymPostList::set_weight(const Xapian::Weight * wt_)
     37{
     38    delete wt;
     39    wt = wt_;
     40    want_doclength = wt->get_sumpart_needs_doclength_();
     41    want_wdf = wt->get_sumpart_needs_wdf_();
     42}
     43
     44PostList *
     45SynonymPostList::next(Xapian::weight w_min)
     46{
     47    LOGCALL(MATCH, PostList *, "SynonymPostList::next", w_min);
     48    (void)w_min;
     49    next_handling_prune(subtree, 0, matcher);
     50    RETURN(NULL);
     51}
     52
     53PostList *
     54SynonymPostList::skip_to(Xapian::docid did, Xapian::weight w_min)
     55{
     56    LOGCALL(MATCH, PostList *, "SynonymPostList::skip_to", did << ", " << w_min);
     57    (void)w_min;
     58    skip_to_handling_prune(subtree, did, 0, matcher);
     59    RETURN(NULL);
     60}
     61
     62Xapian::weight
     63SynonymPostList::get_weight() const
     64{
     65    LOGCALL(MATCH, Xapian::weight, "SynonymPostList::get_weight", "");
     66    // The wdf returned can be higher than the doclength.  In particular, this
     67    // can currently occur if the query contains a term more than once; the wdf
     68    // of each occurrence is added up.
     69    //
     70    // However, it's reasonable for weighting algorithms to optimise by
     71    // assuming that get_wdf() will never return more than get_doclength(),
     72    // since the doclength is the sum of the wdfs.
     73    //
     74    // Therefore, we simply clamp the wdf value to the doclength, to ensure
     75    // that this is true.  Note that this requires the doclength to be
     76    // calculated even if the weight object doesn't want it.
     77
     78    if (want_wdf) {
     79        Xapian::termcount wdf = get_wdf();
     80        Xapian::termcount doclen = get_doclength();
     81        if (wdf > doclen) wdf = doclen;
     82        RETURN(wt->get_sumpart(wdf, doclen));
     83    }
     84    RETURN(wt->get_sumpart(0, want_doclength ? get_doclength() : 0));
     85}
     86
     87Xapian::weight
     88SynonymPostList::get_maxweight() const
     89{
     90    LOGCALL(MATCH, Xapian::weight, "SynonymPostList::get_maxweight", "");
     91    RETURN(wt->get_maxpart());
     92}
     93
     94Xapian::weight
     95SynonymPostList::recalc_maxweight()
     96{
     97    LOGCALL(MATCH, Xapian::weight, "SynonymPostList::recalc_maxweight", "");
     98
     99    // Call recalc_maxweight on the subtree once, to ensure that the maxweights
     100    // are initialised.
     101    if (!have_calculated_subtree_maxweights) {
     102        subtree->recalc_maxweight();
     103        have_calculated_subtree_maxweights = true;
     104    }
     105    RETURN(SynonymPostList::get_maxweight());
     106}
     107
     108Xapian::termcount
     109SynonymPostList::get_wdf() const {
     110    LOGCALL(MATCH, Xapian::termcount, "SynonymPostList::get_wdf", "");
     111    RETURN(subtree->get_wdf());
     112}
     113
     114Xapian::doccount
     115SynonymPostList::get_termfreq_min() const {
     116    LOGCALL(MATCH, Xapian::doccount, "SynonymPostList::get_termfreq_min", "");
     117    RETURN(subtree->get_termfreq_min());
     118}
     119
     120Xapian::doccount
     121SynonymPostList::get_termfreq_est() const {
     122    LOGCALL(MATCH, Xapian::doccount, "SynonymPostList::get_termfreq_min", "");
     123    RETURN(subtree->get_termfreq_est());
     124}
     125
     126Xapian::doccount
     127SynonymPostList::get_termfreq_max() const {
     128    LOGCALL(MATCH, Xapian::doccount, "SynonymPostList::get_termfreq_min", "");
     129    RETURN(subtree->get_termfreq_max());
     130}
     131
     132TermFreqs
     133SynonymPostList::get_termfreq_est_using_stats(
     134        const Xapian::Weight::Internal & stats) const
     135{
     136    LOGCALL(MATCH, TermFreqs,
     137            "SynonymPostList::get_termfreq_est_using_stats", stats);
     138    RETURN(subtree->get_termfreq_est_using_stats(stats));
     139}
     140
     141Xapian::docid
     142SynonymPostList::get_docid() const {
     143    LOGCALL(MATCH, Xapian::docid, "SynonymPostList::get_docid", "");
     144    RETURN(subtree->get_docid());
     145}
     146
     147Xapian::termcount
     148SynonymPostList::get_doclength() const {
     149    LOGCALL(MATCH, Xapian::termcount, "SynonymPostList::get_doclength", "");
     150    RETURN(subtree->get_doclength());
     151}
     152
     153bool
     154SynonymPostList::at_end() const {
     155    LOGCALL(MATCH, bool, "SynonymPostList::at_end", "");
     156    RETURN(subtree->at_end());
     157}
     158
     159std::string
     160SynonymPostList::get_description() const
     161{
     162    return "(Synonym " + subtree->get_description() + ")";
     163}
  • xapian-core/weight/weight.cc

    Property changes on: xapian-core/matcher/synonympostlist.cc
    ___________________________________________________________________
    Added: svn:eol-style
       + native
    
     
    22 * @brief Xapian::Weight base class
    33 */
    44/* Copyright (C) 2007,2008,2009 Olly Betts
     5 * Copyright (C) 2009 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or
    78 * modify it under the terms of the GNU General Public License as
     
    7778    init(factor);
    7879}
    7980
     81void
     82Weight::init_(const Internal & stats, Xapian::termcount query_length,
     83              double factor, Xapian::doccount termfreq,
     84              Xapian::doccount reltermfreq)
     85{
     86    LOGCALL_VOID(MATCH, "Weight::init_", stats << ", " << query_length <<
     87            ", " << factor << ", " << termfreq << ", " << reltermfreq);
     88    // Synonym case.
     89    collection_size_ = stats.collection_size;
     90    rset_size_ = stats.rset_size;
     91    if (stats_needed & AVERAGE_LENGTH)
     92        average_length_ = stats.get_average_length();
     93    if (stats_needed & DOC_LENGTH_MAX)
     94        doclength_upper_bound_ = stats.db.get_doclength_upper_bound();
     95    if (stats_needed & DOC_LENGTH_MIN)
     96        doclength_lower_bound_ = stats.db.get_doclength_lower_bound();
     97
     98    // The doclength is an upper bound on the wdf.  This is obviously true for
     99    // normal terms, but SynonymPostList ensures that it is also true for
     100    // synonym terms by clamping the wdf values returned to the doclength.
     101    //
     102    // (This clamping is only actually necessary in cases where a constituent
     103    // term of the synonym is repeated.)
     104    if (stats_needed & WDF_MAX)
     105        wdf_upper_bound_ = stats.db.get_doclength_upper_bound();
     106
     107    termfreq_ = termfreq;
     108    reltermfreq_ = reltermfreq;
     109    query_length_ = query_length;
     110    wqf_ = 1;
     111    init(factor);
     112}
     113
    80114Weight::~Weight() { }
    81115
    82116}
  • xapian-core/tests/api_opsynonym.cc

     
     1/** @file api_opsynonym.cc
     2 * @brief tests of OP_SYNONYM.
     3 */
     4/* Copyright 2009 Olly Betts
     5 * Copyright 2007,2008,2009 Lemur Consulting Ltd
     6 *
     7 * This program is free software; you can redistribute it and/or
     8 * modify it under the terms of the GNU General Public License as
     9 * published by the Free Software Foundation; either version 2 of the
     10 * License, or (at your option) any later version.
     11 *
     12 * This program is distributed in the hope that it will be useful,
     13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     15 * GNU General Public License for more details.
     16 *
     17 * You should have received a copy of the GNU General Public License
     18 * along with this program; if not, write to the Free Software
     19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
     20 * USA
     21 */
     22
     23#include <config.h>
     24
     25#include "api_opsynonym.h"
     26
     27#include <map>
     28#include <set>
     29#include <vector>
     30
     31#include <xapian.h>
     32
     33#include "backendmanager.h"
     34#include "testsuite.h"
     35#include "testutils.h"
     36
     37#include "apitest.h"
     38
     39using namespace std;
     40
     41// #######################################################################
     42// # Tests start here
     43
     44// Check a synonym search
     45DEFINE_TESTCASE(synonym1, backend) {
     46    Xapian::Database db(get_database("etext"));
     47
     48    TEST_REL(db.get_doclength_upper_bound(), >, 0);
     49
     50    Xapian::doccount lots = 214;
     51
     52    // Make a list of lists of subqueries, which are going to be joined
     53    // together as a synonym.
     54    vector<vector<Xapian::Query> > subqueries_list;
     55
     56    // For each set of subqueries, keep a list of the number of results for
     57    // which the weight should be the same when combined with OP_SYNONYM as
     58    // when combined with OP_OR.
     59    vector<int> subqueries_sameweight_count;
     60    vector<int> subqueries_diffweight_count;
     61
     62    vector<Xapian::Query> subqueries;
     63    subqueries.push_back(Xapian::Query("date"));
     64    subqueries_list.push_back(subqueries);
     65    // Single term - all 33 results should be same weight.
     66    subqueries_sameweight_count.push_back(33);
     67    subqueries_diffweight_count.push_back(0);
     68
     69    // Two terms, which co-occur in some documents.
     70    subqueries.clear();
     71    subqueries.push_back(Xapian::Query("sky"));
     72    subqueries.push_back(Xapian::Query("date"));
     73    subqueries_list.push_back(subqueries);
     74    // All 34 results should be different.
     75    subqueries_sameweight_count.push_back(0);
     76    subqueries_diffweight_count.push_back(34);
     77
     78    // Two terms which are entirely disjoint, and where the maximum weight
     79    // doesn't occur in the first or second match.
     80    subqueries.clear();
     81    subqueries.push_back(Xapian::Query("gutenberg"));
     82    subqueries.push_back(Xapian::Query("blockhead"));
     83    subqueries_list.push_back(subqueries);
     84    // All 18 results should be different.
     85    subqueries_sameweight_count.push_back(0);
     86    subqueries_diffweight_count.push_back(18);
     87
     88    subqueries.clear();
     89    subqueries.push_back(Xapian::Query("date"));
     90    subqueries.push_back(Xapian::Query(Xapian::Query::OP_OR,
     91                                       Xapian::Query("sky"),
     92                                       Xapian::Query("glove")));
     93    subqueries_list.push_back(subqueries);
     94    // All 34 results should be different.
     95    subqueries_sameweight_count.push_back(0);
     96    subqueries_diffweight_count.push_back(34);
     97
     98    subqueries.clear();
     99    subqueries.push_back(Xapian::Query("date"));
     100    subqueries.push_back(Xapian::Query(Xapian::Query::OP_OR,
     101                                       Xapian::Query("sky"),
     102                                       Xapian::Query("date")));
     103    subqueries_list.push_back(subqueries);
     104    // All 34 results should be different.
     105    subqueries_sameweight_count.push_back(0);
     106    subqueries_diffweight_count.push_back(34);
     107
     108    subqueries.clear();
     109    subqueries.push_back(Xapian::Query("date"));
     110    subqueries.push_back(Xapian::Query(Xapian::Query::OP_AND_MAYBE,
     111                                       Xapian::Query("sky"),
     112                                       Xapian::Query("date")));
     113    subqueries_list.push_back(subqueries);
     114    // All 34 results should be different.
     115    subqueries_sameweight_count.push_back(0);
     116    subqueries_diffweight_count.push_back(34);
     117
     118    subqueries.clear();
     119    subqueries.push_back(Xapian::Query("date"));
     120    subqueries.push_back(Xapian::Query(Xapian::Query::OP_AND_NOT,
     121                                       Xapian::Query("sky"),
     122                                       Xapian::Query("date")));
     123    subqueries_list.push_back(subqueries);
     124    // All 34 results should be different.
     125    subqueries_sameweight_count.push_back(0);
     126    subqueries_diffweight_count.push_back(34);
     127
     128    subqueries.clear();
     129    subqueries.push_back(Xapian::Query("date"));
     130    subqueries.push_back(Xapian::Query(Xapian::Query::OP_AND,
     131                                       Xapian::Query("sky"),
     132                                       Xapian::Query("date")));
     133    subqueries_list.push_back(subqueries);
     134    // The AND only matches 1 document, so the estimated termfreq for the whole
     135    // synonym works out as 33 (due to rounding), which is the same as the
     136    // termfreq for "date".  Therefore most of the weights are the same as just
     137    // for the pure "date" search, and the only document which gets a different
     138    // weight is the one also matched by "sky" (because it has a wdf boost).
     139    subqueries_sameweight_count.push_back(32);
     140    subqueries_diffweight_count.push_back(1);
     141
     142    subqueries.clear();
     143    subqueries.push_back(Xapian::Query("date"));
     144    subqueries.push_back(Xapian::Query(Xapian::Query::OP_XOR,
     145                                       Xapian::Query("sky"),
     146                                       Xapian::Query("date")));
     147    subqueries_list.push_back(subqueries);
     148    // All 34 results should be different.
     149    subqueries_sameweight_count.push_back(0);
     150    subqueries_diffweight_count.push_back(34);
     151
     152    subqueries.clear();
     153    subqueries.push_back(Xapian::Query("date"));
     154    subqueries.push_back(Xapian::Query(Xapian::Query::OP_SYNONYM,
     155                                       Xapian::Query("sky"),
     156                                       Xapian::Query("date")));
     157    subqueries_list.push_back(subqueries);
     158    // When the top-level operator is OR, the synonym part has an estimated
     159    // termfreq of 35.  When the top-level operator is SYNONYM, the whole query
     160    // has an estimated termfreq of 35, and is in fact the same as the synonym
     161    // part in the OR query, except that the wqf of "date" is 2.  We're
     162    // currently not using the wqfs of components of synonyms, so this
     163    // difference has no effect on the weightings.  Therefore, for the 1
     164    // document which does not contain "date", we get the same result with
     165    // SYNONYM as with OR.
     166    subqueries_sameweight_count.push_back(1);
     167    subqueries_diffweight_count.push_back(33);
     168
     169    subqueries.clear();
     170    subqueries.push_back(Xapian::Query("sky"));
     171    subqueries.push_back(Xapian::Query("date"));
     172    subqueries.push_back(Xapian::Query("stein"));
     173    subqueries.push_back(Xapian::Query("ally"));
     174    subqueries_list.push_back(subqueries);
     175    // All 35 results should be different.
     176    subqueries_sameweight_count.push_back(0);
     177    subqueries_diffweight_count.push_back(35);
     178
     179    subqueries.clear();
     180    subqueries.push_back(Xapian::Query("attitud"));
     181    subqueries.push_back(Xapian::Query(Xapian::Query::OP_PHRASE,
     182                                       Xapian::Query("german"),
     183                                       Xapian::Query("adventur")));
     184    subqueries_list.push_back(subqueries);
     185    // The estimated term frequency for the synonym is 2 (because the estimate
     186    // for the phrase is 0), which is the same as the term frequency of
     187    // "attitud".  Thus, the synonym gets the same weight as "attitud", so
     188    // documents with only "attitud" (but not the phrase) in them get the same
     189    // wdf, and have the same total weight.  There turns out to be exactly one
     190    // such document.
     191    subqueries_sameweight_count.push_back(1);
     192    subqueries_diffweight_count.push_back(3);
     193
     194    for (vector<vector<Xapian::Query> >::size_type subqgroup = 0;
     195         subqgroup != subqueries_list.size(); ++subqgroup)
     196    {
     197        vector<Xapian::Query> * qlist = &(subqueries_list[subqgroup]);
     198        // Run two queries, one joining the subqueries with OR and one joining
     199        // them with SYNONYM.
     200        Xapian::Enquire enquire(db);
     201
     202        // Do the search with OR
     203        Xapian::Query orquery(Xapian::Query::OP_OR, qlist->begin(), qlist->end());
     204        enquire.set_query(orquery);
     205        Xapian::MSet ormset = enquire.get_mset(0, lots);
     206
     207        // Do the search with synonym, getting all the results.
     208        Xapian::Query synquery(Xapian::Query::OP_SYNONYM, qlist->begin(), qlist->end());
     209        enquire.set_query(synquery);
     210        Xapian::MSet synmset = enquire.get_mset(0, lots);
     211
     212        tout << "Comparing " << orquery << " with " << synquery << '\n';
     213
     214        // Check that the queries return some results.
     215        TEST_NOT_EQUAL(synmset.size(), 0);
     216        // Check that the queries return the same number of results.
     217        TEST_EQUAL(synmset.size(), ormset.size());
     218        map<Xapian::docid, Xapian::weight> values_or;
     219        map<Xapian::docid, Xapian::weight> values_synonym;
     220        for (Xapian::doccount i = 0; i < synmset.size(); ++i) {
     221            values_or[*ormset[i]] = ormset[i].get_weight();
     222            values_synonym[*synmset[i]] = synmset[i].get_weight();
     223        }
     224        TEST_EQUAL(values_or.size(), values_synonym.size());
     225
     226        /* Check that most of the weights for items in the "or" mset are
     227         * different from those in the "synonym" mset. */
     228        int same_weight = 0;
     229        int different_weight = 0;
     230        for (map<Xapian::docid, Xapian::weight>::const_iterator
     231             j = values_or.begin(); j != values_or.end(); ++j) {
     232            Xapian::docid did = j->first;
     233            // Check that all the results in the or tree make it to the synonym
     234            // tree.
     235            TEST(values_synonym.find(did) != values_synonym.end());
     236            if (values_or[did] == values_synonym[did]) {
     237                ++same_weight;
     238            } else {
     239                ++different_weight;
     240            }
     241        }
     242
     243        int expected_same = subqueries_sameweight_count[subqgroup];
     244        int expected_diff = subqueries_diffweight_count[subqgroup];
     245
     246        TEST_EQUAL(different_weight, expected_diff);
     247        TEST_EQUAL(same_weight, expected_same);
     248
     249        // Do the search with synonym, but just get the top result.
     250        // (Regression test - the OR subquery in the synonym postlist tree used
     251        // to shortcut incorrectly, and return the wrong result here).
     252        Xapian::MSet mset_top = enquire.get_mset(0, 1);
     253        TEST_EQUAL(mset_top.size(), 1);
     254        TEST(mset_range_is_same(mset_top, 0, synmset, 0, 1));
     255    }
     256    return true;
     257}
     258
     259// Regression test - test a synonym search with a MultiAndPostlist.
     260DEFINE_TESTCASE(synonym2, backend) {
     261    Xapian::Query query;
     262    vector<Xapian::Query> subqueries;
     263    subqueries.push_back(Xapian::Query("file"));
     264    subqueries.push_back(Xapian::Query("the"));
     265    subqueries.push_back(Xapian::Query("next"));
     266    subqueries.push_back(Xapian::Query("reader"));
     267    query = Xapian::Query(Xapian::Query::OP_AND, subqueries.begin(), subqueries.end());
     268    subqueries.clear();
     269    subqueries.push_back(query);
     270    subqueries.push_back(Xapian::Query("gutenberg"));
     271    query = Xapian::Query(Xapian::Query::OP_SYNONYM, subqueries.begin(), subqueries.end());
     272
     273    tout << query << '\n';
     274
     275    Xapian::Database db(get_database("etext"));
     276    Xapian::Enquire enquire(db);
     277    enquire.set_query(query);
     278    Xapian::MSet mset = enquire.get_mset(0, 10);
     279    tout << mset << '\n';
     280
     281    // Regression test that OP_SCALE_WEIGHT works with OP_SYNONYM
     282    double maxposs = mset.get_max_possible();
     283    query = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 10.0);
     284    enquire.set_query(query);
     285    mset = enquire.get_mset(0, 10);
     286    double maxposs2 = mset.get_max_possible();
     287
     288    TEST_EQUAL_DOUBLE(maxposs * 10.0, maxposs2);
     289
     290    return true;
     291}
     292
     293static void
     294check_msets_contain_same_docs(const Xapian::MSet & mset1,
     295                              const Xapian::MSet & mset2)
     296{
     297    TEST_EQUAL(mset1.size(), mset2.size());
     298
     299    set<Xapian::docid> docids;
     300    for (Xapian::doccount i = 0; i < mset1.size(); ++i) {
     301        docids.insert(*mset1[i]);
     302    }
     303
     304    // Check that all the results in mset1 are in mset2.
     305    for (Xapian::doccount j = 0; j < mset2.size(); ++j) {
     306        // Check that we can erase each mset2 entry from docids.  Since mset1
     307        // and mset2 are the same size this means we can be sure that there
     308        // were no repeated docids in either (it would be a bug if there were).
     309        TEST(docids.erase(*mset2[j]));
     310    }
     311}
     312
     313// Test a synonym search which has had its weight scaled to 0.
     314DEFINE_TESTCASE(synonym3, backend) {
     315    Xapian::Query query = Xapian::Query(Xapian::Query::OP_SYNONYM,
     316                                        Xapian::Query("sky"),
     317                                        Xapian::Query("date"));
     318
     319    Xapian::Database db(get_database("etext"));
     320    Xapian::Enquire enquire(db);
     321    enquire.set_query(query);
     322    Xapian::MSet mset_orig = enquire.get_mset(0, db.get_doccount());
     323
     324    tout << query << '\n';
     325    tout << mset_orig << '\n';
     326
     327    // Test that OP_SCALE_WEIGHT with a factor of 0.0 works with OP_SYNONYM
     328    // (this has a special codepath to avoid doing the synonym calculation).
     329    query = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 0.0);
     330    enquire.set_query(query);
     331    Xapian::MSet mset_zero = enquire.get_mset(0, db.get_doccount());
     332
     333    tout << query << '\n';
     334    tout << mset_zero << '\n';
     335
     336    // Check that the queries return some results.
     337    TEST_NOT_EQUAL(mset_zero.size(), 0);
     338    // Check that the queries return the same document IDs, and the zero
     339    // one has zero weight.
     340    check_msets_contain_same_docs(mset_orig, mset_zero);
     341    for (Xapian::doccount i = 0; i < mset_orig.size(); ++i) {
     342        TEST_NOT_EQUAL(mset_orig[i].get_weight(), 0.0);
     343        TEST_EQUAL(mset_zero[i].get_weight(), 0.0);
     344    }
     345
     346    return true;
     347}
     348
     349// Test synonym searches combined with various operators.
     350DEFINE_TESTCASE(synonym4, backend) {
     351    Xapian::Database db(get_database("etext"));
     352    Xapian::Enquire enquire(db);
     353    Xapian::Query syn_query = Xapian::Query(Xapian::Query::OP_SYNONYM,
     354                                            Xapian::Query("gutenberg"),
     355                                            Xapian::Query("blockhead"));
     356    Xapian::Query or_query = Xapian::Query(Xapian::Query::OP_OR,
     357                                           Xapian::Query("gutenberg"),
     358                                           Xapian::Query("blockhead"));
     359    Xapian::Query date_query = Xapian::Query("date");
     360
     361    // Check some queries.
     362    static const Xapian::Query::op operators[] = {
     363        Xapian::Query::OP_AND_MAYBE,
     364        Xapian::Query::OP_AND_NOT,
     365        Xapian::Query::OP_AND,
     366        Xapian::Query::OP_XOR,
     367        Xapian::Query::OP_OR,
     368        Xapian::Query::OP_SYNONYM
     369    };
     370    const Xapian::Query::op * end;
     371    end = operators + sizeof(operators) / sizeof(operators[0]);
     372    for (const Xapian::Query::op * i = operators; i != end; ++i) {
     373        tout.str(string());
     374        Xapian::Query query1(*i, syn_query, date_query);
     375        Xapian::Query query2(*i, or_query, date_query);
     376
     377        enquire.set_query(query1);
     378        tout << "query1:" << query1 << '\n';
     379        Xapian::MSet mset1 = enquire.get_mset(0, db.get_doccount());
     380        tout << "mset1:" << mset1 << '\n';
     381        enquire.set_query(query2);
     382        tout << "query2:" << query2 << '\n';
     383        Xapian::MSet mset2 = enquire.get_mset(0, db.get_doccount());
     384        tout << "mset2:" << mset2 << '\n';
     385
     386        TEST_NOT_EQUAL(mset1.size(), 0);
     387        check_msets_contain_same_docs(mset1, mset2);
     388    }
     389
     390    return true;
     391}
  • xapian-core/tests/queryparsertest.cc

    Property changes on: xapian-core/tests/api_opsynonym.cc
    ___________________________________________________________________
    Added: svn:eol-style
       + native
    
     
    11/* queryparsertest.cc: Tests of Xapian::QueryParser
    22 *
    33 * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009 Olly Betts
     4 * Copyright (C) 2007,2009 Lemur Consulting Ltd
    45 *
    56 * This program is free software; you can redistribute it and/or
    67 * modify it under the terms of the GNU General Public License as
     
    789790    Xapian::Query qobj = qp.parse_query("ab*", Xapian::QueryParser::FLAG_WILDCARD);
    790791    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(abc:(pos=1))");
    791792    qobj = qp.parse_query("muscle*", Xapian::QueryParser::FLAG_WILDCARD);
    792     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscle:(pos=1) OR musclebound:(pos=1)))");
     793    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscle:(pos=1) SYNONYM musclebound:(pos=1)))");
    793794    qobj = qp.parse_query("meat*", Xapian::QueryParser::FLAG_WILDCARD);
    794795    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query()");
    795796    qobj = qp.parse_query("musc*", Xapian::QueryParser::FLAG_WILDCARD);
    796     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscat:(pos=1) OR muscle:(pos=1) OR musclebound:(pos=1) OR muscular:(pos=1)))");
     797    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscat:(pos=1) SYNONYM muscle:(pos=1) SYNONYM musclebound:(pos=1) SYNONYM muscular:(pos=1)))");
    797798    qobj = qp.parse_query("mutt*", Xapian::QueryParser::FLAG_WILDCARD);
    798799    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(mutton:(pos=1))");
    799800    // Regression test (we weren't lowercasing terms before checking if they
     
    886887    qp.add_prefix("author", "A");
    887888    Xapian::Query qobj;
    888889    qobj = qp.parse_query("author:h*", Xapian::QueryParser::FLAG_WILDCARD);
    889     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((Aheinlein:(pos=1) OR Ahuxley:(pos=1)))");
     890    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((Aheinlein:(pos=1) SYNONYM Ahuxley:(pos=1)))");
    890891    qobj = qp.parse_query("author:h* test", Xapian::QueryParser::FLAG_WILDCARD);
    891     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((Aheinlein:(pos=1) OR Ahuxley:(pos=1) OR test:(pos=2)))");
     892    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((Aheinlein:(pos=1) SYNONYM Ahuxley:(pos=1)) OR test:(pos=2)))");
    892893    return true;
    893894#endif
    894895}
     
    918919    doc.add_term("XTcowl");
    919920    doc.add_term("XTcox");
    920921    doc.add_term("ZXTcow");
     922    doc.add_term("XONEpartial");
     923    doc.add_term("XONEpartial2");
     924    doc.add_term("XTWOpartial3");
     925    doc.add_term("XTWOpartial4");
    921926    db.add_document(doc);
    922927    Xapian::QueryParser qp;
    923928    qp.set_database(db);
     
    933938    qobj = qp.parse_query("ab", Xapian::QueryParser::FLAG_PARTIAL);
    934939    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((abc:(pos=1) OR Zab:(pos=1)))");
    935940    qobj = qp.parse_query("muscle", Xapian::QueryParser::FLAG_PARTIAL);
    936     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscle:(pos=1) OR musclebound:(pos=1) OR Zmuscl:(pos=1)))");
     941    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((muscle:(pos=1) SYNONYM musclebound:(pos=1)) OR Zmuscl:(pos=1)))");
    937942    qobj = qp.parse_query("meat", Xapian::QueryParser::FLAG_PARTIAL);
    938943    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(Zmeat:(pos=1))");
    939944    qobj = qp.parse_query("musc", Xapian::QueryParser::FLAG_PARTIAL);
    940     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscat:(pos=1) OR muscle:(pos=1) OR musclebound:(pos=1) OR muscular:(pos=1) OR Zmusc:(pos=1)))");
     945    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((muscat:(pos=1) SYNONYM muscle:(pos=1) SYNONYM musclebound:(pos=1) SYNONYM muscular:(pos=1)) OR Zmusc:(pos=1)))");
    941946    qobj = qp.parse_query("mutt", Xapian::QueryParser::FLAG_PARTIAL);
    942947    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((mutton:(pos=1) OR Zmutt:(pos=1)))");
    943948    qobj = qp.parse_query("abc musc", Xapian::QueryParser::FLAG_PARTIAL);
    944     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((Zabc:(pos=1) OR muscat:(pos=2) OR muscle:(pos=2) OR musclebound:(pos=2) OR muscular:(pos=2) OR Zmusc:(pos=2)))");
     949    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((Zabc:(pos=1) OR (muscat:(pos=2) SYNONYM muscle:(pos=2) SYNONYM musclebound:(pos=2) SYNONYM muscular:(pos=2)) OR Zmusc:(pos=2)))");
    945950    qobj = qp.parse_query("a* mutt", Xapian::QueryParser::FLAG_PARTIAL | Xapian::QueryParser::FLAG_WILDCARD);
    946951    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((abc:(pos=1) OR mutton:(pos=2) OR Zmutt:(pos=2)))");
    947952
    948953    // Check behaviour with stemmed terms, and stem strategy STEM_SOME.
    949954    qobj = qp.parse_query("o", Xapian::QueryParser::FLAG_PARTIAL);
    950     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((out:(pos=1) OR outside:(pos=1) OR Zo:(pos=1)))");
     955    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR Zo:(pos=1)))");
    951956    qobj = qp.parse_query("ou", Xapian::QueryParser::FLAG_PARTIAL);
    952     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((out:(pos=1) OR outside:(pos=1) OR Zou:(pos=1)))");
     957    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR Zou:(pos=1)))");
    953958    qobj = qp.parse_query("out", Xapian::QueryParser::FLAG_PARTIAL);
    954     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((out:(pos=1) OR outside:(pos=1) OR Zout:(pos=1)))");
     959    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR Zout:(pos=1)))");
    955960    qobj = qp.parse_query("outs", Xapian::QueryParser::FLAG_PARTIAL);
    956961    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((outside:(pos=1) OR Zout:(pos=1)))");
    957962    qobj = qp.parse_query("outsi", Xapian::QueryParser::FLAG_PARTIAL);
     
    963968
    964969    // Check behaviour with capitalised terms, and stem strategy STEM_SOME.
    965970    qobj = qp.parse_query("Out", Xapian::QueryParser::FLAG_PARTIAL);
    966     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((out:(pos=1,wqf=2) OR outside:(pos=1)))");
     971    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR out:(pos=1)))");
    967972    qobj = qp.parse_query("Outs", Xapian::QueryParser::FLAG_PARTIAL);
    968973    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((outside:(pos=1) OR outs:(pos=1)))");
    969974    qobj = qp.parse_query("Outside", Xapian::QueryParser::FLAG_PARTIAL);
     
    972977    // And now with stemming strategy STEM_ALL.
    973978    qp.set_stemming_strategy(Xapian::QueryParser::STEM_ALL);
    974979    qobj = qp.parse_query("Out", Xapian::QueryParser::FLAG_PARTIAL);
    975     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((out:(pos=1,wqf=2) OR outside:(pos=1)))");
     980    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR out:(pos=1)))");
    976981    qobj = qp.parse_query("Outs", Xapian::QueryParser::FLAG_PARTIAL);
    977982    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((outside:(pos=1) OR out:(pos=1)))");
    978983    qobj = qp.parse_query("Outside", Xapian::QueryParser::FLAG_PARTIAL);
     
    981986    // Check handling of a case with a prefix.
    982987    qp.set_stemming_strategy(Xapian::QueryParser::STEM_SOME);
    983988    qobj = qp.parse_query("title:cow", Xapian::QueryParser::FLAG_PARTIAL);
    984     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((XTcowl:(pos=1) OR XTcows:(pos=1) OR ZXTcow:(pos=1)))");
     989    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((XTcowl:(pos=1) SYNONYM XTcows:(pos=1)) OR ZXTcow:(pos=1)))");
    985990    qobj = qp.parse_query("title:cows", Xapian::QueryParser::FLAG_PARTIAL);
    986991    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((XTcows:(pos=1) OR ZXTcow:(pos=1)))");
    987992    qobj = qp.parse_query("title:Cow", Xapian::QueryParser::FLAG_PARTIAL);
    988     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((XTcowl:(pos=1) OR XTcows:(pos=1) OR XTcow:(pos=1)))");
     993    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((XTcowl:(pos=1) SYNONYM XTcows:(pos=1)) OR XTcow:(pos=1)))");
    989994    qobj = qp.parse_query("title:Cows", Xapian::QueryParser::FLAG_PARTIAL);
    990995    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(XTcows:(pos=1,wqf=2))");
    991996
     
    993998    // inflate the wqf of the "parsed as normal" version of a partial term
    994999    // by multiplying it by the number of prefixes mapped to.
    9951000    qobj = qp.parse_query("double:vision", Xapian::QueryParser::FLAG_PARTIAL);
    996     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((ZXONEvision:(pos=1) OR ZXTWOvision:(pos=1)))");
     1001    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((ZXONEvision:(pos=1) SYNONYM ZXTWOvision:(pos=1)))");
     1002
     1003    // Test handling of FLAG_PARTIAL when there's more than one prefix.
     1004    qobj = qp.parse_query("double:part", Xapian::QueryParser::FLAG_PARTIAL);
     1005    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((XONEpartial:(pos=1) SYNONYM XONEpartial2:(pos=1) SYNONYM XTWOpartial3:(pos=1) SYNONYM XTWOpartial4:(pos=1)) OR (ZXONEpart:(pos=1) SYNONYM ZXTWOpart:(pos=1))))");
     1006
     1007    // Test handling of FLAG_PARTIAL when there's more than one prefix, without
     1008    // stemming.
     1009    qp.set_stemming_strategy(Xapian::QueryParser::STEM_NONE);
     1010    qobj = qp.parse_query("double:part", Xapian::QueryParser::FLAG_PARTIAL);
     1011    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((XONEpartial:(pos=1) SYNONYM XONEpartial2:(pos=1) SYNONYM XTWOpartial3:(pos=1) SYNONYM XTWOpartial4:(pos=1)) OR (XONEpart:(pos=1) SYNONYM XTWOpart:(pos=1))))");
     1012    qobj = qp.parse_query("double:partial", Xapian::QueryParser::FLAG_PARTIAL);
     1013    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((XONEpartial:(pos=1) SYNONYM XONEpartial2:(pos=1) SYNONYM XTWOpartial3:(pos=1) SYNONYM XTWOpartial4:(pos=1)) OR (XONEpartial:(pos=1) SYNONYM XTWOpartial:(pos=1))))");
    9971014
    9981015    return true;
    9991016#endif
     
    15631580}
    15641581
    15651582static test test_synonym_queries[] = {
    1566     { "searching", "(Zsearch:(pos=1) OR Zfind:(pos=1) OR Zlocate:(pos=1))" },
    1567     { "search", "(Zsearch:(pos=1) OR find:(pos=1))" },
    1568     { "Search", "(search:(pos=1) OR find:(pos=1))" },
     1583    { "searching", "(Zsearch:(pos=1) SYNONYM Zfind:(pos=1) SYNONYM Zlocate:(pos=1))" },
     1584    { "search", "(Zsearch:(pos=1) SYNONYM find:(pos=1))" },
     1585    { "Search", "(search:(pos=1) SYNONYM find:(pos=1))" },
    15691586    { "Searching", "searching:(pos=1)" },
    1570     { "searching OR terms", "(Zsearch:(pos=1) OR Zfind:(pos=1) OR Zlocate:(pos=1) OR Zterm:(pos=2))" },
    1571     { "search OR terms", "(Zsearch:(pos=1) OR find:(pos=1) OR Zterm:(pos=2))" },
    1572     { "search +terms", "(Zterm:(pos=2) AND_MAYBE (Zsearch:(pos=1) OR find:(pos=1)))" },
    1573     { "search -terms", "((Zsearch:(pos=1) OR find:(pos=1)) AND_NOT Zterm:(pos=2))" },
    1574     { "+search terms", "((Zsearch:(pos=1) OR find:(pos=1)) AND_MAYBE Zterm:(pos=2))" },
    1575     { "-search terms", "(Zterm:(pos=2) AND_NOT (Zsearch:(pos=1) OR find:(pos=1)))" },
    1576     { "search terms", "(Zsearch:(pos=1) OR find:(pos=1) OR Zterm:(pos=2))" },
     1587    { "searching OR terms", "((Zsearch:(pos=1) SYNONYM Zfind:(pos=1) SYNONYM Zlocate:(pos=1)) OR Zterm:(pos=2))" },
     1588    { "search OR terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) OR Zterm:(pos=2))" },
     1589    { "search +terms", "(Zterm:(pos=2) AND_MAYBE (Zsearch:(pos=1) SYNONYM find:(pos=1)))" },
     1590    { "search -terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) AND_NOT Zterm:(pos=2))" },
     1591    { "+search terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) AND_MAYBE Zterm:(pos=2))" },
     1592    { "-search terms", "(Zterm:(pos=2) AND_NOT (Zsearch:(pos=1) SYNONYM find:(pos=1)))" },
     1593    { "search terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) OR Zterm:(pos=2))" },
    15771594    // Shouldn't trigger synonyms:
    15781595    { "\"search terms\"", "(search:(pos=1) PHRASE 2 terms:(pos=2))" },
    15791596    { NULL, NULL }
     
    16131630
    16141631static test test_multi_synonym_queries[] = {
    16151632    { "sun OR tan OR cream", "(Zsun:(pos=1) OR Ztan:(pos=2) OR Zcream:(pos=3))" },
    1616     { "sun tan", "(Zsun:(pos=1) OR Ztan:(pos=2) OR bathe:(pos=1))" },
    1617     { "sun tan cream", "(Zsun:(pos=1) OR Ztan:(pos=2) OR Zcream:(pos=3) OR lotion:(pos=1))" },
    1618     { "beach sun tan holiday", "(Zbeach:(pos=1) OR Zsun:(pos=2) OR Ztan:(pos=3) OR bathe:(pos=2) OR Zholiday:(pos=4))" },
    1619     { "sun tan sun tan cream", "(Zsun:(pos=1) OR Ztan:(pos=2) OR bathe:(pos=1) OR Zsun:(pos=3) OR Ztan:(pos=4) OR Zcream:(pos=5) OR lotion:(pos=3))" },
    1620     { "single", "(Zsingl:(pos=1) OR record:(pos=1))" },
     1633    { "sun tan", "((Zsun:(pos=1) OR Ztan:(pos=2)) SYNONYM bathe:(pos=1))" },
     1634    { "sun tan cream", "((Zsun:(pos=1) OR Ztan:(pos=2) OR Zcream:(pos=3)) SYNONYM lotion:(pos=1))" },
     1635    { "beach sun tan holiday", "(Zbeach:(pos=1) OR ((Zsun:(pos=2) OR Ztan:(pos=3)) SYNONYM bathe:(pos=2)) OR Zholiday:(pos=4))" },
     1636    { "sun tan sun tan cream", "(((Zsun:(pos=1) OR Ztan:(pos=2)) SYNONYM bathe:(pos=1)) OR ((Zsun:(pos=3) OR Ztan:(pos=4) OR Zcream:(pos=5)) SYNONYM lotion:(pos=3)))" },
     1637    { "single", "(Zsingl:(pos=1) SYNONYM record:(pos=1))" },
    16211638    { NULL, NULL }
    16221639};
    16231640
     
    16561673
    16571674static test test_synonym_op_queries[] = {
    16581675    { "searching", "Zsearch:(pos=1)" },
    1659     { "~searching", "(Zsearch:(pos=1) OR Zfind:(pos=1) OR Zlocate:(pos=1))" },
    1660     { "~search", "(Zsearch:(pos=1) OR find:(pos=1))" },
    1661     { "~Search", "(search:(pos=1) OR find:(pos=1))" },
     1676    { "~searching", "(Zsearch:(pos=1) SYNONYM Zfind:(pos=1) SYNONYM Zlocate:(pos=1))" },
     1677    { "~search", "(Zsearch:(pos=1) SYNONYM find:(pos=1))" },
     1678    { "~Search", "(search:(pos=1) SYNONYM find:(pos=1))" },
    16621679    { "~Searching", "searching:(pos=1)" },
    1663     { "~searching OR terms", "(Zsearch:(pos=1) OR Zfind:(pos=1) OR Zlocate:(pos=1) OR Zterm:(pos=2))" },
    1664     { "~search OR terms", "(Zsearch:(pos=1) OR find:(pos=1) OR Zterm:(pos=2))" },
    1665     { "~search +terms", "(Zterm:(pos=2) AND_MAYBE (Zsearch:(pos=1) OR find:(pos=1)))" },
    1666     { "~search -terms", "((Zsearch:(pos=1) OR find:(pos=1)) AND_NOT Zterm:(pos=2))" },
    1667     { "+~search terms", "((Zsearch:(pos=1) OR find:(pos=1)) AND_MAYBE Zterm:(pos=2))" },
    1668     { "-~search terms", "(Zterm:(pos=2) AND_NOT (Zsearch:(pos=1) OR find:(pos=1)))" },
    1669     { "~search terms", "(Zsearch:(pos=1) OR find:(pos=1) OR Zterm:(pos=2))" },
     1680    { "~searching OR terms", "((Zsearch:(pos=1) SYNONYM Zfind:(pos=1) SYNONYM Zlocate:(pos=1)) OR Zterm:(pos=2))" },
     1681    { "~search OR terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) OR Zterm:(pos=2))" },
     1682    { "~search +terms", "(Zterm:(pos=2) AND_MAYBE (Zsearch:(pos=1) SYNONYM find:(pos=1)))" },
     1683    { "~search -terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) AND_NOT Zterm:(pos=2))" },
     1684    { "+~search terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) AND_MAYBE Zterm:(pos=2))" },
     1685    { "-~search terms", "(Zterm:(pos=2) AND_NOT (Zsearch:(pos=1) SYNONYM find:(pos=1)))" },
     1686    { "~search terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) OR Zterm:(pos=2))" },
    16701687    // FIXME: should look for multi-term synonym...
    16711688    { "~\"search terms\"", "(search:(pos=1) PHRASE 2 terms:(pos=2))" },
    16721689    { NULL, NULL }
  • xapian-core/tests/Makefile.am

     
    116116 api_db.cc \
    117117 api_generated.cc \
    118118 api_nodb.cc \
     119 api_opsynonym.cc \
    119120 api_percentages.cc \
    120121 api_posdb.cc \
    121122 api_query.cc \
  • xapian-core/include/xapian/query.h

    Property changes on: xapian-core/tests
    ___________________________________________________________________
    Modified: svn:ignore
       - .*.sw?
    *.lo
    *.pyc
    Makefile.in
    Makefile
    .deps
    .libs
    apitest
    internaltest
    perftest
    queryparsertest
    runsrv
    runtest
    stemtest
    termgentest
    apitest.exe
    internaltest.exe
    perftest.exe
    queryparsertest.exe
    stemtest.exe
    termgentest.exe
    .chert
    .flint
    .multi
    .multichert
    .multiflint
    .stub
    api_all.h
    api_anydb.h
    api_backend.h
    api_closedb.h
    api_collapse.h
    api_collated.h
    api_collated.stamp
    api_db.h
    api_generated.cc
    api_generated.h
    api_nodb.h
    api_percentages.h
    api_posdb.h
    api_replicate.h
    api_query.h
    api_scalability.h
    api_serialise.h
    api_sorting.h
    api_spelling.h
    api_transdb.h
    api_unicode.h
    api_valuestats.h
    api_valuestream.h
    api_wrdb.h
    perftest_all.h
    perftest_collated.h
    perftest_collated.stamp
    perftest_matchdecider.h
    perftest_randomidx.h
    perflog.xml
    submitperftest
    
       + .*.sw?
    *.lo
    *.pyc
    Makefile.in
    Makefile
    .deps
    .libs
    apitest
    internaltest
    perftest
    queryparsertest
    runsrv
    runtest
    stemtest
    termgentest
    apitest.exe
    internaltest.exe
    perftest.exe
    queryparsertest.exe
    stemtest.exe
    termgentest.exe
    .chert
    .flint
    .multi
    .multichert
    .multiflint
    .stub
    api_all.h
    api_anydb.h
    api_backend.h
    api_closedb.h
    api_collapse.h
    api_collated.h
    api_collated.stamp
    api_db.h
    api_generated.cc
    api_generated.h
    api_nodb.h
    api_opsynonym.h
    api_percentages.h
    api_posdb.h
    api_replicate.h
    api_query.h
    api_scalability.h
    api_serialise.h
    api_sorting.h
    api_spelling.h
    api_transdb.h
    api_unicode.h
    api_valuestats.h
    api_valuestream.h
    api_wrdb.h
    perftest_all.h
    perftest_collated.h
    perftest_collated.stamp
    perftest_matchdecider.h
    perftest_randomidx.h
    perflog.xml
    submitperftest
    
    
     
    119119            OP_VALUE_GE,
    120120
    121121            /** Filter by a less-than-or-equal test on a document value. */
    122             OP_VALUE_LE
     122            OP_VALUE_LE,
     123
     124            /** Treat a set of queries as synonyms.
     125             *
     126             *  This returns all results which match at least one of the
     127             *  queries, but weighting as if all the sub-queries are instances
     128             *  of the same term: so multiple matching terms for a document
     129             *  increase the wdf value used, and the term frequency is based on
     130             *  the number of documents which would match an OR of all the
     131             *  subqueries.
     132             *
     133             *  The term frequency used will usually be an approximation,
     134             *  because calculating the precise combined term frequency would
     135             *  be overly expensive.
     136             *
     137             *  Identical to OP_OR, except for the weightings returned.
     138             */
     139            OP_SYNONYM
    123140        } op;
    124141
    125142        /** Copy constructor. */
  • xapian-core/include/xapian/weight.h

     
    22 * @brief Weighting scheme API.
    33 */
    44/* Copyright (C) 2007,2008,2009 Olly Betts
     5 * Copyright (C) 2009 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or
    78 * modify it under the terms of the GNU General Public License as
     
    212213               const std::string & term, Xapian::termcount wqf_,
    213214               double factor);
    214215
     216    /** @private @internal Initialise this object to calculate weights for a
     217     *  synonym.
     218     *
     219     *  @param stats       Source of statistics.
     220     *  @param query_len_  Query length.
     221     *  @param factor      Any scaling factor (e.g. from OP_SCALE_WEIGHT).
     222     *  @param termfreq    The termfreq to use.
     223     *  @param reltermfreq The reltermfreq to use.
     224     */
     225    void init_(const Internal & stats, Xapian::termcount query_len_,
     226               double factor, Xapian::doccount termfreq,
     227               Xapian::doccount reltermfreq);
     228
    215229    /** @private @internal Initialise this object to calculate the extra weight
    216230     *  component.
    217231     *
     
    230244        return stats_needed & DOC_LENGTH;
    231245    }
    232246
     247    /** @private @internal Return true if the WDF is needed.
     248     *
     249     *  If this method returns true, then the WDF will be fetched and passed to
     250     *  @a get_sumpart().  Otherwise 0 may be passed for the wdf.
     251     */
     252    bool get_sumpart_needs_wdf_() const {
     253        return stats_needed & WDF;
     254    }
     255
    233256  protected:
    234257    /// Only allow subclasses to copy us.
    235258    Weight(const Weight &);
     
    373396        need_stat(RELTERMFREQ);
    374397        need_stat(WDF);
    375398        need_stat(WDF_MAX);
     399        need_stat(WDF);
    376400        if (param_k2 != 0 || (param_k1 != 0 && param_b != 0)) {
    377401            need_stat(DOC_LENGTH_MIN);
    378402            need_stat(AVERAGE_LENGTH);
     
    392416        need_stat(RELTERMFREQ);
    393417        need_stat(WDF);
    394418        need_stat(WDF_MAX);
     419        need_stat(WDF);
    395420        need_stat(DOC_LENGTH_MIN);
    396421        need_stat(AVERAGE_LENGTH);
    397422        need_stat(DOC_LENGTH);
     
    455480        need_stat(DOC_LENGTH_MIN);
    456481        need_stat(WDF);
    457482        need_stat(WDF_MAX);
     483        need_stat(WDF);
    458484    }
    459485
    460486    std::string name() const;
  • xapian-core/common/output.h

     
    8181XAPIAN_OUTPUT_FUNCTION(Xapian::DatabaseReplica)
    8282
    8383#include "weightinternal.h"
     84XAPIAN_OUTPUT_FUNCTION(TermFreqs)
    8485XAPIAN_OUTPUT_FUNCTION(Xapian::Weight::Internal)
    8586
    8687#endif /* XAPIAN_INCLUDED_OUTPUT_H */
  • xapian-core/common/remoteprotocol.h

     
    4040// 30.5: New MSG_GETMSET which expects MSet's percent_factor to be returned.
    4141// 30.6: Support for OP_VALUE_GE and OP_VALUE_LE in query serialisation
    4242// 31: Clean up for Xapian 1.1.0
     43
     44// NOTE: when next breaking compatibility, address the FIXME in
     45// net/serialise.cc in serialise_stats() regarding serialising the termfreq and
     46// reltermfreqs together, rather than as separate lists.
     47
    4348#define XAPIAN_REMOTE_PROTOCOL_MAJOR_VERSION 31
    4449#define XAPIAN_REMOTE_PROTOCOL_MINOR_VERSION 0
    4550
  • xapian-core/api/omqueryinternal.cc

    Property changes on: xapian-core/m4-macros/xapian-1.1.m4
    ___________________________________________________________________
    Deleted: svn:mergeinfo
    
     
    6565        case Xapian::Query::OP_VALUE_RANGE:
    6666        case Xapian::Query::OP_VALUE_GE:
    6767        case Xapian::Query::OP_VALUE_LE:
     68        case Xapian::Query::OP_SYNONYM:
    6869            return 0;
    6970        case Xapian::Query::OP_SCALE_WEIGHT:
    7071            return 1;
     
    100101        case Xapian::Query::OP_NEAR:
    101102        case Xapian::Query::OP_PHRASE:
    102103        case Xapian::Query::OP_ELITE_SET:
     104        case Xapian::Query::OP_SYNONYM:
    103105            return UINT_MAX;
    104106        default:
    105107            Assert(false);
     
    221223                result += ".";
    222224                result += str_parameter; // serialise_double(get_dbl_parameter());
    223225                break;
     226            case Xapian::Query::OP_SYNONYM:
     227                result += "=";
     228                break;
    224229        }
    225230    }
    226231    return result;
     
    251256        case Xapian::Query::OP_VALUE_GE:        name = "VALUE_GE"; break;
    252257        case Xapian::Query::OP_VALUE_LE:        name = "VALUE_LE"; break;
    253258        case Xapian::Query::OP_SCALE_WEIGHT:    name = "SCALE_WEIGHT"; break;
     259        case Xapian::Query::OP_SYNONYM:         name = "SYNONYM"; break;
    254260    }
    255261    return name;
    256262}
     
    584590                    return qint_from_vector(Xapian::Query::OP_SCALE_WEIGHT,
    585591                                            subqs, 0, param);
    586592                }
    587                 default:
     593                case '=': {
     594                    return qint_from_vector(Xapian::Query::OP_SYNONYM, subqs);
     595                }
     596                default:
    588597                    LOGLINE(UNKNOWN, "Can't parse remainder `" << p - 1 << "'");
    589598                    throw Xapian::InvalidArgumentError("Invalid query string");
    590599            }
     
    809818        case OP_ELITE_SET:
    810819        case OP_OR:
    811820        case OP_XOR:
     821        case OP_SYNONYM:
    812822            // Doing an "OR" type operation - if we've got any MatchNothing
    813823            // subnodes, drop them; except that we mustn't become an empty
    814824            // node due to this, so we never drop a MatchNothing subnode
     
    900910                }
    901911            }
    902912            break;
    903         case OP_OR: case OP_AND: case OP_XOR:
     913        case OP_OR: case OP_AND: case OP_XOR: case OP_SYNONYM:
    904914            // Remove duplicates if we can.
    905915            if (subqs.size() > 1) collapse_subqs();
    906916            break;
     
    944954void
    945955Xapian::Query::Internal::collapse_subqs()
    946956{
    947     Assert(op == OP_OR || op == OP_AND || op == OP_XOR);
     957    Assert(op == OP_OR || op == OP_AND || op == OP_XOR || op == OP_SYNONYM);
    948958    typedef set<Xapian::Query::Internal *, SortPosName> subqtable;
    949959    subqtable sqtab;
    950960
     
    10381048    Assert(!is_leaf(op));
    10391049    if (subq == 0) {
    10401050        subqs.push_back(0);
    1041     } else if (op == subq->op && (op == OP_AND || op == OP_OR || op == OP_XOR)) {
     1051    } else if (op == subq->op && (op == OP_AND || op == OP_OR || op == OP_XOR || op == OP_SYNONYM)) {
    10421052        // Distribute the subquery.
    10431053        for (subquery_list::const_iterator i = subq->subqs.begin();
    10441054             i != subq->subqs.end(); i++) {
     
    10551065    Assert(!is_leaf(op));
    10561066    if (subq == 0) {
    10571067        subqs.push_back(0);
    1058     } else if (op == subq->op && (op == OP_AND || op == OP_OR || op == OP_XOR)) {
     1068    } else if (op == subq->op && (op == OP_AND || op == OP_OR || op == OP_XOR || op == OP_SYNONYM)) {
    10591069        // Distribute the subquery.
    10601070        for (subquery_list::const_iterator i = subq->subqs.begin();
    10611071             i != subq->subqs.end(); i++) {
  • xapian-core/backends/multi/multi_postlist.cc

     
    22 *
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002,2003,2004,2005,2007,2008,2009 Olly Betts
     5 * Copyright 2009 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or
    78 * modify it under the terms of the GNU General Public License as
  • xapian-core/backends/multi/multi_postlist.h

     
    22 *
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2003,2005,2007,2009 Olly Betts
     5 * Copyright 2009 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or
    78 * modify it under the terms of the GNU General Public License as
  • xapian-bindings/python/smoketest2.py

     
    213213    qp.set_stemming_strategy(qp.STEM_SOME)
    214214    qp.set_stemmer(xapian.Stem('en'))
    215215    expect_query(qp.parse_query("foo o", qp.FLAG_PARTIAL),
    216                  "(Zfoo:(pos=1) AND (out:(pos=2) OR outsid:(pos=2) OR Zo:(pos=2)))")
     216                 "(Zfoo:(pos=1) AND ((out:(pos=2) SYNONYM outsid:(pos=2)) OR Zo:(pos=2)))")
    217217
    218218    expect_query(qp.parse_query("foo outside", qp.FLAG_PARTIAL),
    219219                 "(Zfoo:(pos=1) AND Zoutsid:(pos=2))")
  • xapian-bindings/python/smoketest3.py

    Property changes on: xapian-bindings/python/generate-python-exceptions
    ___________________________________________________________________
    Deleted: svn:mergeinfo
    
     
    153153
    154154    # Feature test for Document.values
    155155    count = 0
    156     for term in doc.values():
     156    for term in list(doc.values()):
    157157        count += 1
    158158    expect(count, 0, "Unexpected number of entries in doc.values")
    159159
     
    213213    qp.set_stemming_strategy(qp.STEM_SOME)
    214214    qp.set_stemmer(xapian.Stem('en'))
    215215    expect_query(qp.parse_query("foo o", qp.FLAG_PARTIAL),
    216                  "(Zfoo:(pos=1) AND (out:(pos=2) OR outsid:(pos=2) OR Zo:(pos=2)))")
     216                 "(Zfoo:(pos=1) AND ((out:(pos=2) SYNONYM outsid:(pos=2)) OR Zo:(pos=2)))")
    217217
    218218    expect_query(qp.parse_query("foo outside", qp.FLAG_PARTIAL),
    219219                 "(Zfoo:(pos=1) AND Zoutsid:(pos=2))")