Ticket #50: opsynonym_changes_12471_12472.patch

File opsynonym_changes_12471_12472.patch, 59.3 KB (added by Richard Boulton, 16 years ago)

Latest patch from trunk to the opsynonym branch

  • xapian-maintainer-tools/win32msvc/win32_matcher.mak

     
    3434    $(INTDIR)\queryoptimiser.obj\
    3535    $(INTDIR)\rset.obj\
    3636    $(INTDIR)\selectpostlist.obj\
     37    $(INTDIR)\synonympostlist.obj\
    3738    $(INTDIR)\valuerangepostlist.obj\
    3839    $(INTDIR)\valuegepostlist.obj\
    3940    $(INTDIR)\xorpostlist.obj\
     
    6061    $(INTDIR)\queryoptimiser.cc\
    6162    $(INTDIR)\rset.cc\
    6263    $(INTDIR)\selectpostlist.cc\
     64    $(INTDIR)\synonympostlist.cc\
    6365    $(INTDIR)\valuerangepostlist.cc\
    6466    $(INTDIR)\valuegepostlist.cc\
    6567    $(INTDIR)\xorpostlist.cc\
  • xapian-core/queryparser/queryparser.lemony

     
    22/* queryparser.lemony: build a Xapian::Query object from a user query string.
    33 *
    44 * Copyright (C) 2004,2005,2006,2007,2008 Olly Betts
     5 * Copyright (C) 2007,2008,2009 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or
    78 * modify it under the terms of the GNU General Public License as
     
    160161
    161162    Query * as_wildcarded_query(State * state) const;
    162163
     164    /** Build a query for a term at the very end of a query when FLAG_PARTIAL
     165     *  is in use.
     166     *
     167     *  This query should match documents containing terms which start with the
     168     *  characters seen, but should rank exact matches higher (since the user
     169     *  might have finished typing - we simply don't know).
     170     */
    163171    Query * as_partial_query(State * state_) const;
    164172
    165173    Query get_query() const;
     
    279287            end = db.synonyms_end(term);
    280288        }
    281289        while (syn != end) {
    282             q = Query(Query::OP_OR, q, Query(*syn, 1, pos));
     290            q = Query(Query::OP_SYNONYM, q, Query(*syn, 1, pos));
    283291            ++syn;
    284292        }
    285293    }
     
    345353        }
    346354    }
    347355    delete this;
    348     return new Query(Query::OP_OR, subqs.begin(), subqs.end());
     356    return new Query(Query::OP_SYNONYM, subqs.begin(), subqs.end());
    349357}
    350358
    351359Query *
    352360Term::as_partial_query(State * state_) const
    353361{
    354362    Database db = state_->get_database();
    355     vector<Query> subqs;
     363    vector<Query> subqs_partial; // A synonym of all the partial terms.
     364    vector<Query> subqs_full; // A synonym of all the full terms.
    356365    list<string>::const_iterator piter;
    357366    for (piter = prefixes.begin(); piter != prefixes.end(); ++piter) {
    358367        string root = *piter;
    359368        root += name;
    360369        TermIterator t = db.allterms_begin(root);
    361370        while (t != db.allterms_end(root)) {
    362             subqs.push_back(Query(*t, 1, pos));
     371            subqs_partial.push_back(Query(*t, 1, pos));
    363372            ++t;
    364373        }
    365374        // Add the term, as it would normally be handled, as an alternative.
    366         subqs.push_back(Query(make_term(*piter), 1, pos));
     375        subqs_full.push_back(Query(make_term(*piter), 1, pos));
    367376    }
    368377    delete this;
    369     return new Query(Query::OP_OR, subqs.begin(), subqs.end());
     378    return new Query(Query::OP_OR,
     379                     Query(Query::OP_SYNONYM,
     380                           subqs_partial.begin(), subqs_partial.end()),
     381                     Query(Query::OP_SYNONYM,
     382                           subqs_full.begin(), subqs_full.end()));
    370383}
    371384
    372385inline bool
     
    11681181                subqs2.push_back(Query(*syn, 1, pos));
    11691182                ++syn;
    11701183            }
    1171             Query q_synonym_terms(Query::OP_OR, subqs2.begin(), subqs2.end());
     1184            Query q_synonym_terms(Query::OP_SYNONYM, subqs2.begin(), subqs2.end());
    11721185            subqs2.clear();
    1173             subqs.push_back(Query(Query::OP_OR,
     1186            subqs.push_back(Query(Query::OP_SYNONYM,
    11741187                                  q_original_terms, q_synonym_terms));
    11751188        }
    11761189    } else {
  • xapian-core/matcher/Makefile.mk

     
    1818        matcher/queryoptimiser.h\
    1919        matcher/remotesubmatch.h\
    2020        matcher/selectpostlist.h\
     21        matcher/synonympostlist.h\
    2122        matcher/valuegepostlist.h\
    2223        matcher/valuerangepostlist.h\
    2324        matcher/xorpostlist.h
     
    5455        matcher/queryoptimiser.cc\
    5556        matcher/rset.cc\
    5657        matcher/selectpostlist.cc\
     58        matcher/synonympostlist.cc\
    5759        matcher/valuegepostlist.cc\
    5860        matcher/valuerangepostlist.cc\
    5961        matcher/xorpostlist.cc
  • xapian-core/matcher/branchpostlist.cc

     
    11/** @file branchpostlist.cc
    22 * @brief Virtual base class for branched types of postlist.
    33 */
    4 /* Copyright (C) 2007 Lemur Consulting Ltd
    5  * Copyright (C) 2007 Olly Betts
     4/* Copyright (C) 2007 Olly Betts
    65 *
    76 * This program is free software; you can redistribute it and/or
    87 * modify it under the terms of the GNU General Public License as
     
    2928    delete l;
    3029    delete r;
    3130}
    32 
    33 Xapian::termcount
    34 BranchPostList::get_wdf() const
    35 {
    36     return l->get_wdf() + r->get_wdf();
    37 }
  • xapian-core/matcher/branchpostlist.h

     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002 Ananova Ltd
    55 * Copyright 2003,2004,2007 Olly Betts
    6  * Copyright 2007 Lemur Consulting Ltd
    76 *
    87 * This program is free software; you can redistribute it and/or
    98 * modify it under the terms of the GNU General Public License as
     
    7271                : l(l_), r(r_), matcher(matcher_) {}
    7372
    7473        virtual ~BranchPostList();
    75 
    76         /** get_wdf() for branch postlists returns the sum of the wdfs of the
    77          *  sub postlists.  The wdf isn't really meaningful in many situations,
    78          *  but if the lists are being combined as a synonym we want the sum of
    79          *  the wdfs, so we do that in general.
    80          */
    81         virtual Xapian::termcount get_wdf() const;
    8274};
    8375
    8476// Helper functions - call next/skip_to on a postlist and handle any
  • xapian-core/matcher/andpostlist.h

     
    22 *
    33 * Copyright 2002 Ananova Ltd
    44 * Copyright 2003,2004,2009 Olly Betts
     5 * Copyright 2009 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or
    78 * modify it under the terms of the GNU General Public License as
     
    7071                    MultiMatch *matcher_,
    7172                    Xapian::doccount dbsize_,
    7273                    bool replacement = false);
     74
     75        /** get_wdf() for AND postlists returns the sum of the wdfs of the sub
     76         *  postlists - this is desirable when the AND is part of a synonym.
     77         */
     78        Xapian::termcount get_wdf() const;
    7379};
    7480
    7581#endif /* OM_HGUARD_ANDPOSTLIST_H */
  • xapian-core/matcher/multimatch.cc

     
    790790
    791791                LOGVALUE(MATCH, denom);
    792792                LOGVALUE(MATCH, percent_scale);
    793                 Assert(percent_scale <= denom);
    794                 denom *= greatest_wt;
    795                 Assert(denom > 0);
    796                 percent_scale /= denom;
     793                AssertRel(percent_scale,<=,denom);
     794                if (denom == 0) {
     795                    // This happens if the top-level operator is OP_SYNONYM.
     796                    percent_scale = 1.0 / greatest_wt;
     797                } else {
     798                    denom *= greatest_wt;
     799                    AssertRel(denom,>,0);
     800                    percent_scale /= denom;
     801                }
    797802            } else {
    798803                // If all the terms match, the 2 sums of weights cancel
    799804                percent_scale = 1.0 / greatest_wt;
  • xapian-core/matcher/localmatch.cc

     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002 Ananova Ltd
    55 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009 Olly Betts
    6  * Copyright 2007 Lemur Consulting Ltd
     6 * Copyright 2007,2008,2009 Lemur Consulting Ltd
    77 *
    88 * This program is free software; you can redistribute it and/or
    99 * modify it under the terms of the GNU General Public License as
     
    3131#include "omdebug.h"
    3232#include "omqueryinternal.h"
    3333#include "queryoptimiser.h"
     34#include "synonympostlist.h"
    3435#include "weightinternal.h"
    3536
    3637#include <cfloat>
     
    111112}
    112113
    113114PostList *
     115LocalSubMatch::make_synonym_postlist(PostList * or_pl, MultiMatch * matcher,
     116                                     double factor)
     117{
     118    DEBUGCALL(MATCH, PostList *, "LocalSubMatch::make_synonym_postlist",
     119              "[or_pl], [matcher], " << factor);
     120    LOGVALUE(MATCH, or_pl->get_termfreq_est());
     121    AutoPtr<SynonymPostList> res(new SynonymPostList(or_pl, matcher));
     122    AutoPtr<Xapian::Weight> wt(wt_factory->clone_());
     123
     124    // FIXME - calculate the reltermfreq to use and pass it in?
     125    wt->init_(*stats, qlen, factor, or_pl->get_termfreq_est());
     126
     127    res->set_weight(wt.release());
     128    RETURN(res.release());
     129}
     130
     131PostList *
    114132LocalSubMatch::postlist_from_op_leaf_query(const Xapian::Query::Internal *query,
    115133                                           double factor)
    116134{
     
    132150        Xapian::doccount tf = stats->get_termfreq(query->tname);
    133151        Xapian::weight weight = boolean ? 0 : wt->get_maxpart();
    134152        Xapian::MSet::Internal::TermFreqAndWeight info(tf, weight);
     153        LOGLINE(MATCH, "Setting term_info[" << query->tname << "] to (" << tf << ", " << weight << ")");
    135154        term_info.insert(make_pair(query->tname, info));
    136155    } else if (!boolean) {
    137156        i->second.termweight += wt->get_maxpart();
     157        AssertEq(stats->get_termfreq(query->tname), i->second.termfreq);
     158        LOGLINE(MATCH, "Increasing term_info[" << query->tname << "] to (" << i->second.termfreq << ", " << i->second.termweight << ")");
    138159    }
    139160
    140161    LeafPostList * pl = db->open_post_list(query->tname);
  • xapian-core/matcher/localmatch.h

     
    22 *  @brief SubMatch class for a local database.
    33 */
    44/* Copyright (C) 2006,2007,2009 Olly Betts
     5 * Copyright (C) 2007 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or modify
    78 * it under the terms of the GNU General Public License as published by
     
    8283    PostList * get_postlist_and_term_info(MultiMatch *matcher,
    8384        std::map<string, Xapian::MSet::Internal::TermFreqAndWeight> *termfreqandwts);
    8485
     86    /** Convert a postlist into a synonym postlist.
     87     */
     88    PostList * make_synonym_postlist(PostList * or_pl, MultiMatch * matcher,
     89                                     double factor);
     90
    8591    /** Convert an OP_LEAF query to a PostList.
    8692     *
    8793     *  This is called by QueryOptimiser when it reaches an OP_LEAF query.
  • xapian-core/matcher/xorpostlist.h

     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002 Ananova Ltd
    55 * Copyright 2003,2004,2009 Olly Betts
     6 * Copyright 2009 Lemur Consulting Ltd
    67 *
    78 * This program is free software; you can redistribute it and/or
    89 * modify it under the terms of the GNU General Public License as
     
    6970                    PostList * right_,
    7071                    MultiMatch * matcher_,
    7172                    Xapian::doccount dbsize_);
     73
     74        /** get_wdf() for XOR postlists returns the wdf of the sub postlist
     75         *  which is at the current document.
     76         */
     77        virtual Xapian::termcount get_wdf() const;
    7278};
    7379
    7480#endif /* OM_HGUARD_XORPOSTLIST_H */
  • xapian-core/matcher/synonympostlist.h

     
     1/** @file synonympostlist.h
     2 * @brief Combine subqueries, weighting as if they are synonyms
     3 */
     4/* Copyright 2007,2009 Lemur Consulting Ltd
     5 *
     6 * This program is free software; you can redistribute it and/or modify
     7 * it under the terms of the GNU General Public License as published by
     8 * the Free Software Foundation; either version 2 of the License, or
     9 * (at your option) any later version.
     10 *
     11 * This program is distributed in the hope that it will be useful,
     12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14 * GNU General Public License for more details.
     15 *
     16 * You should have received a copy of the GNU General Public License
     17 * along with this program; if not, write to the Free Software
     18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
     19 */
     20
     21#ifndef XAPIAN_INCLUDED_SYNONYMPOSTLIST_H
     22#define XAPIAN_INCLUDED_SYNONYMPOSTLIST_H
     23
     24#include "multimatch.h"
     25#include "postlist.h"
     26
     27/** A postlist comprising several postlists SYNONYMed together.
     28 *
     29 *  This postlist returns all postings in the OR of the sub postlists, but
     30 *  returns weights as if they represented a single term.  The term frequency
     31 *  portion of the weight is approximated.
     32 */
     33class SynonymPostList : public PostList {
     34    /** The subtree, which starts as an OR of all the sub-postlists being
     35     *  joined with Synonym, but may decay into something else.
     36     */
     37    PostList * subtree;
     38
     39    /** The object which is using this postlist to perform a match.
     40     *
     41     *  This object needs to be notified when the tree changes such that the
     42     *  maximum weights need to be recalculated.
     43     */
     44    MultiMatch *matcher;
     45
     46    /** Weighting object used for calculating the synonym weights.
     47     */
     48    const Xapian::Weight * wt;
     49
     50    /** Flag indicating whether the weighting object needs the doclength.
     51     */
     52    bool want_doclength;
     53
     54    /** Flag indicating whether the weighting object needs the wdf.
     55     */
     56    bool want_wdf;
     57
     58  public:
     59    SynonymPostList(PostList *subtree_, MultiMatch * matcher_);
     60
     61    ~SynonymPostList();
     62
     63    /** Set the weight object to be used for the synonym postlist.
     64     *
     65     *  Ownership of the weight object passes to the synonym postlist - the
     66     *  caller must not delete it after use.
     67     */
     68    void set_weight(const Xapian::Weight * wt_);
     69
     70    PostList *next(Xapian::weight w_min);
     71    PostList *skip_to(Xapian::docid did, Xapian::weight w_min);
     72
     73    Xapian::weight get_weight() const;
     74    Xapian::weight get_maxweight() const;
     75    Xapian::weight recalc_maxweight();
     76
     77    // The following methods just call through to the subtree.
     78    Xapian::termcount get_wdf() const;
     79    Xapian::doccount get_termfreq_min() const;
     80    Xapian::doccount get_termfreq_est() const;
     81    Xapian::doccount get_termfreq_max() const;
     82    Xapian::docid get_docid() const;
     83    Xapian::termcount get_doclength() const;
     84    PositionList * read_position_list();
     85    PositionList * open_position_list() const;
     86    bool at_end() const;
     87
     88    std::string get_description() const;
     89};
     90
     91#endif /* XAPIAN_INCLUDED_SYNONYMPOSTLIST_H */
  • xapian-core/matcher/andmaybepostlist.h

    Property changes on: xapian-core/matcher/synonympostlist.h
    ___________________________________________________________________
    Added: svn:eol-style
       + native
    
     
    66 * Copyright 1999,2000,2001 BrightStation PLC
    77 * Copyright 2002 Ananova Ltd
    88 * Copyright 2003,2004,2009 Olly Betts
     9 * Copyright 2009 Lemur Consulting Ltd
    910 *
    1011 * This program is free software; you can redistribute it and/or
    1112 * modify it under the terms of the GNU General Public License as
     
    103104            lmax = l->get_maxweight();
    104105            rmax = r->get_maxweight();
    105106        }
     107
     108        /** get_wdf() for ANDMAYBE postlists returns the sum of the wdfs of the
     109         *  sub postlists which are at the current document - this is desirable
     110         *  when the ANDMAYBE is part of a synonym.
     111         */
     112        Xapian::termcount get_wdf() const;
    106113};
    107114
    108115#endif /* OM_HGUARD_ANDMAYBEPOSTLIST_H */
  • xapian-core/matcher/orpostlist.h

     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002 Ananova Ltd
    55 * Copyright 2003,2004,2009 Olly Betts
     6 * Copyright 2009 Lemur Consulting Ltd
    67 *
    78 * This program is free software; you can redistribute it and/or
    89 * modify it under the terms of the GNU General Public License as
     
    6768                   PostList * right_,
    6869                   MultiMatch * matcher_,
    6970                   Xapian::doccount dbsize_);
     71
     72        /** get_wdf() for OR postlists returns the sum of the wdfs of the
     73         *  sub postlists which are at the current document - this is desirable
     74         *  when the OR is part of a synonym.
     75         */
     76        virtual Xapian::termcount get_wdf() const;
    7077};
    7178
    7279#endif /* OM_HGUARD_ORPOSTLIST_H */
  • xapian-core/matcher/andnotpostlist.cc

     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002 Ananova Ltd
    55 * Copyright 2003,2004,2007,2009 Olly Betts
     6 * Copyright 2009 Lemur Consulting Ltd
    67 *
    78 * This program is free software; you can redistribute it and/or
    89 * modify it under the terms of the GNU General Public License as
     
    175176    DEBUGCALL(MATCH, Xapian::termcount, "AndNotPostList::get_doclength", "");
    176177    RETURN(l->get_doclength());
    177178}
     179
     180Xapian::termcount
     181AndNotPostList::get_wdf() const
     182{
     183    DEBUGCALL(MATCH, Xapian::termcount, "AndNotPostList::get_wdf", "");
     184    RETURN(l->get_wdf());
     185}
  • xapian-core/matcher/andnotpostlist.h

     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002 Ananova Ltd
    55 * Copyright 2003,2004,2009 Olly Betts
     6 * Copyright 2009 Lemur Consulting Ltd
    67 *
    78 * This program is free software; you can redistribute it and/or
    89 * modify it under the terms of the GNU General Public License as
     
    6970                                   Xapian::weight w_min,
    7071                                   Xapian::docid lh,
    7172                                   Xapian::docid rh);
     73
     74        /// get_wdf() for ANDNOT postlists returns the wdf of the left hand side.
     75        Xapian::termcount get_wdf() const;
    7276};
    7377
    7478#endif /* OM_HGUARD_ANDNOTPOSTLIST_H */
  • xapian-core/matcher/queryoptimiser.cc

     
    8282        case Xapian::Query::OP_ELITE_SET:
    8383            RETURN(do_or_like(query, factor));
    8484
     85        case Xapian::Query::OP_SYNONYM:
     86            RETURN(do_synonym(query, factor));
     87
    8588        case Xapian::Query::OP_AND_NOT: {
    8689            AssertEq(query->subqs.size(), 2);
    8790            PostList * l = do_subquery(query->subqs[0], factor);
     
    304307    // for AND-like operations.
    305308    Xapian::Query::Internal::op_t op = query->op;
    306309    Assert(op == Xapian::Query::OP_ELITE_SET || op == Xapian::Query::OP_OR ||
    307            op == Xapian::Query::OP_XOR);
     310           op == Xapian::Query::OP_XOR || op == Xapian::Query::OP_SYNONYM);
    308311
    309312    const Xapian::Query::Internal::subquery_list &queries = query->subqs;
    310313    AssertRel(queries.size(), >=, 2);
     
    382385                  ComparePostListTermFreqAscending());
    383386    }
    384387}
     388
     389PostList *
     390QueryOptimiser::do_synonym(const Xapian::Query::Internal *query, double factor)
     391{
     392    DEBUGCALL(MATCH, PostList *, "QueryOptimiser::do_synonym",
     393              query << ", " << factor);
     394
     395    if (factor == 0.0) {
     396        // If we have a factor of 0, we don't care about the weights, so
     397        // we're just like a normal OR query.
     398        RETURN(do_or_like(query, 0.0));
     399    }
     400
     401    AssertEq(query->wqf, 0); // FIXME - should we be doing something with the wqf?
     402
     403    // We build an OP_OR tree for OP_SYNONYM and then wrap it in a
     404    // SynonymPostList, which supplies the weights.
     405    RETURN(localsubmatch.make_synonym_postlist(do_or_like(query, 0.0),
     406                                               matcher, factor));
     407}
  • xapian-core/matcher/queryoptimiser.h

     
    22 * @brief Convert a Xapian::Query::Internal tree into an optimal PostList tree.
    33 */
    44/* Copyright (C) 2007,2008,2009 Olly Betts
     5 * Copyright (C) 2008 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or
    78 * modify it under the terms of the GNU General Public License as
     
    8889     */
    8990    PostList * do_or_like(const Xapian::Query::Internal *query, double factor);
    9091
     92    /** Optimise a synonym Xapian::Query::Internal subtree into a PostList
     93     *
     94     *  @param query    The subtree to optimise.
     95     *  @param factor   How much to scale weights for this subtree by.
     96     *
     97     *  @return         A PostList subtree.
     98     */
     99    PostList * do_synonym(const Xapian::Query::Internal *query, double factor);
     100
    91101  public:
    92102    QueryOptimiser(const Xapian::Database::Internal & db_,
    93103                   LocalSubMatch & localsubmatch_,
  • xapian-core/matcher/andpostlist.cc

     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002 Ananova Ltd
    55 * Copyright 2003,2004,2007,2008,2009 Olly Betts
    6  * Copyright 2007 Lemur Consulting Ltd
     6 * Copyright 2007,2009 Lemur Consulting Ltd
    77 *
    88 * This program is free software; you can redistribute it and/or
    99 * modify it under the terms of the GNU General Public License as
     
    203203    AssertEq(doclength, r->get_doclength());
    204204    RETURN(doclength);
    205205}
     206
     207Xapian::termcount
     208AndPostList::get_wdf() const
     209{
     210    DEBUGCALL(MATCH, Xapian::termcount, "AndPostList::get_wdf", "");
     211    RETURN(l->get_wdf() + r->get_wdf());
     212}
  • xapian-core/matcher/xorpostlist.cc

     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002 Ananova Ltd
    55 * Copyright 2003,2004,2007,2008,2009 Olly Betts
     6 * Copyright 2009 Lemur Consulting Ltd
    67 *
    78 * This program is free software; you can redistribute it and/or
    89 * modify it under the terms of the GNU General Public License as
     
    294295    Assert(lhead > rhead);
    295296    return r->get_doclength();
    296297}
     298
     299Xapian::termcount
     300XorPostList::get_wdf() const
     301{
     302    DEBUGCALL(MATCH, Xapian::termcount, "XorPostList::get_wdf", "");
     303    if (lhead < rhead) RETURN(l->get_wdf());
     304    RETURN(r->get_wdf());
     305}
  • xapian-core/matcher/synonympostlist.cc

     
     1/** @file synonympostlist.cc
     2 * @brief Combine subqueries, weighting as if they are synonyms
     3 */
     4/* Copyright 2007,2009 Lemur Consulting Ltd
     5 *
     6 * This program is free software; you can redistribute it and/or
     7 * modify it under the terms of the GNU General Public License as
     8 * published by the Free Software Foundation; either version 2 of the
     9 * License, or (at your option) any later version.
     10 *
     11 * This program is distributed in the hope that it will be useful,
     12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14 * GNU General Public License for more details.
     15 *
     16 * You should have received a copy of the GNU General Public License
     17 * along with this program; if not, write to the Free Software
     18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
     19 * USA
     20 */
     21
     22#include <config.h>
     23
     24#include "synonympostlist.h"
     25
     26#include "branchpostlist.h"
     27#include "debuglog.h"
     28
     29SynonymPostList::SynonymPostList(PostList *subtree_,
     30                                 MultiMatch * matcher_)
     31        : subtree(subtree_),
     32          matcher(matcher_),
     33          wt(NULL),
     34          want_doclength(false),
     35          want_wdf(false)
     36{
     37}
     38
     39SynonymPostList::~SynonymPostList()
     40{
     41    delete wt;
     42    delete subtree;
     43}
     44
     45void
     46SynonymPostList::set_weight(const Xapian::Weight * wt_)
     47{
     48    delete wt;
     49    wt = wt_;
     50    want_doclength = wt_->get_sumpart_needs_doclength_();
     51    want_wdf = wt->get_sumpart_needs_wdf_();
     52}
     53
     54PostList *
     55SynonymPostList::next(Xapian::weight w_min)
     56{
     57    LOGCALL(MATCH, PostList *, "SynonymPostList::next", w_min);
     58    (void)w_min;
     59    next_handling_prune(subtree, 0, matcher);
     60    RETURN(NULL);
     61}
     62
     63PostList *
     64SynonymPostList::skip_to(Xapian::docid did, Xapian::weight w_min)
     65{
     66    LOGCALL(MATCH, PostList *, "SynonymPostList::skip_to", did << ", " << w_min);
     67    (void)w_min;
     68    skip_to_handling_prune(subtree, did, 0, matcher);
     69    RETURN(NULL);
     70}
     71
     72Xapian::weight
     73SynonymPostList::get_weight() const
     74{
     75
     76    // The wdf returned can be higher than the doclength.  In particular, this
     77    // can currently occur if the query contains a term more than once; the wdf
     78    // of each occurrence is added up.
     79    //
     80    // However, it's reasonable for weighting algorithms to optimise by
     81    // assuming that get_wdf() will never return more than get_doclength(),
     82    // since the doclength is the sum of the wdfs.
     83    //
     84    // Therefore, we simply clamp the wdf value to the doclength, to ensure
     85    // that this is true.  Note that this requires the doclength to be
     86    // calculated even if the weight object doesn't want it.
     87
     88    if (want_wdf) {
     89        Xapian::termcount wdf = get_wdf();
     90        Xapian::termcount doclen = get_doclength();
     91        if (wdf > doclen) wdf = doclen;
     92        return wt->get_sumpart(wdf, doclen);
     93    }
     94    return wt->get_sumpart(0, want_doclength ? get_doclength() : 0);
     95}
     96
     97Xapian::weight
     98SynonymPostList::get_maxweight() const
     99{
     100    return wt->get_maxpart();
     101}
     102
     103Xapian::weight
     104SynonymPostList::recalc_maxweight()
     105{
     106    return SynonymPostList::get_maxweight();
     107}
     108
     109Xapian::termcount
     110SynonymPostList::get_wdf() const {
     111    return subtree->get_wdf();
     112}
     113
     114Xapian::doccount
     115SynonymPostList::get_termfreq_min() const {
     116    return subtree->get_termfreq_min();
     117}
     118
     119Xapian::doccount
     120SynonymPostList::get_termfreq_est() const {
     121    return subtree->get_termfreq_est();
     122}
     123
     124Xapian::doccount
     125SynonymPostList::get_termfreq_max() const {
     126    return subtree->get_termfreq_max();
     127}
     128
     129Xapian::docid
     130SynonymPostList::get_docid() const {
     131    return subtree->get_docid();
     132}
     133
     134Xapian::termcount
     135SynonymPostList::get_doclength() const {
     136    return subtree->get_doclength();
     137}
     138
     139PositionList *
     140SynonymPostList::read_position_list() {
     141    return subtree->read_position_list();
     142}
     143
     144PositionList *
     145SynonymPostList::open_position_list() const {
     146    return subtree->open_position_list();
     147}
     148
     149bool
     150SynonymPostList::at_end() const {
     151    return subtree->at_end();
     152}
     153
     154std::string
     155SynonymPostList::get_description() const
     156{
     157    return "(Synonym " + subtree->get_description() + ")";
     158}
  • xapian-core/matcher/orpostlist.cc

    Property changes on: xapian-core/matcher/synonympostlist.cc
    ___________________________________________________________________
    Added: svn:eol-style
       + native
    
     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2001,2002 Ananova Ltd
    55 * Copyright 2003,2004,2007,2008,2009 Olly Betts
     6 * Copyright 2009 Lemur Consulting Ltd
    67 *
    78 * This program is free software; you can redistribute it and/or
    89 * modify it under the terms of the GNU General Public License as
     
    258259
    259260    RETURN(doclength);
    260261}
     262
     263Xapian::termcount
     264OrPostList::get_wdf() const
     265{
     266    DEBUGCALL(MATCH, Xapian::termcount, "OrPostList::get_wdf", "");
     267    if (lhead < rhead) RETURN(l->get_wdf());
     268    if (lhead > rhead) RETURN(r->get_wdf());
     269    RETURN(l->get_wdf() + r->get_wdf());
     270}
  • xapian-core/matcher/andmaybepostlist.cc

     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002 Ananova Ltd
    55 * Copyright 2003,2004,2005,2008,2009 Olly Betts
     6 * Copyright 2009 Lemur Consulting Ltd
    67 *
    78 * This program is free software; you can redistribute it and/or
    89 * modify it under the terms of the GNU General Public License as
     
    169170    if (lhead == rhead) AssertEq(l->get_doclength(), r->get_doclength());
    170171    RETURN(l->get_doclength());
    171172}
     173
     174Xapian::termcount
     175AndMaybePostList::get_wdf() const
     176{
     177    DEBUGCALL(MATCH, Xapian::termcount, "AndMaybePostList::get_wdf", "");
     178    if (lhead == rhead) RETURN(l->get_wdf() + r->get_wdf());
     179    RETURN(l->get_wdf());
     180}
  • xapian-core/weight/weight.cc

     
    7373    init(factor);
    7474}
    7575
     76void
     77Weight::init_(const Internal & stats, Xapian::termcount query_length,
     78              double factor, Xapian::doccount termfreq)
     79{
     80    // Synonym case.
     81    collection_size_ = stats.collection_size;
     82    rset_size_ = stats.rset_size;
     83    average_length_ = stats.get_average_length();
     84    doclength_upper_bound_ = stats.db.get_doclength_upper_bound();
     85    doclength_lower_bound_ = stats.db.get_doclength_lower_bound();
     86    // For a synonym, the doclength is an upper bound on the wdf.
     87    // FIXME: foo OP_SYNONYM foo could exceed this, but we probably need to
     88    // handle repeated terms better somehow.
     89    wdf_upper_bound_ = stats.db.get_doclength_upper_bound();
     90    termfreq_ = termfreq;
     91    reltermfreq_ = 0;
     92    query_length_ = query_length;
     93    wqf_ = 1;
     94    init(factor);
     95}
     96
    7697Weight::~Weight() { }
    7798
    7899}
  • xapian-core/tests/api_db.cc

     
    14571457    return true;
    14581458}
    14591459
     1460// Check a synonym search
     1461DEFINE_TESTCASE(synonym1, backend) {
     1462    Xapian::Database db(get_database("etext"));
     1463    Xapian::doccount lots = 214;
     1464
     1465    // Make a list of lists of subqueries, which are going to be joined
     1466    // together as a synonym.
     1467    vector<vector<Xapian::Query> > subqueries_list;
     1468
     1469    vector<Xapian::Query> subqueries;
     1470    subqueries.push_back(Xapian::Query("date"));
     1471    subqueries_list.push_back(subqueries);
     1472
     1473    // Two terms, which co-occur in some documents.
     1474    subqueries.clear();
     1475    subqueries.push_back(Xapian::Query("sky"));
     1476    subqueries.push_back(Xapian::Query("date"));
     1477    subqueries_list.push_back(subqueries);
     1478
     1479    // Two terms which are entirely disjoint, and where the maximum weight
     1480    // doesn't occur in the first or second match.
     1481    subqueries.clear();
     1482    subqueries.push_back(Xapian::Query("gutenberg"));
     1483    subqueries.push_back(Xapian::Query("blockhead"));
     1484    subqueries_list.push_back(subqueries);
     1485
     1486    subqueries.clear();
     1487    subqueries.push_back(Xapian::Query("date"));
     1488    subqueries.push_back(Xapian::Query(Xapian::Query::OP_OR,
     1489                                       Xapian::Query("sky"),
     1490                                       Xapian::Query("glove")));
     1491    subqueries_list.push_back(subqueries);
     1492
     1493    subqueries.clear();
     1494    subqueries.push_back(Xapian::Query("sky"));
     1495    subqueries.push_back(Xapian::Query("date"));
     1496    subqueries.push_back(Xapian::Query("stein"));
     1497    subqueries.push_back(Xapian::Query("ally"));
     1498    subqueries_list.push_back(subqueries);
     1499
     1500    subqueries.clear();
     1501    subqueries.push_back(Xapian::Query("attitud"));
     1502    subqueries.push_back(Xapian::Query(Xapian::Query::OP_PHRASE,
     1503                                       Xapian::Query("german"),
     1504                                       Xapian::Query("adventur")));
     1505    subqueries_list.push_back(subqueries);
     1506
     1507    for (vector<vector<Xapian::Query> >::const_iterator
     1508         qlist = subqueries_list.begin();
     1509         qlist != subqueries_list.end(); ++qlist)
     1510    {
     1511        // Run two queries, one joining the subqueries with OR and one joining them
     1512        // with SYNONYM.
     1513        Xapian::Enquire enquire(db);
     1514
     1515        // Do the search with OR
     1516        Xapian::Query orquery(Xapian::Query(Xapian::Query::OP_OR, qlist->begin(), qlist->end()));
     1517        enquire.set_query(orquery);
     1518        Xapian::MSet ormset = enquire.get_mset(0, lots);
     1519
     1520        // Do the search with synonym, getting all the results.
     1521        Xapian::Query synquery(Xapian::Query::OP_SYNONYM, qlist->begin(), qlist->end());
     1522        enquire.set_query(synquery);
     1523        Xapian::MSet mset = enquire.get_mset(0, lots);
     1524
     1525        // Check that the queries return some results.
     1526        TEST_NOT_EQUAL(mset.size(), 0);
     1527        // Check that the queries return the same number of results.
     1528        TEST_EQUAL(mset.size(), ormset.size());
     1529        map<Xapian::docid, Xapian::weight> values_or;
     1530        map<Xapian::docid, Xapian::weight> values_synonym;
     1531        for (Xapian::doccount i = 0; i < mset.size(); ++i) {
     1532            values_or[*ormset[i]] = ormset[i].get_weight();
     1533            values_synonym[*mset[i]] = mset[i].get_weight();
     1534        }
     1535        TEST_EQUAL(values_or.size(), values_synonym.size());
     1536
     1537        /* Check that most of the weights for items in the "or" mset are
     1538         * different from those in the "synonym" mset. */
     1539        int same_weight = 0;
     1540        int different_weight = 0;
     1541        for (map<Xapian::docid, Xapian::weight>::const_iterator
     1542             j = values_or.begin();
     1543             j != values_or.end(); ++j)
     1544        {
     1545            Xapian::docid did = j->first;
     1546            // Check that all the results in the or tree make it to the synonym tree.
     1547            TEST(values_synonym.find(did) != values_synonym.end());
     1548            if (values_or[did] == values_synonym[did]) {
     1549                same_weight += 1;
     1550            } else {
     1551                different_weight += 1;
     1552            }
     1553        }
     1554        if (qlist->size() == 1) {
     1555            // Had a single term - check that all the weights were the same.
     1556            TEST_EQUAL(different_weight, 0);
     1557            TEST_NOT_EQUAL(same_weight, 0);
     1558        } else {
     1559            // Check that most of the weights differ.
     1560            TEST_NOT_EQUAL(different_weight, 0);
     1561            TEST_REL(same_weight, <, different_weight);
     1562        }
     1563
     1564        // Do the search with synonym, but just get the top result.
     1565        // (Regression test - the OR subquery in the synonym postlist tree used
     1566        // to shortcut incorrectly, and return the wrong result here).
     1567        Xapian::MSet mset_top = enquire.get_mset(0, 1);
     1568        TEST_EQUAL(mset_top.size(), 1);
     1569        TEST(mset_range_is_same(mset_top, 0, mset, 0, 1));
     1570    }
     1571    return true;
     1572}
     1573
     1574// Regression test - test a synonym search with a MultiAndPostList.
     1575DEFINE_TESTCASE(synonym2, backend) {
     1576    Xapian::Query query;
     1577    vector<Xapian::Query> subqueries;
     1578    subqueries.push_back(Xapian::Query("file"));
     1579    subqueries.push_back(Xapian::Query("the"));
     1580    subqueries.push_back(Xapian::Query("next"));
     1581    subqueries.push_back(Xapian::Query("reader"));
     1582    query = Xapian::Query(Xapian::Query::OP_AND, subqueries.begin(), subqueries.end());
     1583    subqueries.clear();
     1584    subqueries.push_back(query);
     1585    subqueries.push_back(Xapian::Query("gutenberg"));
     1586    query = Xapian::Query(Xapian::Query::OP_SYNONYM, subqueries.begin(), subqueries.end());
     1587
     1588    tout << query.get_description() << endl;
     1589
     1590    Xapian::Database db(get_database("etext"));
     1591    Xapian::Enquire enquire(db);
     1592    enquire.set_query(query);
     1593    Xapian::MSet mset = enquire.get_mset(0, 10);
     1594    tout << mset.get_description() << endl;
     1595
     1596    // Regression test that OP_SCALE_WEIGHT works with OP_SYNONYM
     1597    double maxposs = mset.get_max_possible();
     1598    query = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 10.0);
     1599    enquire.set_query(query);
     1600    mset = enquire.get_mset(0, 10);
     1601    double maxposs2 = mset.get_max_possible();
     1602
     1603    TEST_EQUAL_DOUBLE(maxposs * 10.0, maxposs2);
     1604
     1605    return true;
     1606}
     1607
    14601608// tests that specifying a nonexistent input file throws an exception.
    14611609DEFINE_TESTCASE(flintdatabaseopeningerror1, flint) {
    14621610    mkdir(".flint", 0755);
  • xapian-core/tests/queryparsertest.cc

     
    11/* queryparsertest.cc: Tests of Xapian::QueryParser
    22 *
    33 * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009 Olly Betts
     4 * Copyright (C) 2007,2009 Lemur Consulting Ltd
    45 *
    56 * This program is free software; you can redistribute it and/or
    67 * modify it under the terms of the GNU General Public License as
     
    786787    Xapian::Query qobj = qp.parse_query("ab*", Xapian::QueryParser::FLAG_WILDCARD);
    787788    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(abc:(pos=1))");
    788789    qobj = qp.parse_query("muscle*", Xapian::QueryParser::FLAG_WILDCARD);
    789     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscle:(pos=1) OR musclebound:(pos=1)))");
     790    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscle:(pos=1) SYNONYM musclebound:(pos=1)))");
    790791    qobj = qp.parse_query("meat*", Xapian::QueryParser::FLAG_WILDCARD);
    791792    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query()");
    792793    qobj = qp.parse_query("musc*", Xapian::QueryParser::FLAG_WILDCARD);
    793     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscat:(pos=1) OR muscle:(pos=1) OR musclebound:(pos=1) OR muscular:(pos=1)))");
     794    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscat:(pos=1) SYNONYM muscle:(pos=1) SYNONYM musclebound:(pos=1) SYNONYM muscular:(pos=1)))");
    794795    qobj = qp.parse_query("mutt*", Xapian::QueryParser::FLAG_WILDCARD);
    795796    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(mutton:(pos=1))");
    796797    // Regression test (we weren't lowercasing terms before checking if they
     
    879880    qp.add_prefix("author", "A");
    880881    Xapian::Query qobj;
    881882    qobj = qp.parse_query("author:h*", Xapian::QueryParser::FLAG_WILDCARD);
    882     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((Aheinlein:(pos=1) OR Ahuxley:(pos=1)))");
     883    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((Aheinlein:(pos=1) SYNONYM Ahuxley:(pos=1)))");
    883884    qobj = qp.parse_query("author:h* test", Xapian::QueryParser::FLAG_WILDCARD);
    884     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((Aheinlein:(pos=1) OR Ahuxley:(pos=1) OR test:(pos=2)))");
     885    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((Aheinlein:(pos=1) SYNONYM Ahuxley:(pos=1)) OR test:(pos=2)))");
    885886    return true;
    886887}
    887888
     
    907908    doc.add_term("XTcowl");
    908909    doc.add_term("XTcox");
    909910    doc.add_term("ZXTcow");
     911    doc.add_term("XONEpartial");
     912    doc.add_term("XONEpartial2");
     913    doc.add_term("XTWOpartial3");
     914    doc.add_term("XTWOpartial4");
    910915    db.add_document(doc);
    911916    Xapian::QueryParser qp;
    912917    qp.set_database(db);
     
    922927    qobj = qp.parse_query("ab", Xapian::QueryParser::FLAG_PARTIAL);
    923928    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((abc:(pos=1) OR Zab:(pos=1)))");
    924929    qobj = qp.parse_query("muscle", Xapian::QueryParser::FLAG_PARTIAL);
    925     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscle:(pos=1) OR musclebound:(pos=1) OR Zmuscl:(pos=1)))");
     930    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((muscle:(pos=1) SYNONYM musclebound:(pos=1)) OR Zmuscl:(pos=1)))");
    926931    qobj = qp.parse_query("meat", Xapian::QueryParser::FLAG_PARTIAL);
    927932    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(Zmeat:(pos=1))");
    928933    qobj = qp.parse_query("musc", Xapian::QueryParser::FLAG_PARTIAL);
    929     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscat:(pos=1) OR muscle:(pos=1) OR musclebound:(pos=1) OR muscular:(pos=1) OR Zmusc:(pos=1)))");
     934    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((muscat:(pos=1) SYNONYM muscle:(pos=1) SYNONYM musclebound:(pos=1) SYNONYM muscular:(pos=1)) OR Zmusc:(pos=1)))");
    930935    qobj = qp.parse_query("mutt", Xapian::QueryParser::FLAG_PARTIAL);
    931936    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((mutton:(pos=1) OR Zmutt:(pos=1)))");
    932937    qobj = qp.parse_query("abc musc", Xapian::QueryParser::FLAG_PARTIAL);
    933     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((Zabc:(pos=1) OR muscat:(pos=2) OR muscle:(pos=2) OR musclebound:(pos=2) OR muscular:(pos=2) OR Zmusc:(pos=2)))");
     938    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((Zabc:(pos=1) OR (muscat:(pos=2) SYNONYM muscle:(pos=2) SYNONYM musclebound:(pos=2) SYNONYM muscular:(pos=2)) OR Zmusc:(pos=2)))");
    934939    qobj = qp.parse_query("a* mutt", Xapian::QueryParser::FLAG_PARTIAL | Xapian::QueryParser::FLAG_WILDCARD);
    935940    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((abc:(pos=1) OR mutton:(pos=2) OR Zmutt:(pos=2)))");
    936941
    937942    // Check behaviour with stemmed terms, and stem strategy STEM_SOME.
    938943    qobj = qp.parse_query("o", Xapian::QueryParser::FLAG_PARTIAL);
    939     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((out:(pos=1) OR outside:(pos=1) OR Zo:(pos=1)))");
     944    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR Zo:(pos=1)))");
    940945    qobj = qp.parse_query("ou", Xapian::QueryParser::FLAG_PARTIAL);
    941     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((out:(pos=1) OR outside:(pos=1) OR Zou:(pos=1)))");
     946    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR Zou:(pos=1)))");
    942947    qobj = qp.parse_query("out", Xapian::QueryParser::FLAG_PARTIAL);
    943     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((out:(pos=1) OR outside:(pos=1) OR Zout:(pos=1)))");
     948    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR Zout:(pos=1)))");
    944949    qobj = qp.parse_query("outs", Xapian::QueryParser::FLAG_PARTIAL);
    945950    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((outside:(pos=1) OR Zout:(pos=1)))");
    946951    qobj = qp.parse_query("outsi", Xapian::QueryParser::FLAG_PARTIAL);
     
    952957
    953958    // Check behaviour with capitalised terms, and stem strategy STEM_SOME.
    954959    qobj = qp.parse_query("Out", Xapian::QueryParser::FLAG_PARTIAL);
    955     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((out:(pos=1,wqf=2) OR outside:(pos=1)))");
     960    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR out:(pos=1)))");
    956961    qobj = qp.parse_query("Outs", Xapian::QueryParser::FLAG_PARTIAL);
    957962    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((outside:(pos=1) OR outs:(pos=1)))");
    958963    qobj = qp.parse_query("Outside", Xapian::QueryParser::FLAG_PARTIAL);
     
    961966    // And now with stemming strategy STEM_ALL.
    962967    qp.set_stemming_strategy(Xapian::QueryParser::STEM_ALL);
    963968    qobj = qp.parse_query("Out", Xapian::QueryParser::FLAG_PARTIAL);
    964     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((out:(pos=1,wqf=2) OR outside:(pos=1)))");
     969    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR out:(pos=1)))");
    965970    qobj = qp.parse_query("Outs", Xapian::QueryParser::FLAG_PARTIAL);
    966971    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((outside:(pos=1) OR out:(pos=1)))");
    967972    qobj = qp.parse_query("Outside", Xapian::QueryParser::FLAG_PARTIAL);
     
    970975    // Check handling of a case with a prefix.
    971976    qp.set_stemming_strategy(Xapian::QueryParser::STEM_SOME);
    972977    qobj = qp.parse_query("title:cow", Xapian::QueryParser::FLAG_PARTIAL);
    973     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((XTcowl:(pos=1) OR XTcows:(pos=1) OR ZXTcow:(pos=1)))");
     978    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((XTcowl:(pos=1) SYNONYM XTcows:(pos=1)) OR ZXTcow:(pos=1)))");
    974979    qobj = qp.parse_query("title:cows", Xapian::QueryParser::FLAG_PARTIAL);
    975980    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((XTcows:(pos=1) OR ZXTcow:(pos=1)))");
    976981    qobj = qp.parse_query("title:Cow", Xapian::QueryParser::FLAG_PARTIAL);
    977     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((XTcowl:(pos=1) OR XTcows:(pos=1) OR XTcow:(pos=1)))");
     982    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((XTcowl:(pos=1) SYNONYM XTcows:(pos=1)) OR XTcow:(pos=1)))");
    978983    qobj = qp.parse_query("title:Cows", Xapian::QueryParser::FLAG_PARTIAL);
    979984    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(XTcows:(pos=1,wqf=2))");
    980985
     
    982987    // inflate the wqf of the "parsed as normal" version of a partial term
    983988    // by multiplying it by the number of prefixes mapped to.
    984989    qobj = qp.parse_query("double:vision", Xapian::QueryParser::FLAG_PARTIAL);
    985     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((ZXONEvision:(pos=1) OR ZXTWOvision:(pos=1)))");
     990    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((ZXONEvision:(pos=1) SYNONYM ZXTWOvision:(pos=1)))");
     991
     992    // Test handling of FLAG_PARTIAL when there's more than one prefix.
     993    qobj = qp.parse_query("double:part", Xapian::QueryParser::FLAG_PARTIAL);
     994    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((XONEpartial:(pos=1) SYNONYM XONEpartial2:(pos=1) SYNONYM XTWOpartial3:(pos=1) SYNONYM XTWOpartial4:(pos=1)) OR (ZXONEpart:(pos=1) SYNONYM ZXTWOpart:(pos=1))))");
     995
     996    // Test handling of FLAG_PARTIAL when there's more than one prefix, without
     997    // stemming.
     998    qp.set_stemming_strategy(Xapian::QueryParser::STEM_NONE);
     999    qobj = qp.parse_query("double:part", Xapian::QueryParser::FLAG_PARTIAL);
     1000    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((XONEpartial:(pos=1) SYNONYM XONEpartial2:(pos=1) SYNONYM XTWOpartial3:(pos=1) SYNONYM XTWOpartial4:(pos=1)) OR (XONEpart:(pos=1) SYNONYM XTWOpart:(pos=1))))");
     1001    qobj = qp.parse_query("double:partial", Xapian::QueryParser::FLAG_PARTIAL);
     1002    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((XONEpartial:(pos=1) SYNONYM XONEpartial2:(pos=1) SYNONYM XTWOpartial3:(pos=1) SYNONYM XTWOpartial4:(pos=1)) OR (XONEpartial:(pos=1) SYNONYM XTWOpartial:(pos=1))))");
    9861003
    9871004    return true;
    9881005}
     
    15471564}
    15481565
    15491566static test test_synonym_queries[] = {
    1550     { "searching", "(Zsearch:(pos=1) OR Zfind:(pos=1) OR Zlocate:(pos=1))" },
    1551     { "search", "(Zsearch:(pos=1) OR find:(pos=1))" },
    1552     { "Search", "(search:(pos=1) OR find:(pos=1))" },
     1567    { "searching", "(Zsearch:(pos=1) SYNONYM Zfind:(pos=1) SYNONYM Zlocate:(pos=1))" },
     1568    { "search", "(Zsearch:(pos=1) SYNONYM find:(pos=1))" },
     1569    { "Search", "(search:(pos=1) SYNONYM find:(pos=1))" },
    15531570    { "Searching", "searching:(pos=1)" },
    1554     { "searching OR terms", "(Zsearch:(pos=1) OR Zfind:(pos=1) OR Zlocate:(pos=1) OR Zterm:(pos=2))" },
    1555     { "search OR terms", "(Zsearch:(pos=1) OR find:(pos=1) OR Zterm:(pos=2))" },
    1556     { "search +terms", "(Zterm:(pos=2) AND_MAYBE (Zsearch:(pos=1) OR find:(pos=1)))" },
    1557     { "search -terms", "((Zsearch:(pos=1) OR find:(pos=1)) AND_NOT Zterm:(pos=2))" },
    1558     { "+search terms", "((Zsearch:(pos=1) OR find:(pos=1)) AND_MAYBE Zterm:(pos=2))" },
    1559     { "-search terms", "(Zterm:(pos=2) AND_NOT (Zsearch:(pos=1) OR find:(pos=1)))" },
    1560     { "search terms", "(Zsearch:(pos=1) OR find:(pos=1) OR Zterm:(pos=2))" },
     1571    { "searching OR terms", "((Zsearch:(pos=1) SYNONYM Zfind:(pos=1) SYNONYM Zlocate:(pos=1)) OR Zterm:(pos=2))" },
     1572    { "search OR terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) OR Zterm:(pos=2))" },
     1573    { "search +terms", "(Zterm:(pos=2) AND_MAYBE (Zsearch:(pos=1) SYNONYM find:(pos=1)))" },
     1574    { "search -terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) AND_NOT Zterm:(pos=2))" },
     1575    { "+search terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) AND_MAYBE Zterm:(pos=2))" },
     1576    { "-search terms", "(Zterm:(pos=2) AND_NOT (Zsearch:(pos=1) SYNONYM find:(pos=1)))" },
     1577    { "search terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) OR Zterm:(pos=2))" },
    15611578    // Shouldn't trigger synonyms:
    15621579    { "\"search terms\"", "(search:(pos=1) PHRASE 2 terms:(pos=2))" },
    15631580    { NULL, NULL }
     
    15971614
    15981615static test test_multi_synonym_queries[] = {
    15991616    { "sun OR tan OR cream", "(Zsun:(pos=1) OR Ztan:(pos=2) OR Zcream:(pos=3))" },
    1600     { "sun tan", "(Zsun:(pos=1) OR Ztan:(pos=2) OR bathe:(pos=1))" },
    1601     { "sun tan cream", "(Zsun:(pos=1) OR Ztan:(pos=2) OR Zcream:(pos=3) OR lotion:(pos=1))" },
    1602     { "beach sun tan holiday", "(Zbeach:(pos=1) OR Zsun:(pos=2) OR Ztan:(pos=3) OR bathe:(pos=2) OR Zholiday:(pos=4))" },
    1603     { "sun tan sun tan cream", "(Zsun:(pos=1) OR Ztan:(pos=2) OR bathe:(pos=1) OR Zsun:(pos=3) OR Ztan:(pos=4) OR Zcream:(pos=5) OR lotion:(pos=3))" },
    1604     { "single", "(Zsingl:(pos=1) OR record:(pos=1))" },
     1617    { "sun tan", "((Zsun:(pos=1) OR Ztan:(pos=2)) SYNONYM bathe:(pos=1))" },
     1618    { "sun tan cream", "((Zsun:(pos=1) OR Ztan:(pos=2) OR Zcream:(pos=3)) SYNONYM lotion:(pos=1))" },
     1619    { "beach sun tan holiday", "(Zbeach:(pos=1) OR ((Zsun:(pos=2) OR Ztan:(pos=3)) SYNONYM bathe:(pos=2)) OR Zholiday:(pos=4))" },
     1620    { "sun tan sun tan cream", "(((Zsun:(pos=1) OR Ztan:(pos=2)) SYNONYM bathe:(pos=1)) OR ((Zsun:(pos=3) OR Ztan:(pos=4) OR Zcream:(pos=5)) SYNONYM lotion:(pos=3)))" },
     1621    { "single", "(Zsingl:(pos=1) SYNONYM record:(pos=1))" },
    16051622    { NULL, NULL }
    16061623};
    16071624
     
    16401657
    16411658static test test_synonym_op_queries[] = {
    16421659    { "searching", "Zsearch:(pos=1)" },
    1643     { "~searching", "(Zsearch:(pos=1) OR Zfind:(pos=1) OR Zlocate:(pos=1))" },
    1644     { "~search", "(Zsearch:(pos=1) OR find:(pos=1))" },
    1645     { "~Search", "(search:(pos=1) OR find:(pos=1))" },
     1660    { "~searching", "(Zsearch:(pos=1) SYNONYM Zfind:(pos=1) SYNONYM Zlocate:(pos=1))" },
     1661    { "~search", "(Zsearch:(pos=1) SYNONYM find:(pos=1))" },
     1662    { "~Search", "(search:(pos=1) SYNONYM find:(pos=1))" },
    16461663    { "~Searching", "searching:(pos=1)" },
    1647     { "~searching OR terms", "(Zsearch:(pos=1) OR Zfind:(pos=1) OR Zlocate:(pos=1) OR Zterm:(pos=2))" },
    1648     { "~search OR terms", "(Zsearch:(pos=1) OR find:(pos=1) OR Zterm:(pos=2))" },
    1649     { "~search +terms", "(Zterm:(pos=2) AND_MAYBE (Zsearch:(pos=1) OR find:(pos=1)))" },
    1650     { "~search -terms", "((Zsearch:(pos=1) OR find:(pos=1)) AND_NOT Zterm:(pos=2))" },
    1651     { "+~search terms", "((Zsearch:(pos=1) OR find:(pos=1)) AND_MAYBE Zterm:(pos=2))" },
    1652     { "-~search terms", "(Zterm:(pos=2) AND_NOT (Zsearch:(pos=1) OR find:(pos=1)))" },
    1653     { "~search terms", "(Zsearch:(pos=1) OR find:(pos=1) OR Zterm:(pos=2))" },
     1664    { "~searching OR terms", "((Zsearch:(pos=1) SYNONYM Zfind:(pos=1) SYNONYM Zlocate:(pos=1)) OR Zterm:(pos=2))" },
     1665    { "~search OR terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) OR Zterm:(pos=2))" },
     1666    { "~search +terms", "(Zterm:(pos=2) AND_MAYBE (Zsearch:(pos=1) SYNONYM find:(pos=1)))" },
     1667    { "~search -terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) AND_NOT Zterm:(pos=2))" },
     1668    { "+~search terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) AND_MAYBE Zterm:(pos=2))" },
     1669    { "-~search terms", "(Zterm:(pos=2) AND_NOT (Zsearch:(pos=1) SYNONYM find:(pos=1)))" },
     1670    { "~search terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) OR Zterm:(pos=2))" },
    16541671    // FIXME: should look for multi-term synonym...
    16551672    { "~\"search terms\"", "(search:(pos=1) PHRASE 2 terms:(pos=2))" },
    16561673    { NULL, NULL }
  • xapian-core/include/xapian/query.h

     
    119119            OP_VALUE_GE,
    120120
    121121            /** Filter by a less-than-or-equal test on a document value. */
    122             OP_VALUE_LE
     122            OP_VALUE_LE,
     123
     124            /** Treat a set of queries as synonyms.
     125             *
     126             *  This returns all results which match at least one of the
     127             *  queries, but weighting as if all the sub-queries are instances
     128             *  of the same term: so multiple matching terms for a document
     129             *  increase the wdf value used, and the term frequency is based on
     130             *  the number of documents which would match an OR of all the
     131             *  subqueries.
     132             *
     133             *  The term frequency used will usually be an approximation,
     134             *  because calculating the precise combined term frequency would
     135             *  be overly expensive.
     136             *
     137             *  Identical to OP_OR, except for the weightings returned.
     138             */
     139            OP_SYNONYM
    123140        } op;
    124141
    125142        /** Copy constructor. */
  • xapian-core/include/xapian/weight.h

     
    22 * @brief Weighting scheme API.
    33 */
    44/* Copyright (C) 2007,2008,2009 Olly Betts
     5 * Copyright (C) 2009 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or
    78 * modify it under the terms of the GNU General Public License as
     
    212213               const std::string & term, Xapian::termcount wqf_,
    213214               double factor);
    214215
     216    /** @private @internal Initialise this object to calculate weights for a
     217     *  synonym.
     218     *
     219     *  @param stats       Source of statistics.
     220     *  @param query_len_  Query length.
     221     *  @param factor      Any scaling factor (e.g. from OP_SCALE_WEIGHT).
     222     *  @param termfreq    The termfreq to use.
     223     */
     224    void init_(const Internal & stats, Xapian::termcount query_len_,
     225               double factor, Xapian::doccount termfreq);
     226
    215227    /** @private @internal Initialise this object to calculate the extra weight
    216228     *  component.
    217229     *
     
    230242        return stats_needed & DOC_LENGTH;
    231243    }
    232244
     245    /** @private @internal Return true if the WDF is needed.
     246     *
     247     *  If this method returns true, then the WDF will be fetched and passed to
     248     *  @a get_sumpart().  Otherwise 0 may be passed for the wdf.
     249     */
     250    bool get_sumpart_needs_wdf_() const {
     251        return stats_needed & WDF;
     252    }
     253
    233254  protected:
    234255    /// Only allow subclasses to copy us.
    235256    Weight(const Weight &);
  • xapian-core/api/omqueryinternal.cc

     
    6565        case Xapian::Query::OP_VALUE_RANGE:
    6666        case Xapian::Query::OP_VALUE_GE:
    6767        case Xapian::Query::OP_VALUE_LE:
     68        case Xapian::Query::OP_SYNONYM:
    6869            return 0;
    6970        case Xapian::Query::OP_SCALE_WEIGHT:
    7071            return 1;
     
    100101        case Xapian::Query::OP_NEAR:
    101102        case Xapian::Query::OP_PHRASE:
    102103        case Xapian::Query::OP_ELITE_SET:
     104        case Xapian::Query::OP_SYNONYM:
    103105            return UINT_MAX;
    104106        default:
    105107            Assert(false);
     
    221223                result += ".";
    222224                result += str_parameter; // serialise_double(get_dbl_parameter());
    223225                break;
     226            case Xapian::Query::OP_SYNONYM:
     227                result += "=";
     228                break;
    224229        }
    225230    }
    226231    return result;
     
    251256        case Xapian::Query::OP_VALUE_GE:        name = "VALUE_GE"; break;
    252257        case Xapian::Query::OP_VALUE_LE:        name = "VALUE_LE"; break;
    253258        case Xapian::Query::OP_SCALE_WEIGHT:    name = "SCALE_WEIGHT"; break;
     259        case Xapian::Query::OP_SYNONYM:         name = "SYNONYM"; break;
    254260    }
    255261    return name;
    256262}
     
    584590                    return qint_from_vector(Xapian::Query::OP_SCALE_WEIGHT,
    585591                                            subqs, 0, param);
    586592                }
    587                 default:
     593                case '=': {
     594                    return qint_from_vector(Xapian::Query::OP_SYNONYM, subqs);
     595                }
     596                default:
    588597                    LOGLINE(UNKNOWN, "Can't parse remainder `" << p - 1 << "'");
    589598                    throw Xapian::InvalidArgumentError("Invalid query string");
    590599            }
     
    809818        case OP_ELITE_SET:
    810819        case OP_OR:
    811820        case OP_XOR:
     821        case OP_SYNONYM:
    812822            // Doing an "OR" type operation - if we've got any MatchNothing
    813823            // subnodes, drop them; except that we mustn't become an empty
    814824            // node due to this, so we never drop a MatchNothing subnode
     
    900910                }
    901911            }
    902912            break;
    903         case OP_OR: case OP_AND: case OP_XOR:
     913        case OP_OR: case OP_AND: case OP_XOR: case OP_SYNONYM:
    904914            // Remove duplicates if we can.
    905915            if (subqs.size() > 1) collapse_subqs();
    906916            break;
     
    944954void
    945955Xapian::Query::Internal::collapse_subqs()
    946956{
    947     Assert(op == OP_OR || op == OP_AND || op == OP_XOR);
     957    Assert(op == OP_OR || op == OP_AND || op == OP_XOR || op == OP_SYNONYM);
    948958    typedef set<Xapian::Query::Internal *, SortPosName> subqtable;
    949959    subqtable sqtab;
    950960
     
    10381048    Assert(!is_leaf(op));
    10391049    if (subq == 0) {
    10401050        subqs.push_back(0);
    1041     } else if (op == subq->op && (op == OP_AND || op == OP_OR || op == OP_XOR)) {
     1051    } else if (op == subq->op && (op == OP_AND || op == OP_OR || op == OP_XOR || op == OP_SYNONYM)) {
    10421052        // Distribute the subquery.
    10431053        for (subquery_list::const_iterator i = subq->subqs.begin();
    10441054             i != subq->subqs.end(); i++) {
  • xapian-bindings/python/smoketest2.py

     
    213213    qp.set_stemming_strategy(qp.STEM_SOME)
    214214    qp.set_stemmer(xapian.Stem('en'))
    215215    expect_query(qp.parse_query("foo o", qp.FLAG_PARTIAL),
    216                  "(Zfoo:(pos=1) AND (out:(pos=2) OR outsid:(pos=2) OR Zo:(pos=2)))")
     216                 "(Zfoo:(pos=1) AND ((out:(pos=2) SYNONYM outsid:(pos=2)) OR Zo:(pos=2)))")
    217217
    218218    expect_query(qp.parse_query("foo outside", qp.FLAG_PARTIAL),
    219219                 "(Zfoo:(pos=1) AND Zoutsid:(pos=2))")
  • xapian-bindings/python/smoketest3.py

     
    153153
    154154    # Feature test for Document.values
    155155    count = 0
    156     for term in doc.values():
     156    for term in list(doc.values()):
    157157        count += 1
    158158    expect(count, 0, "Unexpected number of entries in doc.values")
    159159
     
    213213    qp.set_stemming_strategy(qp.STEM_SOME)
    214214    qp.set_stemmer(xapian.Stem('en'))
    215215    expect_query(qp.parse_query("foo o", qp.FLAG_PARTIAL),
    216                  "(Zfoo:(pos=1) AND (out:(pos=2) OR outsid:(pos=2) OR Zo:(pos=2)))")
     216                 "(Zfoo:(pos=1) AND ((out:(pos=2) SYNONYM outsid:(pos=2)) OR Zo:(pos=2)))")
    217217
    218218    expect_query(qp.parse_query("foo outside", qp.FLAG_PARTIAL),
    219219                 "(Zfoo:(pos=1) AND Zoutsid:(pos=2))")