Ticket #50: opsynonym_changes_12590_12591.patch

File opsynonym_changes_12590_12591.patch, 121.8 KB (added by Richard Boulton, 16 years ago)

Latest patch

  • xapian-maintainer-tools/win32msvc/win32_matcher.mak

     
    3434    $(INTDIR)\queryoptimiser.obj\
    3535    $(INTDIR)\rset.obj\
    3636    $(INTDIR)\selectpostlist.obj\
     37    $(INTDIR)\synonympostlist.obj\
    3738    $(INTDIR)\valuerangepostlist.obj\
    3839    $(INTDIR)\valuegepostlist.obj\
    3940    $(INTDIR)\xorpostlist.obj\
     
    6061    $(INTDIR)\queryoptimiser.cc\
    6162    $(INTDIR)\rset.cc\
    6263    $(INTDIR)\selectpostlist.cc\
     64    $(INTDIR)\synonympostlist.cc\
    6365    $(INTDIR)\valuerangepostlist.cc\
    6466    $(INTDIR)\valuegepostlist.cc\
    6567    $(INTDIR)\xorpostlist.cc\
  • xapian-core/queryparser/queryparser.lemony

     
    22/* queryparser.lemony: build a Xapian::Query object from a user query string.
    33 *
    44 * Copyright (C) 2004,2005,2006,2007,2008 Olly Betts
     5 * Copyright (C) 2007,2008,2009 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or
    78 * modify it under the terms of the GNU General Public License as
     
    287288            end = db.synonyms_end(term);
    288289        }
    289290        while (syn != end) {
    290             q = Query(Query::OP_OR, q, Query(*syn, 1, pos));
     291            q = Query(Query::OP_SYNONYM, q, Query(*syn, 1, pos));
    291292            ++syn;
    292293        }
    293294    }
     
    353354        }
    354355    }
    355356    delete this;
    356     return new Query(Query::OP_OR, subqs.begin(), subqs.end());
     357    return new Query(Query::OP_SYNONYM, subqs.begin(), subqs.end());
    357358}
    358359
    359360Query *
    360361Term::as_partial_query(State * state_) const
    361362{
    362363    Database db = state_->get_database();
    363     vector<Query> subqs;
     364    vector<Query> subqs_partial; // A synonym of all the partial terms.
     365    vector<Query> subqs_full; // A synonym of all the full terms.
    364366    list<string>::const_iterator piter;
    365367    for (piter = prefixes.begin(); piter != prefixes.end(); ++piter) {
    366368        string root = *piter;
    367369        root += name;
    368370        TermIterator t = db.allterms_begin(root);
    369371        while (t != db.allterms_end(root)) {
    370             subqs.push_back(Query(*t, 1, pos));
     372            subqs_partial.push_back(Query(*t, 1, pos));
    371373            ++t;
    372374        }
    373375        // Add the term, as it would normally be handled, as an alternative.
    374         subqs.push_back(Query(make_term(*piter), 1, pos));
     376        subqs_full.push_back(Query(make_term(*piter), 1, pos));
    375377    }
    376378    delete this;
    377     return new Query(Query::OP_OR, subqs.begin(), subqs.end());
     379    return new Query(Query::OP_OR,
     380                     Query(Query::OP_SYNONYM,
     381                           subqs_partial.begin(), subqs_partial.end()),
     382                     Query(Query::OP_SYNONYM,
     383                           subqs_full.begin(), subqs_full.end()));
    378384}
    379385
    380386inline bool
     
    11761182                subqs2.push_back(Query(*syn, 1, pos));
    11771183                ++syn;
    11781184            }
    1179             Query q_synonym_terms(Query::OP_OR, subqs2.begin(), subqs2.end());
     1185            Query q_synonym_terms(Query::OP_SYNONYM, subqs2.begin(), subqs2.end());
    11801186            subqs2.clear();
    1181             subqs.push_back(Query(Query::OP_OR,
     1187            subqs.push_back(Query(Query::OP_SYNONYM,
    11821188                                  q_original_terms, q_synonym_terms));
    11831189        }
    11841190    } else {
  • xapian-core/matcher/extraweightpostlist.cc

     
     1/* extraweightpostlist.cc: Add an extra weight contribution to a postlist
     2 *
     3 * Copyright 2009 Lemur Consulting Ltd
     4 *
     5 * This program is free software; you can redistribute it and/or
     6 * modify it under the terms of the GNU General Public License as
     7 * published by the Free Software Foundation; either version 2 of the
     8 * License, or (at your option) any later version.
     9 *
     10 * This program is distributed in the hope that it will be useful,
     11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13 * GNU General Public License for more details.
     14 *
     15 * You should have received a copy of the GNU General Public License
     16 * along with this program; if not, write to the Free Software
     17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
     18 * USA
     19 */
     20
     21#include <config.h>
     22
     23#include "extraweightpostlist.h"
     24#include "omassert.h"
     25
     26TermFreqs
     27ExtraWeightPostList::get_termfreq_est_using_stats(
     28        const Xapian::Weight::Internal &) const
     29{
     30    // Should never get called.
     31    Assert(false);
     32    return TermFreqs();
     33}
  • xapian-core/matcher/extraweightpostlist.h

    Property changes on: xapian-core/matcher/extraweightpostlist.cc
    ___________________________________________________________________
    Added: svn:eol-style
       + native
    
     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2001 Ananova Ltd
    55 * Copyright 2003,2004,2007,2009 Olly Betts
     6 * Copyright 2009 Lemur Consulting Ltd
    67 *
    78 * This program is free software; you can redistribute it and/or
    89 * modify it under the terms of the GNU General Public License as
     
    4849            return pl->get_termfreq_est();
    4950        }
    5051
     52        TermFreqs get_termfreq_est_using_stats(
     53            const Xapian::Weight::Internal & stats) const;
     54
    5155        Xapian::docid  get_docid() const { return pl->get_docid(); }
    5256
    5357        Xapian::weight get_weight() const {
  • xapian-core/matcher/Makefile.mk

     
    1818        matcher/queryoptimiser.h\
    1919        matcher/remotesubmatch.h\
    2020        matcher/selectpostlist.h\
     21        matcher/synonympostlist.h\
    2122        matcher/valuegepostlist.h\
    2223        matcher/valuerangepostlist.h\
    2324        matcher/xorpostlist.h
     
    4344        matcher/emptysubmatch.cc\
    4445        matcher/exactphrasepostlist.cc\
    4546        matcher/externalpostlist.cc\
     47        matcher/extraweightpostlist.cc\
    4648        matcher/localmatch.cc\
    4749        matcher/mergepostlist.cc\
    4850        matcher/msetcmp.cc\
     
    5456        matcher/queryoptimiser.cc\
    5557        matcher/rset.cc\
    5658        matcher/selectpostlist.cc\
     59        matcher/synonympostlist.cc\
    5760        matcher/valuegepostlist.cc\
    5861        matcher/valuerangepostlist.cc\
    5962        matcher/xorpostlist.cc
  • xapian-core/matcher/andpostlist.h

     
    22 *
    33 * Copyright 2002 Ananova Ltd
    44 * Copyright 2003,2004,2009 Olly Betts
     5 * Copyright 2009 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or
    78 * modify it under the terms of the GNU General Public License as
     
    4344        Xapian::doccount get_termfreq_min() const;
    4445        Xapian::doccount get_termfreq_est() const;
    4546
     47        TermFreqs get_termfreq_est_using_stats(
     48            const Xapian::Weight::Internal & stats) const;
     49
    4650        Xapian::docid  get_docid() const;
    4751        Xapian::weight get_weight() const;
    4852        Xapian::weight get_maxweight() const;
     
    7074                    MultiMatch *matcher_,
    7175                    Xapian::doccount dbsize_,
    7276                    bool replacement = false);
     77
     78        /** get_wdf() for AND postlists returns the sum of the wdfs of the sub
     79         *  postlists - this is desirable when the AND is part of a synonym.
     80         */
     81        Xapian::termcount get_wdf() const;
    7382};
    7483
    7584#endif /* OM_HGUARD_ANDPOSTLIST_H */
  • xapian-core/matcher/multimatch.cc

     
    794794
    795795                LOGVALUE(MATCH, denom);
    796796                LOGVALUE(MATCH, percent_scale);
    797                 Assert(percent_scale <= denom);
    798                 denom *= greatest_wt;
    799                 Assert(denom > 0);
    800                 percent_scale /= denom;
     797                AssertRel(percent_scale,<=,denom);
     798                if (denom == 0) {
     799                    // This happens if the top-level operator is OP_SYNONYM.
     800                    percent_scale = 1.0 / greatest_wt;
     801                } else {
     802                    denom *= greatest_wt;
     803                    AssertRel(denom,>,0);
     804                    percent_scale /= denom;
     805                }
    801806            } else {
    802807                // If all the terms match, the 2 sums of weights cancel
    803808                percent_scale = 1.0 / greatest_wt;
  • xapian-core/matcher/localmatch.cc

     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002 Ananova Ltd
    55 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009 Olly Betts
    6  * Copyright 2007 Lemur Consulting Ltd
     6 * Copyright 2007,2008,2009 Lemur Consulting Ltd
    77 *
    88 * This program is free software; you can redistribute it and/or
    99 * modify it under the terms of the GNU General Public License as
     
    3131#include "omdebug.h"
    3232#include "omqueryinternal.h"
    3333#include "queryoptimiser.h"
     34#include "synonympostlist.h"
    3435#include "weightinternal.h"
    3536
    3637#include <cfloat>
     
    111112}
    112113
    113114PostList *
     115LocalSubMatch::make_synonym_postlist(PostList * or_pl, MultiMatch * matcher,
     116                                     double factor)
     117{
     118    DEBUGCALL(MATCH, PostList *, "LocalSubMatch::make_synonym_postlist",
     119              "[or_pl], [matcher], " << factor);
     120    LOGVALUE(MATCH, or_pl->get_termfreq_est());
     121    AutoPtr<SynonymPostList> res(new SynonymPostList(or_pl, matcher));
     122    AutoPtr<Xapian::Weight> wt(wt_factory->clone_());
     123
     124    TermFreqs freqs(or_pl->get_termfreq_est_using_stats(*stats));
     125    wt->init_(*stats, qlen, factor, freqs.termfreq, freqs.reltermfreq);
     126
     127    res->set_weight(wt.release());
     128    RETURN(res.release());
     129}
     130
     131PostList *
    114132LocalSubMatch::postlist_from_op_leaf_query(const Xapian::Query::Internal *query,
    115133                                           double factor)
    116134{
  • xapian-core/matcher/localmatch.h

     
    22 *  @brief SubMatch class for a local database.
    33 */
    44/* Copyright (C) 2006,2007,2009 Olly Betts
     5 * Copyright (C) 2007 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or modify
    78 * it under the terms of the GNU General Public License as published by
     
    8283    PostList * get_postlist_and_term_info(MultiMatch *matcher,
    8384        std::map<string, Xapian::MSet::Internal::TermFreqAndWeight> *termfreqandwts);
    8485
     86    /** Wrap a postlist in a SynonymPostList, which supplies the weights.
     87     */
     88    PostList * make_synonym_postlist(PostList * or_pl, MultiMatch * matcher,
     89                                     double factor);
     90
    8591    /** Convert an OP_LEAF query to a PostList.
    8692     *
    8793     *  This is called by QueryOptimiser when it reaches an OP_LEAF query.
  • xapian-core/matcher/msetpostlist.h

     
    22 *  @brief PostList returning entries from an MSet
    33 */
    44/* Copyright (C) 2006,2007,2008,2009 Olly Betts
     5 * Copyright (C) 2009 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or modify
    78 * it under the terms of the GNU General Public License as published by
     
    6263
    6364    Xapian::doccount get_termfreq_max() const;
    6465
     66    TermFreqs get_termfreq_est_using_stats(
     67        const Xapian::Weight::Internal & stats) const;
     68
    6569    Xapian::weight get_maxweight() const;
    6670
    6771    Xapian::docid get_docid() const;
  • xapian-core/matcher/xorpostlist.h

     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002 Ananova Ltd
    55 * Copyright 2003,2004,2009 Olly Betts
     6 * Copyright 2009 Lemur Consulting Ltd
    67 *
    78 * This program is free software; you can redistribute it and/or
    89 * modify it under the terms of the GNU General Public License as
     
    4748        Xapian::doccount get_termfreq_max() const;
    4849        Xapian::doccount get_termfreq_min() const;
    4950        Xapian::doccount get_termfreq_est() const;
     51        TermFreqs get_termfreq_est_using_stats(
     52            const Xapian::Weight::Internal & stats) const;
    5053
    5154        Xapian::docid  get_docid() const;
    5255        Xapian::weight get_weight() const;
     
    6972                    PostList * right_,
    7073                    MultiMatch * matcher_,
    7174                    Xapian::doccount dbsize_);
     75
     76        /** get_wdf() for XOR postlists returns the wdf of the sub postlist
     77         *  which is at the current document.
     78         */
     79        Xapian::termcount get_wdf() const;
    7280};
    7381
    7482#endif /* OM_HGUARD_XORPOSTLIST_H */
  • xapian-core/matcher/synonympostlist.h

     
     1/** @file synonympostlist.h
     2 * @brief Combine subqueries, weighting as if they are synonyms
     3 */
     4/* Copyright 2007,2009 Lemur Consulting Ltd
     5 *
     6 * This program is free software; you can redistribute it and/or modify
     7 * it under the terms of the GNU General Public License as published by
     8 * the Free Software Foundation; either version 2 of the License, or
     9 * (at your option) any later version.
     10 *
     11 * This program is distributed in the hope that it will be useful,
     12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14 * GNU General Public License for more details.
     15 *
     16 * You should have received a copy of the GNU General Public License
     17 * along with this program; if not, write to the Free Software
     18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
     19 */
     20
     21#ifndef XAPIAN_INCLUDED_SYNONYMPOSTLIST_H
     22#define XAPIAN_INCLUDED_SYNONYMPOSTLIST_H
     23
     24#include "multimatch.h"
     25#include "postlist.h"
     26
     27/** A postlist comprising several postlists SYNONYMed together.
     28 *
     29 *  This postlist returns all postings in the OR of the sub postlists, but
     30 *  returns weights as if they represented a single term.  The term frequency
     31 *  portion of the weight is approximated.
     32 */
     33class SynonymPostList : public PostList {
     34    /** The subtree, which starts as an OR of all the sub-postlists being
     35     *  joined with Synonym, but may decay into something else.
     36     */
     37    PostList * subtree;
     38
     39    /** The object which is using this postlist to perform a match.
     40     *
     41     *  This object needs to be notified when the tree changes such that the
     42     *  maximum weights need to be recalculated.
     43     */
     44    MultiMatch * matcher;
     45
     46    /// Weighting object used for calculating the synonym weights.
     47    const Xapian::Weight * wt;
     48
     49    /// Flag indicating whether the weighting object needs the doclength.
     50    bool want_doclength;
     51
     52    /// Flag indicating whether the weighting object needs the wdf.
     53    bool want_wdf;
     54
     55    /// Flag indicating if we've called recalc_maxweight on the subtree yet.
     56    bool have_calculated_subtree_maxweights;
     57
     58  public:
     59    SynonymPostList(PostList * subtree_, MultiMatch * matcher_)
     60        : subtree(subtree_), matcher(matcher_), wt(NULL),
     61          want_doclength(false), want_wdf(false),
     62          have_calculated_subtree_maxweights(false) { }
     63
     64    ~SynonymPostList();
     65
     66    /** Set the weight object to be used for the synonym postlist.
     67     *
     68     *  Ownership of the weight object passes to the synonym postlist - the
     69     *  caller must not delete it after use.
     70     */
     71    void set_weight(const Xapian::Weight * wt_);
     72
     73    PostList *next(Xapian::weight w_min);
     74    PostList *skip_to(Xapian::docid did, Xapian::weight w_min);
     75
     76    Xapian::weight get_weight() const;
     77    Xapian::weight get_maxweight() const;
     78    Xapian::weight recalc_maxweight();
     79
     80    // The following methods just call through to the subtree.
     81    Xapian::termcount get_wdf() const;
     82    Xapian::doccount get_termfreq_min() const;
     83    Xapian::doccount get_termfreq_est() const;
     84    Xapian::doccount get_termfreq_max() const;
     85    TermFreqs get_termfreq_est_using_stats(
     86        const Xapian::Weight::Internal & stats) const;
     87    Xapian::docid get_docid() const;
     88    Xapian::termcount get_doclength() const;
     89    bool at_end() const;
     90
     91    std::string get_description() const;
     92};
     93
     94#endif /* XAPIAN_INCLUDED_SYNONYMPOSTLIST_H */
  • xapian-core/matcher/phrasepostlist.cc

    Property changes on: xapian-core/matcher/synonympostlist.h
    ___________________________________________________________________
    Added: svn:eol-style
       + native
    
     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002 Ananova Ltd
    55 * Copyright 2002,2003,2004,2007,2008,2009 Olly Betts
     6 * Copyright 2009 Lemur Consulting Ltd
    67 *
    78 * This program is free software; you can redistribute it and/or
    89 * modify it under the terms of the GNU General Public License as
     
    149150    return std::max(wdf, 1u);
    150151}
    151152
     153TermFreqs
     154NearPostList::get_termfreq_est_using_stats(
     155        const Xapian::Weight::Internal & stats) const
     156{
     157    LOGCALL(MATCH, TermFreqs,
     158            "NearPostList::get_termfreq_est_using_stats", stats);
     159    // No idea how to estimate this - FIXME
     160    TermFreqs result(source->get_termfreq_est_using_stats(stats));
     161    result.termfreq /= 2;
     162    result.reltermfreq /= 2;
     163    RETURN(result);
     164}
     165
    152166std::string
    153167NearPostList::get_description() const
    154168{
     
    264278    return std::max(wdf / 2, 1u);
    265279}
    266280
     281TermFreqs
     282PhrasePostList::get_termfreq_est_using_stats(
     283        const Xapian::Weight::Internal & stats) const
     284{
     285    LOGCALL(MATCH, TermFreqs,
     286            "PhrasePostList::get_termfreq_est_using_stats", stats);
     287    // No idea how to estimate this - FIXME
     288    TermFreqs result(source->get_termfreq_est_using_stats(stats));
     289    result.termfreq /= 3;
     290    result.reltermfreq /= 3;
     291    RETURN(result);
     292}
     293
    267294std::string
    268295PhrasePostList::get_description() const
    269296{
  • xapian-core/matcher/orpostlist.h

     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002 Ananova Ltd
    55 * Copyright 2003,2004,2009 Olly Betts
     6 * Copyright 2009 Lemur Consulting Ltd
    67 *
    78 * This program is free software; you can redistribute it and/or
    89 * modify it under the terms of the GNU General Public License as
     
    4142        Xapian::doccount get_termfreq_max() const;
    4243        Xapian::doccount get_termfreq_min() const;
    4344        Xapian::doccount get_termfreq_est() const;
     45        TermFreqs get_termfreq_est_using_stats(
     46            const Xapian::Weight::Internal & stats) const;
    4447
    4548        Xapian::docid  get_docid() const;
    4649        Xapian::weight get_weight() const;
     
    6770                   PostList * right_,
    6871                   MultiMatch * matcher_,
    6972                   Xapian::doccount dbsize_);
     73
     74        /** get_wdf() for OR postlists returns the sum of the wdfs of the
     75         *  sub postlists which are at the current document - this is desirable
     76         *  when the OR is part of a synonym.
     77         */
     78        Xapian::termcount get_wdf() const;
    7079};
    7180
    7281#endif /* OM_HGUARD_ORPOSTLIST_H */
  • xapian-core/matcher/andmaybepostlist.h

     
    66 * Copyright 1999,2000,2001 BrightStation PLC
    77 * Copyright 2002 Ananova Ltd
    88 * Copyright 2003,2004,2009 Olly Betts
     9 * Copyright 2009 Lemur Consulting Ltd
    910 *
    1011 * This program is free software; you can redistribute it and/or
    1112 * modify it under the terms of the GNU General Public License as
     
    6162        Xapian::doccount get_termfreq_min() const;
    6263        Xapian::doccount get_termfreq_est() const;
    6364
     65        TermFreqs get_termfreq_est_using_stats(
     66            const Xapian::Weight::Internal & stats) const;
     67
    6468        Xapian::docid  get_docid() const;
    6569        Xapian::weight get_weight() const;
    6670        Xapian::weight get_maxweight() const;
     
    103107            lmax = l->get_maxweight();
    104108            rmax = r->get_maxweight();
    105109        }
     110
     111        /** get_wdf() for ANDMAYBE postlists returns the sum of the wdfs of the
     112         *  sub postlists which are at the current document - this is desirable
     113         *  when the ANDMAYBE is part of a synonym.
     114         */
     115        Xapian::termcount get_wdf() const;
    106116};
    107117
    108118#endif /* OM_HGUARD_ANDMAYBEPOSTLIST_H */
  • xapian-core/matcher/externalpostlist.cc

     
    7272    return source->get_termfreq_max();
    7373}
    7474
     75TermFreqs
     76ExternalPostList::get_termfreq_est_using_stats(
     77        const Xapian::Weight::Internal &) const
     78{
     79    // Should never get called.
     80    Assert(false);
     81    return TermFreqs();
     82}
     83
    7584Xapian::weight
    7685ExternalPostList::get_maxweight() const
    7786{
  • xapian-core/matcher/andnotpostlist.cc

     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002 Ananova Ltd
    55 * Copyright 2003,2004,2007,2009 Olly Betts
     6 * Copyright 2009 Lemur Consulting Ltd
    67 *
    78 * This program is free software; you can redistribute it and/or
    89 * modify it under the terms of the GNU General Public License as
     
    124125    RETURN(static_cast<Xapian::doccount>(est + 0.5));
    125126}
    126127
     128TermFreqs
     129AndNotPostList::get_termfreq_est_using_stats(
     130        const Xapian::Weight::Internal & stats) const
     131{
     132    LOGCALL(MATCH, TermFreqs,
     133              "AndNotPostList::get_termfreq_est_using_stats", stats);
     134    // Estimate assuming independence:
     135    // P(l and r) = P(l) . P(r)
     136    // P(l not r) = P(l) - P(l and r) = P(l) . ( 1 - P(r))
     137    TermFreqs lfreqs(l->get_termfreq_est_using_stats(stats));
     138    TermFreqs rfreqs(r->get_termfreq_est_using_stats(stats));
     139
     140    double freqest, relfreqest;
     141
     142    if (stats.collection_size == 0) {
     143        freqest = 0;
     144    } else {
     145        freqest = lfreqs.termfreq *
     146                (1.0 - (double(rfreqs.termfreq) / stats.collection_size));
     147    }
     148
     149    if (stats.rset_size == 0) {
     150        relfreqest = 0;
     151    } else {
     152        relfreqest = lfreqs.reltermfreq *
     153                (1.0 - (double(rfreqs.reltermfreq) / stats.rset_size));
     154    }
     155
     156    RETURN(TermFreqs(static_cast<Xapian::doccount>(freqest + 0.5),
     157                     static_cast<Xapian::doccount>(relfreqest + 0.5)));
     158}
     159
    127160Xapian::docid
    128161AndNotPostList::get_docid() const
    129162{
     
    175208    DEBUGCALL(MATCH, Xapian::termcount, "AndNotPostList::get_doclength", "");
    176209    RETURN(l->get_doclength());
    177210}
     211
     212Xapian::termcount
     213AndNotPostList::get_wdf() const
     214{
     215    DEBUGCALL(MATCH, Xapian::termcount, "AndNotPostList::get_wdf", "");
     216    RETURN(l->get_wdf());
     217}
  • xapian-core/matcher/andnotpostlist.h

    Property changes on: xapian-core/matcher/collapser.h
    ___________________________________________________________________
    Deleted: svn:mergeinfo
    
     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002 Ananova Ltd
    55 * Copyright 2003,2004,2009 Olly Betts
     6 * Copyright 2009 Lemur Consulting Ltd
    67 *
    78 * This program is free software; you can redistribute it and/or
    89 * modify it under the terms of the GNU General Public License as
     
    4243        Xapian::doccount get_termfreq_max() const;
    4344        Xapian::doccount get_termfreq_min() const;
    4445        Xapian::doccount get_termfreq_est() const;
     46        TermFreqs get_termfreq_est_using_stats(
     47            const Xapian::Weight::Internal & stats) const;
    4548
    4649        Xapian::docid  get_docid() const;
    4750        Xapian::weight get_weight() const;
     
    6972                                   Xapian::weight w_min,
    7073                                   Xapian::docid lh,
    7174                                   Xapian::docid rh);
     75
     76        /** get_wdf() for ANDNOT postlists returns the wdf of the left hand
     77         * side.
     78         */
     79        Xapian::termcount get_wdf() const;
    7280};
    7381
    7482#endif /* OM_HGUARD_ANDNOTPOSTLIST_H */
  • xapian-core/matcher/valuerangepostlist.cc

     
    22 * @brief Return document ids matching a range test on a specified doc value.
    33 */
    44/* Copyright 2007,2008,2009 Olly Betts
     5 * Copyright 2009 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or modify
    78 * it under the terms of the GNU General Public License as published by
     
    5051    return db_size / 2;
    5152}
    5253
     54TermFreqs
     55ValueRangePostList::get_termfreq_est_using_stats(
     56        const Xapian::Weight::Internal & stats) const
     57{
     58    LOGCALL(MATCH, TermFreqs,
     59            "ValueRangePostList::get_termfreq_est_using_stats", stats);
     60    // FIXME: It's hard to estimate well - perhaps consider the values of
     61    // begin and end?
     62    RETURN(TermFreqs(stats.collection_size / 2, stats.rset_size / 2));
     63}
     64
    5365Xapian::doccount
    5466ValueRangePostList::get_termfreq_max() const
    5567{
  • xapian-core/matcher/valuerangepostlist.h

     
    22 * @brief Return document ids matching a range test on a specified doc value.
    33 */
    44/* Copyright 2007,2008,2009 Olly Betts
     5 * Copyright 2009 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or modify
    78 * it under the terms of the GNU General Public License as published by
     
    5960
    6061    Xapian::doccount get_termfreq_max() const;
    6162
     63    TermFreqs get_termfreq_est_using_stats(
     64        const Xapian::Weight::Internal & stats) const;
     65
    6266    Xapian::weight get_maxweight() const;
    6367
    6468    Xapian::docid get_docid() const;
  • xapian-core/matcher/queryoptimiser.cc

     
    8282        case Xapian::Query::OP_ELITE_SET:
    8383            RETURN(do_or_like(query, factor));
    8484
     85        case Xapian::Query::OP_SYNONYM:
     86            RETURN(do_synonym(query, factor));
     87
    8588        case Xapian::Query::OP_AND_NOT: {
    8689            AssertEq(query->subqs.size(), 2);
    8790            PostList * l = do_subquery(query->subqs[0], factor);
     
    304307    // for AND-like operations.
    305308    Xapian::Query::Internal::op_t op = query->op;
    306309    Assert(op == Xapian::Query::OP_ELITE_SET || op == Xapian::Query::OP_OR ||
    307            op == Xapian::Query::OP_XOR);
     310           op == Xapian::Query::OP_XOR || op == Xapian::Query::OP_SYNONYM);
    308311
    309312    const Xapian::Query::Internal::subquery_list &queries = query->subqs;
    310313    AssertRel(queries.size(), >=, 2);
     
    382385                  ComparePostListTermFreqAscending());
    383386    }
    384387}
     388
     389PostList *
     390QueryOptimiser::do_synonym(const Xapian::Query::Internal *query, double factor)
     391{
     392    DEBUGCALL(MATCH, PostList *, "QueryOptimiser::do_synonym",
     393              query << ", " << factor);
     394    if (factor == 0.0) {
     395        // If we have a factor of 0, we don't care about the weights, so
     396        // we're just like a normal OR query.
     397        RETURN(do_or_like(query, 0.0));
     398    }
     399
     400    // We currently assume wqf is 1 for calculating the synonym's weight
     401    // since conceptually the synonym is one "virtual" term.  If we were
     402    // to combine multiple occurrences of the same synonym expansion into
     403    // a single instance with wqf set, we would want to use the wqf.
     404    AssertEq(query->wqf, 0);
     405
     406    // We build an OP_OR tree for OP_SYNONYM and then wrap it in a
     407    // SynonymPostList, which supplies the weights.
     408    RETURN(localsubmatch.make_synonym_postlist(do_or_like(query, 0.0),
     409                                               matcher, factor));
     410}
  • xapian-core/matcher/mergepostlist.cc

     
    137137    return total;
    138138}
    139139
     140TermFreqs
     141MergePostList::get_termfreq_est_using_stats(
     142        const Xapian::Weight::Internal &) const
     143{
     144    // Should never get called.
     145    Assert(false);
     146    return TermFreqs();
     147}
     148
    140149Xapian::docid
    141150MergePostList::get_docid() const
    142151{
  • xapian-core/matcher/mergepostlist.h

     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002 Ananova Ltd
    55 * Copyright 2002,2003,2004,2005,2009 Olly Betts
    6  * Copyright 2007 Lemur Consulting Ltd
     6 * Copyright 2007,2009 Lemur Consulting Ltd
    77 *
    88 * This program is free software; you can redistribute it and/or
    99 * modify it under the terms of the GNU General Public License as
     
    5353        Xapian::doccount get_termfreq_max() const;
    5454        Xapian::doccount get_termfreq_min() const;
    5555        Xapian::doccount get_termfreq_est() const;
     56        TermFreqs get_termfreq_est_using_stats(
     57            const Xapian::Weight::Internal & stats) const;
    5658
    5759        Xapian::docid  get_docid() const;
    5860        Xapian::weight get_weight() const;
  • xapian-core/matcher/queryoptimiser.h

     
    22 * @brief Convert a Xapian::Query::Internal tree into an optimal PostList tree.
    33 */
    44/* Copyright (C) 2007,2008,2009 Olly Betts
     5 * Copyright (C) 2008 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or
    78 * modify it under the terms of the GNU General Public License as
     
    8889     */
    8990    PostList * do_or_like(const Xapian::Query::Internal *query, double factor);
    9091
     92    /** Optimise a synonym Xapian::Query::Internal subtree into a PostList
     93     *
     94     *  @param query    The subtree to optimise.
     95     *  @param factor   How much to scale weights for this subtree by.
     96     *
     97     *  @return         A PostList subtree.
     98     */
     99    PostList * do_synonym(const Xapian::Query::Internal *query, double factor);
     100
    91101  public:
    92102    QueryOptimiser(const Xapian::Database::Internal & db_,
    93103                   LocalSubMatch & localsubmatch_,
  • xapian-core/matcher/andpostlist.cc

     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002 Ananova Ltd
    55 * Copyright 2003,2004,2007,2008,2009 Olly Betts
    6  * Copyright 2007 Lemur Consulting Ltd
     6 * Copyright 2007,2009 Lemur Consulting Ltd
    77 *
    88 * This program is free software; you can redistribute it and/or
    99 * modify it under the terms of the GNU General Public License as
     
    149149    RETURN(static_cast<Xapian::doccount>(lest * rest / dbsize + 0.5));
    150150}
    151151
     // Estimate the termfreq and reltermfreq of (l AND r) from the estimates
     // of the two branches, using the collection and rset sizes in stats.
     //
     // Each estimate is computed in double precision and rounded to the
     // nearest integer (add 0.5, then cast) when building the result.
     152TermFreqs
     153AndPostList::get_termfreq_est_using_stats(
     154        const Xapian::Weight::Internal & stats) const
     155{
     156    LOGCALL(MATCH, TermFreqs,
     157            "AndPostList::get_termfreq_est_using_stats", stats);
     158    // Estimate assuming independence:
     159    // P(l and r) = P(l) . P(r)
     160    TermFreqs lfreqs(l->get_termfreq_est_using_stats(stats));
     161    TermFreqs rfreqs(r->get_termfreq_est_using_stats(stats));
     162
     163    double freqest, relfreqest;
     164
     // An empty collection gives an estimate of 0 and avoids dividing by zero.
     165    if (stats.collection_size == 0) {
     166        freqest = 0;
     167    } else {
     168        freqest = double(lfreqs.termfreq) *
     169                double(rfreqs.termfreq) / stats.collection_size;
     170    }
     171
     // Likewise an empty rset gives a relevant-document estimate of 0.
     172    if (stats.rset_size == 0) {
     173        relfreqest = 0;
     174    } else {
     175        relfreqest = double(lfreqs.reltermfreq) *
     176                double(rfreqs.reltermfreq) / stats.rset_size;
     177    }
     178
     179    RETURN(TermFreqs(static_cast<Xapian::doccount>(freqest + 0.5),
     180                     static_cast<Xapian::doccount>(relfreqest + 0.5)));
     181}
     182
    152183Xapian::docid
    153184AndPostList::get_docid() const
    154185{
     
    203234    AssertEq(doclength, r->get_doclength());
    204235    RETURN(doclength);
    205236}
     237
     // Return the wdf for the current position as the sum of the wdfs
     // reported by the left and right branches.
     238Xapian::termcount
     239AndPostList::get_wdf() const
     240{
     241    DEBUGCALL(MATCH, Xapian::termcount, "AndPostList::get_wdf", "");
     242    RETURN(l->get_wdf() + r->get_wdf());
     243}
  • xapian-core/matcher/exactphrasepostlist.cc

     
    154154    return source->get_termfreq_est() / 4;
    155155}
    156156
     // Estimate termfreq and reltermfreq for the exact phrase by taking the
     // source postlist's stats-based estimate and dividing both fields by 4
     // (integer division on the TermFreqs members).
     157TermFreqs
     158ExactPhrasePostList::get_termfreq_est_using_stats(
     159        const Xapian::Weight::Internal & stats) const
     160{
     161    LOGCALL(MATCH, TermFreqs,
     162            "ExactPhrasePostList::get_termfreq_est_using_stats", stats);
     163    // No principled way to estimate this is known, so apply the same
     164    // divide-by-4 heuristic as get_termfreq_est() for now.
     165    TermFreqs result(source->get_termfreq_est_using_stats(stats));
     166    result.termfreq /= 4;
     167    result.reltermfreq /= 4;
     168    RETURN(result);
     169}
     170
    157171string
    158172ExactPhrasePostList::get_description() const
    159173{
  • xapian-core/matcher/msetpostlist.cc

     
    22 *  @brief PostList returning entries from an MSet
    33 */
    44/* Copyright (C) 2006,2007,2009 Olly Betts
     5 * Copyright (C) 2009 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or modify
    78 * it under the terms of the GNU General Public License as published by
     
    4546    RETURN(mset_internal->matches_upper_bound);
    4647}
    4748
     // Stats-based termfreq estimate for MSetPostList.
     //
     // The stats argument is unnamed and ignored: this override trips
     // Assert(false) and then returns a default-constructed TermFreqs.
     49TermFreqs
     50MSetPostList::get_termfreq_est_using_stats(
     51        const Xapian::Weight::Internal &) const
     52{
     53    // Should never get called.
     54    Assert(false);
     55    return TermFreqs();
     56}
     57
    4858Xapian::weight
    4959MSetPostList::get_maxweight() const
    5060{
  • xapian-core/matcher/xorpostlist.cc

     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002 Ananova Ltd
    55 * Copyright 2003,2004,2007,2008,2009 Olly Betts
     6 * Copyright 2009 Lemur Consulting Ltd
    67 *
    78 * This program is free software; you can redistribute it and/or
    89 * modify it under the terms of the GNU General Public License as
     
    232233    RETURN(static_cast<Xapian::doccount>(est + 0.5));
    233234}
    234235
     236TermFreqs
     237XorPostList::get_termfreq_est_using_stats(
     238        const Xapian::Weight::Internal & stats) const
     239{
     240    LOGCALL(MATCH, TermFreqs,
     241            "XorPostList::get_termfreq_est_using_stats", stats);
     242    // Estimate assuming independence:
     243    // P(l or r) = P(l) + P(r) - 2 . P(l) . P(r)
     244    TermFreqs lfreqs(l->get_termfreq_est_using_stats(stats));
     245    TermFreqs rfreqs(r->get_termfreq_est_using_stats(stats));
     246
     247    double freqest, relfreqest;
     248
     249    if (stats.collection_size == 0) {
     250        freqest = 0;
     251    } else {
     252        freqest = lfreqs.termfreq + rfreqs.termfreq
     253                - (2.0 * lfreqs.termfreq * rfreqs.termfreq
     254                   / stats.collection_size);
     255    }
     256
     257    if (stats.collection_size == 0) {
     258        relfreqest = 0;
     259    } else {
     260        relfreqest = lfreqs.reltermfreq + rfreqs.reltermfreq
     261                - (2.0 * lfreqs.reltermfreq * rfreqs.reltermfreq
     262                   / stats.rset_size);
     263    }
     264
     265    RETURN(TermFreqs(static_cast<Xapian::doccount>(freqest + 0.5),
     266                     static_cast<Xapian::doccount>(relfreqest + 0.5)));
     267}
     268
    235269Xapian::docid
    236270XorPostList::get_docid() const
    237271{
     
    294328    Assert(lhead > rhead);
    295329    return r->get_doclength();
    296330}
     331
     // Return the wdf from whichever branch is selected by comparing lhead
     // and rhead: the left branch when lhead < rhead, otherwise the right.
     // NOTE(review): lhead/rhead are presumably the branches' current
     // docids - confirm against the rest of xorpostlist.cc.
     332Xapian::termcount
     333XorPostList::get_wdf() const
     334{
     335    DEBUGCALL(MATCH, Xapian::termcount, "XorPostList::get_wdf", "");
     336    if (lhead < rhead) RETURN(l->get_wdf());
     337    RETURN(r->get_wdf());
     338}
  • xapian-core/matcher/exactphrasepostlist.h

     
    22 * @brief Return docs containing terms forming a particular exact phrase.
    33 *
    44 * Copyright (C) 2006 Olly Betts
     5 * Copyright (C) 2009 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or modify
    78 * it under the terms of the GNU General Public License as published by
     
    5859
    5960    Xapian::doccount get_termfreq_est() const;
    6061
     62    TermFreqs get_termfreq_est_using_stats(
     63        const Xapian::Weight::Internal & stats) const;
     64
    6165    std::string get_description() const;
    6266};
    6367
  • xapian-core/matcher/rset.cc

     
    8181    DEBUGCALL(MATCH, void, "RSetI::contribute_stats", stats);
    8282    calculate_stats();
    8383
    84     Xapian::TermFreqMap::const_iterator i;
     84    std::map<std::string, Xapian::doccount>::const_iterator i;
    8585    for (i = reltermfreqs.begin(); i != reltermfreqs.end(); i++) {
    8686        stats.set_reltermfreq(i->first, i->second);
    8787    }
  • xapian-core/matcher/synonympostlist.cc

     
     1/** @file synonympostlist.cc
     2 * @brief Combine subqueries, weighting as if they are synonyms
     3 */
     4/* Copyright 2007,2009 Lemur Consulting Ltd
     5 *
     6 * This program is free software; you can redistribute it and/or
     7 * modify it under the terms of the GNU General Public License as
     8 * published by the Free Software Foundation; either version 2 of the
     9 * License, or (at your option) any later version.
     10 *
     11 * This program is distributed in the hope that it will be useful,
     12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14 * GNU General Public License for more details.
     15 *
     16 * You should have received a copy of the GNU General Public License
     17 * along with this program; if not, write to the Free Software
     18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
     19 * USA
     20 */
     21
     22#include <config.h>
     23
     24#include "synonympostlist.h"
     25
     26#include "branchpostlist.h"
     27#include "debuglog.h"
     28
     // Destructor: deletes both the weight object (wt) and the wrapped
     // subtree postlist.
     29SynonymPostList::~SynonymPostList()
     30{
     31    delete wt;
     32    delete subtree;
     33}
     34
     // Install the weight object used to score this synonym.
     //
     // The previously held weight object is deleted and wt_ is stored in its
     // place (the destructor later deletes it).  Whether the new weight needs
     // the document length and/or the wdf is cached for get_weight().
     //
     // NOTE(review): passing the pointer already held in wt would delete it
     // before it is used below - confirm callers never do that.
     35void
     36SynonymPostList::set_weight(const Xapian::Weight * wt_)
     37{
     38    delete wt;
     39    wt = wt_;
     40    want_doclength = wt->get_sumpart_needs_doclength_();
     41    want_wdf = wt->get_sumpart_needs_wdf_();
     42}
     43
     // Advance the wrapped subtree to its next entry.
     //
     // w_min is logged but otherwise unused: the subtree is always advanced
     // with a minimum weight of 0.  Always returns NULL.
     44PostList *
     45SynonymPostList::next(Xapian::weight w_min)
     46{
     47    LOGCALL(MATCH, PostList *, "SynonymPostList::next", w_min);
     48    (void)w_min;
     49    next_handling_prune(subtree, 0, matcher);
     50    RETURN(NULL);
     51}
     52
     // Skip the wrapped subtree forward to docid did.
     //
     // w_min is logged but otherwise unused: the subtree is always skipped
     // with a minimum weight of 0.  Always returns NULL.
     53PostList *
     54SynonymPostList::skip_to(Xapian::docid did, Xapian::weight w_min)
     55{
     56    LOGCALL(MATCH, PostList *, "SynonymPostList::skip_to", did << ", " << w_min);
     57    (void)w_min;
     58    skip_to_handling_prune(subtree, did, 0, matcher);
     59    RETURN(NULL);
     60}
     61
     // Compute the weight of the current document by calling get_sumpart()
     // on the installed weight object.
     //
     // When the weight object wants the wdf, the subtree's wdf (clamped to
     // the document length) and the document length are passed.  Otherwise
     // the wdf is passed as 0, and the document length is only fetched if the
     // weight object wants it.
     62Xapian::weight
     63SynonymPostList::get_weight() const
     64{
     65    LOGCALL(MATCH, Xapian::weight, "SynonymPostList::get_weight", "");
     66    // The wdf returned can be higher than the doclength.  In particular, this
     67    // can currently occur if the query contains a term more than once; the wdf
     68    // of each occurrence is added up.
     69    //
     70    // However, it's reasonable for weighting algorithms to optimise by
     71    // assuming that get_wdf() will never return more than get_doclength(),
     72    // since the doclength is the sum of the wdfs.
     73    //
     74    // Therefore, we simply clamp the wdf value to the doclength, to ensure
     75    // that this is true.  Note that this requires the doclength to be
     76    // calculated even if the weight object doesn't want it.
     77
     78    if (want_wdf) {
     79        Xapian::termcount wdf = get_wdf();
     80        Xapian::termcount doclen = get_doclength();
     81        if (wdf > doclen) wdf = doclen;
     82        RETURN(wt->get_sumpart(wdf, doclen));
     83    }
     84    RETURN(wt->get_sumpart(0, want_doclength ? get_doclength() : 0));
     85}
     86
     // Return the maximum weight this postlist can report, which is the
     // installed weight object's get_maxpart().
     87Xapian::weight
     88SynonymPostList::get_maxweight() const
     89{
     90    LOGCALL(MATCH, Xapian::weight, "SynonymPostList::get_maxweight", "");
     91    RETURN(wt->get_maxpart());
     92}
     93
     // Recalculate the maximum weight.
     //
     // The subtree's recalc_maxweight() is invoked on the first call only
     // (tracked by have_calculated_subtree_maxweights); its result is not
     // used.  The value returned is always this postlist's get_maxweight().
     94Xapian::weight
     95SynonymPostList::recalc_maxweight()
     96{
     97    LOGCALL(MATCH, Xapian::weight, "SynonymPostList::recalc_maxweight", "");
     98
     99    // Call recalc_maxweight on the subtree once, to ensure that the maxweights
     100    // are initialised.
     101    if (!have_calculated_subtree_maxweights) {
     102        subtree->recalc_maxweight();
     103        have_calculated_subtree_maxweights = true;
     104    }
     105    RETURN(SynonymPostList::get_maxweight());
     106}
     107
     // Return the wdf reported by the wrapped subtree (unclamped; see
     // get_weight() for the clamping against the document length).
     108Xapian::termcount
     109SynonymPostList::get_wdf() const {
     110    LOGCALL(MATCH, Xapian::termcount, "SynonymPostList::get_wdf", "");
     111    RETURN(subtree->get_wdf());
     112}
     113
     // Return the wrapped subtree's get_termfreq_min() unchanged.
     114Xapian::doccount
     115SynonymPostList::get_termfreq_min() const {
     116    LOGCALL(MATCH, Xapian::doccount, "SynonymPostList::get_termfreq_min", "");
     117    RETURN(subtree->get_termfreq_min());
     118}
     119
     120Xapian::doccount
     121SynonymPostList::get_termfreq_est() const {
     122    LOGCALL(MATCH, Xapian::doccount, "SynonymPostList::get_termfreq_min", "");
     123    RETURN(subtree->get_termfreq_est());
     124}
     125
     126Xapian::doccount
     127SynonymPostList::get_termfreq_max() const {
     128    LOGCALL(MATCH, Xapian::doccount, "SynonymPostList::get_termfreq_min", "");
     129    RETURN(subtree->get_termfreq_max());
     130}
     131
     // Return the wrapped subtree's stats-based termfreq estimate unchanged,
     // passing stats straight through.
     132TermFreqs
     133SynonymPostList::get_termfreq_est_using_stats(
     134        const Xapian::Weight::Internal & stats) const
     135{
     136    LOGCALL(MATCH, TermFreqs,
     137            "SynonymPostList::get_termfreq_est_using_stats", stats);
     138    RETURN(subtree->get_termfreq_est_using_stats(stats));
     139}
     140
     // Return the docid at the wrapped subtree's current position.
     141Xapian::docid
     142SynonymPostList::get_docid() const {
     143    LOGCALL(MATCH, Xapian::docid, "SynonymPostList::get_docid", "");
     144    RETURN(subtree->get_docid());
     145}
     146
     // Return the document length reported by the wrapped subtree.
     147Xapian::termcount
     148SynonymPostList::get_doclength() const {
     149    LOGCALL(MATCH, Xapian::termcount, "SynonymPostList::get_doclength", "");
     150    RETURN(subtree->get_doclength());
     151}
     152
     // Report whether the wrapped subtree has reached its end.
     153bool
     154SynonymPostList::at_end() const {
     155    LOGCALL(MATCH, bool, "SynonymPostList::at_end", "");
     156    RETURN(subtree->at_end());
     157}
     158
     // Describe this postlist as "(Synonym <subtree description>)".
     159std::string
     160SynonymPostList::get_description() const
     161{
     162    return "(Synonym " + subtree->get_description() + ")";
     163}
  • xapian-core/matcher/multiandpostlist.cc

    Property changes on: xapian-core/matcher/synonympostlist.cc
    ___________________________________________________________________
    Added: svn:eol-style
       + native
    
     
    22 * @brief N-way AND postlist
    33 */
    44/* Copyright (C) 2007,2009 Olly Betts
     5 * Copyright (C) 2009 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or
    78 * modify it under the terms of the GNU General Public License as
     
    2223
    2324#include "multiandpostlist.h"
    2425#include "omassert.h"
     26#include "debuglog.h"
    2527
    2628MultiAndPostList::~MultiAndPostList()
    2729{
     
    8486    return static_cast<Xapian::doccount>(result + 0.5);
    8587}
    8688
     // Estimate the termfreq and reltermfreq of the N-way AND from the
     // stats-based estimates of the n_kids sub-postlists in plist.
     //
     // The running estimates start from plist[0] and are kept in double
     // precision; the result is rounded to the nearest integer (add 0.5,
     // then cast).
     89TermFreqs
     90MultiAndPostList::get_termfreq_est_using_stats(
     91        const Xapian::Weight::Internal & stats) const
     92{
     93    LOGCALL(MATCH, TermFreqs,
     94            "MultiAndPostList::get_termfreq_est_using_stats", stats);
     95    // We calculate the estimate assuming independence.  With this assumption,
     96    // the estimate is the product of the estimates for the sub-postlists
     97    // divided by db_size (n_kids - 1) times.
     98    TermFreqs freqs(plist[0]->get_termfreq_est_using_stats(stats));
     99
     100    double freqest = double(freqs.termfreq);
     101    double relfreqest = double(freqs.reltermfreq);
     102
     103    for (size_t i = 1; i < n_kids; ++i) {
     104        freqs = plist[i]->get_termfreq_est_using_stats(stats);
     105
     106        // If the collection is empty, freqest should be 0 already, so leave
     107        // it alone.
     108        if (stats.collection_size != 0)
     109            freqest = (freqest * freqs.termfreq) / stats.collection_size;
     110
     111        // If the rset is empty, relfreqest should be 0 already, so leave
     112        // it alone.
     113        if (stats.rset_size != 0)
     114            relfreqest = (relfreqest * freqs.reltermfreq) / stats.rset_size;
     115    }
     116
     117    RETURN(TermFreqs(static_cast<Xapian::doccount>(freqest + 0.5),
     118                     static_cast<Xapian::doccount>(relfreqest + 0.5)));
     119}
     120
    87121Xapian::weight
    88122MultiAndPostList::get_maxweight() const
    89123{
  • xapian-core/matcher/multiandpostlist.h

     
    22 * @brief N-way AND postlist
    33 */
    44/* Copyright (C) 2007,2009 Olly Betts
     5 * Copyright (C) 2009 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or
    78 * modify it under the terms of the GNU General Public License as
     
    135136
    136137    Xapian::doccount get_termfreq_est() const;
    137138
     139    TermFreqs get_termfreq_est_using_stats(
     140        const Xapian::Weight::Internal & stats) const;
     141
    138142    Xapian::weight get_maxweight() const;
    139143
    140144    Xapian::docid get_docid() const;
     
    154158    std::string get_description() const;
    155159
    156160    /** get_wdf() for MultiAndPostlists returns the sum of the wdfs of the
    157      *  sub postlists.  The wdf isn't really meaningful in many situations,
    158      *  but if the lists are being combined as a synonym we want the sum of
    159      *  the wdfs, so we do that in general.
     161     *  sub postlists.
     162     *
     163     *  The wdf isn't really meaningful in many situations, but if the lists
     164     *  are being combined as a synonym we want the sum of the wdfs, so we do
     165     *  that in general.
    160166     */
    161     virtual Xapian::termcount get_wdf() const;
     167    Xapian::termcount get_wdf() const;
    162168};
    163169
    164170#endif // XAPIAN_INCLUDED_MULTIANDPOSTLIST_H
  • xapian-core/matcher/andmaybepostlist.cc

     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002 Ananova Ltd
    55 * Copyright 2003,2004,2005,2008,2009 Olly Betts
     6 * Copyright 2009 Lemur Consulting Ltd
    67 *
    78 * This program is free software; you can redistribute it and/or
    89 * modify it under the terms of the GNU General Public License as
     
    112113    RETURN(l->get_termfreq_est());
    113114}
    114115
     // Return the stats-based termfreq estimate for (l AND_MAYBE r), which is
     // simply the estimate of the left branch; the right branch is not
     // consulted.
     116TermFreqs
     117AndMaybePostList::get_termfreq_est_using_stats(
     118        const Xapian::Weight::Internal & stats) const
     119{
     120    LOGCALL(MATCH, TermFreqs,
     121            "AndMaybePostList::get_termfreq_est_using_stats", stats);
     122    // Termfreq is exactly that of left hand branch.
     123    RETURN(l->get_termfreq_est_using_stats(stats));
     124}
     125
    115126Xapian::docid
    116127AndMaybePostList::get_docid() const
    117128{
     
    169180    if (lhead == rhead) AssertEq(l->get_doclength(), r->get_doclength());
    170181    RETURN(l->get_doclength());
    171182}
     183
     // Return the wdf for the current position: the sum of both branches'
     // wdfs when lhead == rhead, otherwise the left branch's wdf alone.
     // NOTE(review): lhead/rhead are presumably the branches' current
     // docids - confirm against the rest of andmaybepostlist.cc.
     184Xapian::termcount
     185AndMaybePostList::get_wdf() const
     186{
     187    DEBUGCALL(MATCH, Xapian::termcount, "AndMaybePostList::get_wdf", "");
     188    if (lhead == rhead) RETURN(l->get_wdf() + r->get_wdf());
     189    RETURN(l->get_wdf());
     190}
  • xapian-core/matcher/orpostlist.cc

     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2001,2002 Ananova Ltd
    55 * Copyright 2003,2004,2007,2008,2009 Olly Betts
     6 * Copyright 2009 Lemur Consulting Ltd
    67 *
    78 * This program is free software; you can redistribute it and/or
    89 * modify it under the terms of the GNU General Public License as
     
    184185    RETURN(static_cast<Xapian::doccount>(est + 0.5));
    185186}
    186187
     188TermFreqs
     189OrPostList::get_termfreq_est_using_stats(
     190        const Xapian::Weight::Internal & stats) const
     191{
     192    LOGCALL(MATCH, TermFreqs,
     193            "OrPostList::get_termfreq_est_using_stats", stats);
     194    // Estimate assuming independence:
     195    // P(l or r) = P(l) + P(r) - P(l) . P(r)
     196    TermFreqs lfreqs(l->get_termfreq_est_using_stats(stats));
     197    TermFreqs rfreqs(r->get_termfreq_est_using_stats(stats));
     198
     199    double freqest, relfreqest;
     200
     201    if (stats.collection_size == 0) {
     202        freqest = 0;
     203    } else {
     204        freqest = lfreqs.termfreq + rfreqs.termfreq -
     205                (lfreqs.termfreq * rfreqs.termfreq / stats.collection_size);
     206    }
     207
     208    if (stats.rset_size == 0) {
     209        relfreqest = 0;
     210    } else {
     211        relfreqest = lfreqs.reltermfreq + rfreqs.reltermfreq -
     212                (lfreqs.reltermfreq * rfreqs.reltermfreq / stats.rset_size);
     213    }
     214
     215    RETURN(TermFreqs(static_cast<Xapian::doccount>(freqest + 0.5),
     216                     static_cast<Xapian::doccount>(relfreqest + 0.5)));
     217}
     218
    187219Xapian::docid
    188220OrPostList::get_docid() const
    189221{
     
    258290
    259291    RETURN(doclength);
    260292}
     293
     // Return the wdf for the current position: the left branch's wdf when
     // lhead < rhead, the right branch's when lhead > rhead, and the sum of
     // both when they are equal.
     // NOTE(review): lhead/rhead are presumably the branches' current
     // docids - confirm against the rest of orpostlist.cc.
     294Xapian::termcount
     295OrPostList::get_wdf() const
     296{
     297    DEBUGCALL(MATCH, Xapian::termcount, "OrPostList::get_wdf", "");
     298    if (lhead < rhead) RETURN(l->get_wdf());
     299    if (lhead > rhead) RETURN(r->get_wdf());
     300    RETURN(l->get_wdf() + r->get_wdf());
     301}
  • xapian-core/matcher/externalpostlist.h

     
    5656
    5757    Xapian::doccount get_termfreq_max() const;
    5858
     59    TermFreqs get_termfreq_est_using_stats(
     60        const Xapian::Weight::Internal & stats) const;
     61
    5962    Xapian::weight get_maxweight() const;
    6063
    6164    Xapian::docid get_docid() const;
  • xapian-core/matcher/phrasepostlist.h

     
    33 * ----START-LICENCE----
    44 * Copyright 1999,2000,2001 BrightStation PLC
    55 * Copyright 2003,2004,2005 Olly Betts
     6 * Copyright 2009 Lemur Consulting Ltd
    67 *
    78 * This program is free software; you can redistribute it and/or
    89 * modify it under the terms of the GNU General Public License as
     
    5354            return source->get_termfreq_est() / 2;
    5455        }
    5556
     57        TermFreqs get_termfreq_est_using_stats(
     58            const Xapian::Weight::Internal & stats) const;
     59
    5660        NearPostList(PostList *source_, Xapian::termpos window_,
    5761                     std::vector<PostList *> terms_)
    5862                : SelectPostList(source_)
     
    8791            return source->get_termfreq_est() / 3;
    8892        }
    8993
     94        TermFreqs get_termfreq_est_using_stats(
     95            const Xapian::Weight::Internal & stats) const;
     96
    9097        PhrasePostList(PostList *source_, Xapian::termpos window_,
    9198                       std::vector<PostList *> terms_) : SelectPostList(source_)
    9299        {
  • xapian-core/weight/weightinternal.cc

     
    2828
    2929using namespace std;
    3030
     // Return a human-readable description of the form
     // "TermFreqs(<termfreq>, <reltermfreq>)", with both members converted
     // to text by om_tostring().
     31std::string
     32TermFreqs::get_description() const {
     33    return std::string("TermFreqs(") + om_tostring(termfreq) + ", " +
     34            om_tostring(reltermfreq) + ")";
     35}
     36
    3137namespace Xapian {
    3238
    3339Weight::Internal &
     
    3844    rset_size += inc.rset_size;
    3945
    4046    // Add termfreqs and reltermfreqs
    41     TermFreqMap::const_iterator i;
    42     for (i = inc.termfreq.begin(); i != inc.termfreq.end(); ++i) {
    43         termfreq[i->first] += i->second;
    44     }
    45     for (i = inc.reltermfreq.begin(); i != inc.reltermfreq.end(); ++i) {
    46         reltermfreq[i->first] += i->second;
     47    map<string, TermFreqs>::const_iterator i;
     48    for (i = inc.termfreqs.begin(); i != inc.termfreqs.end(); ++i) {
     49        termfreqs[i->first] += i->second;
    4750    }
    4851    return *this;
    4952}
     
    5457    // We pass an empty std::string for term when calculating the extra weight.
    5558    if (term.empty()) return 0;
    5659
    57     TermFreqMap::const_iterator tfreq = termfreq.find(term);
    58     Assert(tfreq != termfreq.end());
    59     return tfreq->second;
     60    map<string, TermFreqs>::const_iterator tfreq = termfreqs.find(term);
     61    Assert(tfreq != termfreqs.end());
     62    return tfreq->second.termfreq;
    6063}
    6164
    6265void
     
    6467{
    6568    // Can be called a second time, if a term occurs multiple times in the
    6669    // query; if this happens, the termfreq should be the same each time.
    67     Assert(termfreq.find(term) == termfreq.end() ||
    68            termfreq.find(term)->second == tfreq);
    69     termfreq[term] = tfreq;
     70    Assert(termfreqs.find(term) == termfreqs.end() ||
     71           termfreqs.find(term)->second.termfreq == 0 ||
     72           termfreqs.find(term)->second.termfreq == tfreq);
     73    termfreqs[term].termfreq = tfreq;
    7074}
    7175
    7276Xapian::doccount
     
    7579    // We pass an empty string for term when calculating the extra weight.
    7680    if (term.empty()) return 0;
    7781
    78     TermFreqMap::const_iterator rtfreq = reltermfreq.find(term);
    79     Assert(rtfreq != reltermfreq.end());
    80     return rtfreq->second;
     82    map<string, TermFreqs>::const_iterator tfreq = termfreqs.find(term);
     83    Assert(tfreq != termfreqs.end());
     84    return tfreq->second.reltermfreq;
    8185}
    8286
    8387void
     
    8589{
    8690    // Can be called a second time, if a term occurs multiple times in the
    8791    // query; if this happens, the reltermfreq should be the same each time.
    88     Assert(reltermfreq.find(term) == reltermfreq.end() ||
    89            reltermfreq.find(term)->second == rtfreq);
    90     reltermfreq[term] = rtfreq;
     92    Assert(termfreqs.find(term) == termfreqs.end() ||
     93           termfreqs.find(term)->second.reltermfreq == 0 ||
     94           termfreqs.find(term)->second.reltermfreq == rtfreq);
     95    termfreqs[term].reltermfreq = rtfreq;
    9196}
    9297
    9398string
  • xapian-core/weight/weight.cc

     
    22 * @brief Xapian::Weight base class
    33 */
    44/* Copyright (C) 2007,2008,2009 Olly Betts
     5 * Copyright (C) 2009 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or
    78 * modify it under the terms of the GNU General Public License as
     
    7778    init(factor);
    7879}
    7980
     // Initialise this weight object for a synonym, where the termfreq and
     // reltermfreq are supplied by the caller rather than looked up by term.
     //
     // @param stats         Source of collection size, rset size, average
     //                      length and the document length bounds.
     // @param query_length  Stored as the query length.
     // @param factor        Passed on to init().
     // @param termfreq      Stored as the term frequency.
     // @param reltermfreq   Stored as the relevant-document term frequency.
     //
     // Statistics selected by the stats_needed flags are only fetched when
     // the corresponding flag is set.  The wqf is fixed at 1.
     81void
     82Weight::init_(const Internal & stats, Xapian::termcount query_length,
     83              double factor, Xapian::doccount termfreq,
     84              Xapian::doccount reltermfreq)
     85{
     86    LOGCALL_VOID(MATCH, "Weight::init_", stats << ", " << query_length <<
     87            ", " << factor << ", " << termfreq << ", " << reltermfreq);
     88    // Synonym case.
     89    collection_size_ = stats.collection_size;
     90    rset_size_ = stats.rset_size;
     91    if (stats_needed & AVERAGE_LENGTH)
     92        average_length_ = stats.get_average_length();
     93    if (stats_needed & DOC_LENGTH_MAX)
     94        doclength_upper_bound_ = stats.db.get_doclength_upper_bound();
     95    if (stats_needed & DOC_LENGTH_MIN)
     96        doclength_lower_bound_ = stats.db.get_doclength_lower_bound();
     97
     98    // The doclength is an upper bound on the wdf.  This is obviously true for
     99    // normal terms, but SynonymPostList ensures that it is also true for
     100    // synonym terms by clamping the wdf values returned to the doclength.
     101    //
     102    // (This clamping is only actually necessary in cases where a constituent
     103    // term of the synonym is repeated.)
     104    if (stats_needed & WDF_MAX)
     105        wdf_upper_bound_ = stats.db.get_doclength_upper_bound();
     106
     107    termfreq_ = termfreq;
     108    reltermfreq_ = reltermfreq;
     109    query_length_ = query_length;
     110    wqf_ = 1;
     111    init(factor);
     112}
     113
    80114Weight::~Weight() { }
    81115
    82116}
  • xapian-core/tests/api_opsynonym.cc

     
     1/** @file api_opsynonym.cc
     2 * @brief tests of OP_SYNONYM.
     3 */
     4/* Copyright 2009 Olly Betts
     5 * Copyright 2007,2008,2009 Lemur Consulting Ltd
     6 *
     7 * This program is free software; you can redistribute it and/or
     8 * modify it under the terms of the GNU General Public License as
     9 * published by the Free Software Foundation; either version 2 of the
     10 * License, or (at your option) any later version.
     11 *
     12 * This program is distributed in the hope that it will be useful,
     13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     15 * GNU General Public License for more details.
     16 *
     17 * You should have received a copy of the GNU General Public License
     18 * along with this program; if not, write to the Free Software
     19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
     20 * USA
     21 */
     22
     23#include <config.h>
     24
     25#include "api_opsynonym.h"
     26
     27#include <map>
     28#include <set>
     29#include <vector>
     30
     31#include <xapian.h>
     32
     33#include "backendmanager.h"
     34#include "testsuite.h"
     35#include "testutils.h"
     36
     37#include "apitest.h"
     38
     39using namespace std;
     40
     41// #######################################################################
     42// # Tests start here
     43
     44// Check a synonym search
     45DEFINE_TESTCASE(synonym1, backend) {
     46    Xapian::Database db(get_database("etext"));
     47
     48    TEST_REL(db.get_doclength_upper_bound(), >, 0);
     49
     50    Xapian::doccount lots = 214;
     51
     52    // Make a list of lists of subqueries, which are going to be joined
     53    // together as a synonym.
     54    vector<vector<Xapian::Query> > subqueries_list;
     55
     56    // For each set of subqueries, keep a list of the number of results for
     57    // which the weight should be the same when combined with OP_SYNONYM as
     58    // when combined with OP_OR.
     59    vector<int> subqueries_sameweight_count;
     60    vector<int> subqueries_diffweight_count;
     61
     62    vector<Xapian::Query> subqueries;
     63    subqueries.push_back(Xapian::Query("date"));
     64    subqueries_list.push_back(subqueries);
     65    // Single term - all 33 results should be same weight.
     66    subqueries_sameweight_count.push_back(33);
     67    subqueries_diffweight_count.push_back(0);
     68
     69    // Two terms, which co-occur in some documents.
     70    subqueries.clear();
     71    subqueries.push_back(Xapian::Query("sky"));
     72    subqueries.push_back(Xapian::Query("date"));
     73    subqueries_list.push_back(subqueries);
     74    // All 34 results should be different.
     75    subqueries_sameweight_count.push_back(0);
     76    subqueries_diffweight_count.push_back(34);
     77
     78    // Two terms which are entirely disjoint, and where the maximum weight
     79    // doesn't occur in the first or second match.
     80    subqueries.clear();
     81    subqueries.push_back(Xapian::Query("gutenberg"));
     82    subqueries.push_back(Xapian::Query("blockhead"));
     83    subqueries_list.push_back(subqueries);
     84    // All 18 results should be different.
     85    subqueries_sameweight_count.push_back(0);
     86    subqueries_diffweight_count.push_back(18);
     87
     88    subqueries.clear();
     89    subqueries.push_back(Xapian::Query("date"));
     90    subqueries.push_back(Xapian::Query(Xapian::Query::OP_OR,
     91                                       Xapian::Query("sky"),
     92                                       Xapian::Query("glove")));
     93    subqueries_list.push_back(subqueries);
     94    // All 34 results should be different.
     95    subqueries_sameweight_count.push_back(0);
     96    subqueries_diffweight_count.push_back(34);
     97
     98    subqueries.clear();
     99    subqueries.push_back(Xapian::Query("date"));
     100    subqueries.push_back(Xapian::Query(Xapian::Query::OP_OR,
     101                                       Xapian::Query("sky"),
     102                                       Xapian::Query("date")));
     103    subqueries_list.push_back(subqueries);
     104    // All 34 results should be different.
     105    subqueries_sameweight_count.push_back(0);
     106    subqueries_diffweight_count.push_back(34);
     107
     108    subqueries.clear();
     109    subqueries.push_back(Xapian::Query("date"));
     110    subqueries.push_back(Xapian::Query(Xapian::Query::OP_AND_MAYBE,
     111                                       Xapian::Query("sky"),
     112                                       Xapian::Query("date")));
     113    subqueries_list.push_back(subqueries);
     114    // All 34 results should be different.
     115    subqueries_sameweight_count.push_back(0);
     116    subqueries_diffweight_count.push_back(34);
     117
     118    subqueries.clear();
     119    subqueries.push_back(Xapian::Query("date"));
     120    subqueries.push_back(Xapian::Query(Xapian::Query::OP_AND_NOT,
     121                                       Xapian::Query("sky"),
     122                                       Xapian::Query("date")));
     123    subqueries_list.push_back(subqueries);
     124    // All 34 results should be different.
     125    subqueries_sameweight_count.push_back(0);
     126    subqueries_diffweight_count.push_back(34);
     127
     128    subqueries.clear();
     129    subqueries.push_back(Xapian::Query("date"));
     130    subqueries.push_back(Xapian::Query(Xapian::Query::OP_AND,
     131                                       Xapian::Query("sky"),
     132                                       Xapian::Query("date")));
     133    subqueries_list.push_back(subqueries);
     134    // The AND only matches 1 document, so the estimated termfreq for the whole
     135    // synonym works out as 33 (due to rounding), which is the same as the
     136    // termfreq for "date".  Therefore most of the weights are the same as just
     137    // for the pure "date" search, and the only document which gets a different
     138    // weight is the one also matched by "sky" (because it has a wdf boost).
     139    subqueries_sameweight_count.push_back(32);
     140    subqueries_diffweight_count.push_back(1);
     141
     142    subqueries.clear();
     143    subqueries.push_back(Xapian::Query("date"));
     144    subqueries.push_back(Xapian::Query(Xapian::Query::OP_XOR,
     145                                       Xapian::Query("sky"),
     146                                       Xapian::Query("date")));
     147    subqueries_list.push_back(subqueries);
     148    // All 34 results should be different.
     149    subqueries_sameweight_count.push_back(0);
     150    subqueries_diffweight_count.push_back(34);
     151
     152    subqueries.clear();
     153    subqueries.push_back(Xapian::Query("date"));
     154    subqueries.push_back(Xapian::Query(Xapian::Query::OP_SYNONYM,
     155                                       Xapian::Query("sky"),
     156                                       Xapian::Query("date")));
     157    subqueries_list.push_back(subqueries);
     158    // When the top-level operator is OR, the synonym part has an estimated
     159    // termfreq of 35.  When the top-level operator is SYNONYM, the whole query
     160    // has an estimated termfreq of 35, and is in fact the same as the synonym
     161    // part in the OR query, except that the wqf of "date" is 2.  We're
     162    // currently not using the wqfs of components of synonyms, so this
     163    // difference has no effect on the weightings.  Therefore, for the 1
     164    // document which does not contain "date", we get the same result with
     165    // SYNONYM as with OR.
     166    subqueries_sameweight_count.push_back(1);
     167    subqueries_diffweight_count.push_back(33);
     168
     169    subqueries.clear();
     170    subqueries.push_back(Xapian::Query("sky"));
     171    subqueries.push_back(Xapian::Query("date"));
     172    subqueries.push_back(Xapian::Query("stein"));
     173    subqueries.push_back(Xapian::Query("ally"));
     174    subqueries_list.push_back(subqueries);
     175    // All 35 results should be different.
     176    subqueries_sameweight_count.push_back(0);
     177    subqueries_diffweight_count.push_back(35);
     178
     179    subqueries.clear();
     180    subqueries.push_back(Xapian::Query("attitud"));
     181    subqueries.push_back(Xapian::Query(Xapian::Query::OP_PHRASE,
     182                                       Xapian::Query("german"),
     183                                       Xapian::Query("adventur")));
     184    subqueries_list.push_back(subqueries);
     185    // The estimated term frequency for the synonym is 2 (because the estimate
     186    // for the phrase is 0), which is the same as the term frequency of
     187    // "attitud".  Thus, the synonym gets the same weight as "attitud", so
     188    // documents with only "attitud" (but not the phrase) in them get the same
     189    // wdf, and have the same total weight.  There turns out to be exactly one
     190    // such document.
     191    subqueries_sameweight_count.push_back(1);
     192    subqueries_diffweight_count.push_back(3);
     193
     194    for (vector<vector<Xapian::Query> >::size_type subqgroup = 0;
     195         subqgroup != subqueries_list.size(); ++subqgroup)
     196    {
     197        vector<Xapian::Query> * qlist = &(subqueries_list[subqgroup]);
     198        // Run two queries, one joining the subqueries with OR and one joining
     199        // them with SYNONYM.
     200        Xapian::Enquire enquire(db);
     201
     202        // Do the search with OR
     203        Xapian::Query orquery(Xapian::Query::OP_OR, qlist->begin(), qlist->end());
     204        enquire.set_query(orquery);
     205        Xapian::MSet ormset = enquire.get_mset(0, lots);
     206
     207        // Do the search with synonym, getting all the results.
     208        Xapian::Query synquery(Xapian::Query::OP_SYNONYM, qlist->begin(), qlist->end());
     209        enquire.set_query(synquery);
     210        Xapian::MSet synmset = enquire.get_mset(0, lots);
     211
     212        tout << "Comparing " << orquery << " with " << synquery << '\n';
     213
     214        // Check that the queries return some results.
     215        TEST_NOT_EQUAL(synmset.size(), 0);
     216        // Check that the queries return the same number of results.
     217        TEST_EQUAL(synmset.size(), ormset.size());
     218        map<Xapian::docid, Xapian::weight> values_or;
     219        map<Xapian::docid, Xapian::weight> values_synonym;
     220        for (Xapian::doccount i = 0; i < synmset.size(); ++i) {
     221            values_or[*ormset[i]] = ormset[i].get_weight();
     222            values_synonym[*synmset[i]] = synmset[i].get_weight();
     223        }
     224        TEST_EQUAL(values_or.size(), values_synonym.size());
     225
     226        /* Check that most of the weights for items in the "or" mset are
     227         * different from those in the "synonym" mset. */
     228        int same_weight = 0;
     229        int different_weight = 0;
     230        for (map<Xapian::docid, Xapian::weight>::const_iterator
     231             j = values_or.begin(); j != values_or.end(); ++j) {
     232            Xapian::docid did = j->first;
     233            // Check that all the results in the or tree make it to the synonym
     234            // tree.
     235            TEST(values_synonym.find(did) != values_synonym.end());
     236            if (values_or[did] == values_synonym[did]) {
     237                ++same_weight;
     238            } else {
     239                ++different_weight;
     240            }
     241        }
     242
     243        int expected_same = subqueries_sameweight_count[subqgroup];
     244        int expected_diff = subqueries_diffweight_count[subqgroup];
     245
     246        TEST_EQUAL(different_weight, expected_diff);
     247        TEST_EQUAL(same_weight, expected_same);
     248
     249        // Do the search with synonym, but just get the top result.
     250        // (Regression test - the OR subquery in the synonym postlist tree used
     251        // to shortcut incorrectly, and return the wrong result here).
     252        Xapian::MSet mset_top = enquire.get_mset(0, 1);
     253        TEST_EQUAL(mset_top.size(), 1);
     254        TEST(mset_range_is_same(mset_top, 0, synmset, 0, 1));
     255    }
     256    return true;
     257}
     258
     259// Regression test - test a synonym search with a MultiAndPostlist.
     260DEFINE_TESTCASE(synonym2, backend) {
     261    Xapian::Query query;
     262    vector<Xapian::Query> subqueries;
     263    subqueries.push_back(Xapian::Query("file"));
     264    subqueries.push_back(Xapian::Query("the"));
     265    subqueries.push_back(Xapian::Query("next"));
     266    subqueries.push_back(Xapian::Query("reader"));
     267    query = Xapian::Query(Xapian::Query::OP_AND, subqueries.begin(), subqueries.end());
     268    subqueries.clear();
     269    subqueries.push_back(query);
     270    subqueries.push_back(Xapian::Query("gutenberg"));
     271    query = Xapian::Query(Xapian::Query::OP_SYNONYM, subqueries.begin(), subqueries.end());
     272
     273    tout << query << '\n';
     274
     275    Xapian::Database db(get_database("etext"));
     276    Xapian::Enquire enquire(db);
     277    enquire.set_query(query);
     278    Xapian::MSet mset = enquire.get_mset(0, 10);
     279    tout << mset << '\n';
     280
     281    // Regression test that OP_SCALE_WEIGHT works with OP_SYNONYM
     282    double maxposs = mset.get_max_possible();
     283    query = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 10.0);
     284    enquire.set_query(query);
     285    mset = enquire.get_mset(0, 10);
     286    double maxposs2 = mset.get_max_possible();
     287
     288    TEST_EQUAL_DOUBLE(maxposs * 10.0, maxposs2);
     289
     290    return true;
     291}
     292
     293static void
     294check_msets_contain_same_docs(const Xapian::MSet & mset1,
     295                              const Xapian::MSet & mset2)
     296{
     297    TEST_EQUAL(mset1.size(), mset2.size());
     298
     299    set<Xapian::docid> docids;
     300    for (Xapian::doccount i = 0; i < mset1.size(); ++i) {
     301        docids.insert(*mset1[i]);
     302    }
     303
     304    // Check that all the results in mset1 are in mset2.
     305    for (Xapian::doccount j = 0; j < mset2.size(); ++j) {
     306        // Check that we can erase each mset2 docid from the set.  Since mset1
     307        // and mset2 are the same size this means we can be sure that there
     308        // were no repeated docids in either (it would be a bug if there were).
     309        TEST(docids.erase(*mset2[j]));
     310    }
     311}
     312
     313// Test a synonym search which has had its weight scaled to 0.
     314DEFINE_TESTCASE(synonym3, backend) {
     315    Xapian::Query query = Xapian::Query(Xapian::Query::OP_SYNONYM,
     316                                        Xapian::Query("sky"),
     317                                        Xapian::Query("date"));
     318
     319    Xapian::Database db(get_database("etext"));
     320    Xapian::Enquire enquire(db);
     321    enquire.set_query(query);
     322    Xapian::MSet mset_orig = enquire.get_mset(0, db.get_doccount());
     323
     324    tout << query << '\n';
     325    tout << mset_orig << '\n';
     326
     327    // Test that OP_SCALE_WEIGHT with a factor of 0.0 works with OP_SYNONYM
     328    // (this has a special codepath to avoid doing the synonym calculation).
     329    query = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 0.0);
     330    enquire.set_query(query);
     331    Xapian::MSet mset_zero = enquire.get_mset(0, db.get_doccount());
     332
     333    tout << query << '\n';
     334    tout << mset_zero << '\n';
     335
     336    // Check that the queries return some results.
     337    TEST_NOT_EQUAL(mset_zero.size(), 0);
     338    // Check that the queries return the same document IDs, and the zero
     339    // one has zero weight.
     340    check_msets_contain_same_docs(mset_orig, mset_zero);
     341    for (Xapian::doccount i = 0; i < mset_orig.size(); ++i) {
     342        TEST_NOT_EQUAL(mset_orig[i].get_weight(), 0.0);
     343        TEST_EQUAL(mset_zero[i].get_weight(), 0.0);
     344    }
     345
     346    return true;
     347}
     348
     349// Test synonym searches combined with various operators.
     350DEFINE_TESTCASE(synonym4, backend) {
     351    Xapian::Database db(get_database("etext"));
     352    Xapian::Enquire enquire(db);
     353    Xapian::Query syn_query = Xapian::Query(Xapian::Query::OP_SYNONYM,
     354                                            Xapian::Query("gutenberg"),
     355                                            Xapian::Query("blockhead"));
     356    Xapian::Query or_query = Xapian::Query(Xapian::Query::OP_OR,
     357                                           Xapian::Query("gutenberg"),
     358                                           Xapian::Query("blockhead"));
     359    Xapian::Query date_query = Xapian::Query("date");
     360
     361    // Check some queries.
     362    static const Xapian::Query::op operators[] = {
     363        Xapian::Query::OP_AND_MAYBE,
     364        Xapian::Query::OP_AND_NOT,
     365        Xapian::Query::OP_AND,
     366        Xapian::Query::OP_XOR,
     367        Xapian::Query::OP_OR,
     368        Xapian::Query::OP_SYNONYM
     369    };
     370    const Xapian::Query::op * end;
     371    end = operators + sizeof(operators) / sizeof(operators[0]);
     372    for (const Xapian::Query::op * i = operators; i != end; ++i) {
     373        tout.str(string());
     374        Xapian::Query query1(*i, syn_query, date_query);
     375        Xapian::Query query2(*i, or_query, date_query);
     376
     377        enquire.set_query(query1);
     378        tout << "query1:" << query1 << '\n';
     379        Xapian::MSet mset1 = enquire.get_mset(0, db.get_doccount());
     380        tout << "mset1:" << mset1 << '\n';
     381        enquire.set_query(query2);
     382        tout << "query2:" << query2 << '\n';
     383        Xapian::MSet mset2 = enquire.get_mset(0, db.get_doccount());
     384        tout << "mset2:" << mset2 << '\n';
     385
     386        TEST_NOT_EQUAL(mset1.size(), 0);
     387        check_msets_contain_same_docs(mset1, mset2);
     388    }
     389
     390    return true;
     391}
  • xapian-core/tests/queryparsertest.cc

    Property changes on: xapian-core/tests/api_opsynonym.cc
    ___________________________________________________________________
    Added: svn:eol-style
       + native
    
     
    11/* queryparsertest.cc: Tests of Xapian::QueryParser
    22 *
    33 * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009 Olly Betts
     4 * Copyright (C) 2007,2009 Lemur Consulting Ltd
    45 *
    56 * This program is free software; you can redistribute it and/or
    67 * modify it under the terms of the GNU General Public License as
     
    789790    Xapian::Query qobj = qp.parse_query("ab*", Xapian::QueryParser::FLAG_WILDCARD);
    790791    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(abc:(pos=1))");
    791792    qobj = qp.parse_query("muscle*", Xapian::QueryParser::FLAG_WILDCARD);
    792     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscle:(pos=1) OR musclebound:(pos=1)))");
     793    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscle:(pos=1) SYNONYM musclebound:(pos=1)))");
    793794    qobj = qp.parse_query("meat*", Xapian::QueryParser::FLAG_WILDCARD);
    794795    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query()");
    795796    qobj = qp.parse_query("musc*", Xapian::QueryParser::FLAG_WILDCARD);
    796     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscat:(pos=1) OR muscle:(pos=1) OR musclebound:(pos=1) OR muscular:(pos=1)))");
     797    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscat:(pos=1) SYNONYM muscle:(pos=1) SYNONYM musclebound:(pos=1) SYNONYM muscular:(pos=1)))");
    797798    qobj = qp.parse_query("mutt*", Xapian::QueryParser::FLAG_WILDCARD);
    798799    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(mutton:(pos=1))");
    799800    // Regression test (we weren't lowercasing terms before checking if they
     
    886887    qp.add_prefix("author", "A");
    887888    Xapian::Query qobj;
    888889    qobj = qp.parse_query("author:h*", Xapian::QueryParser::FLAG_WILDCARD);
    889     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((Aheinlein:(pos=1) OR Ahuxley:(pos=1)))");
     890    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((Aheinlein:(pos=1) SYNONYM Ahuxley:(pos=1)))");
    890891    qobj = qp.parse_query("author:h* test", Xapian::QueryParser::FLAG_WILDCARD);
    891     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((Aheinlein:(pos=1) OR Ahuxley:(pos=1) OR test:(pos=2)))");
     892    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((Aheinlein:(pos=1) SYNONYM Ahuxley:(pos=1)) OR test:(pos=2)))");
    892893    return true;
    893894#endif
    894895}
     
    918919    doc.add_term("XTcowl");
    919920    doc.add_term("XTcox");
    920921    doc.add_term("ZXTcow");
     922    doc.add_term("XONEpartial");
     923    doc.add_term("XONEpartial2");
     924    doc.add_term("XTWOpartial3");
     925    doc.add_term("XTWOpartial4");
    921926    db.add_document(doc);
    922927    Xapian::QueryParser qp;
    923928    qp.set_database(db);
     
    933938    qobj = qp.parse_query("ab", Xapian::QueryParser::FLAG_PARTIAL);
    934939    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((abc:(pos=1) OR Zab:(pos=1)))");
    935940    qobj = qp.parse_query("muscle", Xapian::QueryParser::FLAG_PARTIAL);
    936     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscle:(pos=1) OR musclebound:(pos=1) OR Zmuscl:(pos=1)))");
     941    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((muscle:(pos=1) SYNONYM musclebound:(pos=1)) OR Zmuscl:(pos=1)))");
    937942    qobj = qp.parse_query("meat", Xapian::QueryParser::FLAG_PARTIAL);
    938943    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(Zmeat:(pos=1))");
    939944    qobj = qp.parse_query("musc", Xapian::QueryParser::FLAG_PARTIAL);
    940     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((muscat:(pos=1) OR muscle:(pos=1) OR musclebound:(pos=1) OR muscular:(pos=1) OR Zmusc:(pos=1)))");
     945    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((muscat:(pos=1) SYNONYM muscle:(pos=1) SYNONYM musclebound:(pos=1) SYNONYM muscular:(pos=1)) OR Zmusc:(pos=1)))");
    941946    qobj = qp.parse_query("mutt", Xapian::QueryParser::FLAG_PARTIAL);
    942947    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((mutton:(pos=1) OR Zmutt:(pos=1)))");
    943948    qobj = qp.parse_query("abc musc", Xapian::QueryParser::FLAG_PARTIAL);
    944     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((Zabc:(pos=1) OR muscat:(pos=2) OR muscle:(pos=2) OR musclebound:(pos=2) OR muscular:(pos=2) OR Zmusc:(pos=2)))");
     949    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((Zabc:(pos=1) OR (muscat:(pos=2) SYNONYM muscle:(pos=2) SYNONYM musclebound:(pos=2) SYNONYM muscular:(pos=2)) OR Zmusc:(pos=2)))");
    945950    qobj = qp.parse_query("a* mutt", Xapian::QueryParser::FLAG_PARTIAL | Xapian::QueryParser::FLAG_WILDCARD);
    946951    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((abc:(pos=1) OR mutton:(pos=2) OR Zmutt:(pos=2)))");
    947952
    948953    // Check behaviour with stemmed terms, and stem strategy STEM_SOME.
    949954    qobj = qp.parse_query("o", Xapian::QueryParser::FLAG_PARTIAL);
    950     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((out:(pos=1) OR outside:(pos=1) OR Zo:(pos=1)))");
     955    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR Zo:(pos=1)))");
    951956    qobj = qp.parse_query("ou", Xapian::QueryParser::FLAG_PARTIAL);
    952     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((out:(pos=1) OR outside:(pos=1) OR Zou:(pos=1)))");
     957    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR Zou:(pos=1)))");
    953958    qobj = qp.parse_query("out", Xapian::QueryParser::FLAG_PARTIAL);
    954     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((out:(pos=1) OR outside:(pos=1) OR Zout:(pos=1)))");
     959    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR Zout:(pos=1)))");
    955960    qobj = qp.parse_query("outs", Xapian::QueryParser::FLAG_PARTIAL);
    956961    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((outside:(pos=1) OR Zout:(pos=1)))");
    957962    qobj = qp.parse_query("outsi", Xapian::QueryParser::FLAG_PARTIAL);
     
    963968
    964969    // Check behaviour with capitalised terms, and stem strategy STEM_SOME.
    965970    qobj = qp.parse_query("Out", Xapian::QueryParser::FLAG_PARTIAL);
    966     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((out:(pos=1,wqf=2) OR outside:(pos=1)))");
     971    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR out:(pos=1)))");
    967972    qobj = qp.parse_query("Outs", Xapian::QueryParser::FLAG_PARTIAL);
    968973    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((outside:(pos=1) OR outs:(pos=1)))");
    969974    qobj = qp.parse_query("Outside", Xapian::QueryParser::FLAG_PARTIAL);
     
    972977    // And now with stemming strategy STEM_ALL.
    973978    qp.set_stemming_strategy(Xapian::QueryParser::STEM_ALL);
    974979    qobj = qp.parse_query("Out", Xapian::QueryParser::FLAG_PARTIAL);
    975     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((out:(pos=1,wqf=2) OR outside:(pos=1)))");
     980    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((out:(pos=1) SYNONYM outside:(pos=1)) OR out:(pos=1)))");
    976981    qobj = qp.parse_query("Outs", Xapian::QueryParser::FLAG_PARTIAL);
    977982    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((outside:(pos=1) OR out:(pos=1)))");
    978983    qobj = qp.parse_query("Outside", Xapian::QueryParser::FLAG_PARTIAL);
     
    981986    // Check handling of a case with a prefix.
    982987    qp.set_stemming_strategy(Xapian::QueryParser::STEM_SOME);
    983988    qobj = qp.parse_query("title:cow", Xapian::QueryParser::FLAG_PARTIAL);
    984     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((XTcowl:(pos=1) OR XTcows:(pos=1) OR ZXTcow:(pos=1)))");
     989    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((XTcowl:(pos=1) SYNONYM XTcows:(pos=1)) OR ZXTcow:(pos=1)))");
    985990    qobj = qp.parse_query("title:cows", Xapian::QueryParser::FLAG_PARTIAL);
    986991    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((XTcows:(pos=1) OR ZXTcow:(pos=1)))");
    987992    qobj = qp.parse_query("title:Cow", Xapian::QueryParser::FLAG_PARTIAL);
    988     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((XTcowl:(pos=1) OR XTcows:(pos=1) OR XTcow:(pos=1)))");
     993    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((XTcowl:(pos=1) SYNONYM XTcows:(pos=1)) OR XTcow:(pos=1)))");
    989994    qobj = qp.parse_query("title:Cows", Xapian::QueryParser::FLAG_PARTIAL);
    990995    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(XTcows:(pos=1,wqf=2))");
    991996
     
    993998    // inflate the wqf of the "parsed as normal" version of a partial term
    994999    // by multiplying it by the number of prefixes mapped to.
    9951000    qobj = qp.parse_query("double:vision", Xapian::QueryParser::FLAG_PARTIAL);
    996     TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((ZXONEvision:(pos=1) OR ZXTWOvision:(pos=1)))");
     1001    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query((ZXONEvision:(pos=1) SYNONYM ZXTWOvision:(pos=1)))");
     1002
     1003    // Test handling of FLAG_PARTIAL when there's more than one prefix.
     1004    qobj = qp.parse_query("double:part", Xapian::QueryParser::FLAG_PARTIAL);
     1005    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((XONEpartial:(pos=1) SYNONYM XONEpartial2:(pos=1) SYNONYM XTWOpartial3:(pos=1) SYNONYM XTWOpartial4:(pos=1)) OR (ZXONEpart:(pos=1) SYNONYM ZXTWOpart:(pos=1))))");
     1006
     1007    // Test handling of FLAG_PARTIAL when there's more than one prefix, without
     1008    // stemming.
     1009    qp.set_stemming_strategy(Xapian::QueryParser::STEM_NONE);
     1010    qobj = qp.parse_query("double:part", Xapian::QueryParser::FLAG_PARTIAL);
     1011    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((XONEpartial:(pos=1) SYNONYM XONEpartial2:(pos=1) SYNONYM XTWOpartial3:(pos=1) SYNONYM XTWOpartial4:(pos=1)) OR (XONEpart:(pos=1) SYNONYM XTWOpart:(pos=1))))");
     1012    qobj = qp.parse_query("double:partial", Xapian::QueryParser::FLAG_PARTIAL);
     1013    TEST_STRINGS_EQUAL(qobj.get_description(), "Xapian::Query(((XONEpartial:(pos=1) SYNONYM XONEpartial2:(pos=1) SYNONYM XTWOpartial3:(pos=1) SYNONYM XTWOpartial4:(pos=1)) OR (XONEpartial:(pos=1) SYNONYM XTWOpartial:(pos=1))))");
    9971014
    9981015    return true;
    9991016#endif
     
    15631580}
    15641581
    15651582static test test_synonym_queries[] = {
    1566     { "searching", "(Zsearch:(pos=1) OR Zfind:(pos=1) OR Zlocate:(pos=1))" },
    1567     { "search", "(Zsearch:(pos=1) OR find:(pos=1))" },
    1568     { "Search", "(search:(pos=1) OR find:(pos=1))" },
     1583    { "searching", "(Zsearch:(pos=1) SYNONYM Zfind:(pos=1) SYNONYM Zlocate:(pos=1))" },
     1584    { "search", "(Zsearch:(pos=1) SYNONYM find:(pos=1))" },
     1585    { "Search", "(search:(pos=1) SYNONYM find:(pos=1))" },
    15691586    { "Searching", "searching:(pos=1)" },
    1570     { "searching OR terms", "(Zsearch:(pos=1) OR Zfind:(pos=1) OR Zlocate:(pos=1) OR Zterm:(pos=2))" },
    1571     { "search OR terms", "(Zsearch:(pos=1) OR find:(pos=1) OR Zterm:(pos=2))" },
    1572     { "search +terms", "(Zterm:(pos=2) AND_MAYBE (Zsearch:(pos=1) OR find:(pos=1)))" },
    1573     { "search -terms", "((Zsearch:(pos=1) OR find:(pos=1)) AND_NOT Zterm:(pos=2))" },
    1574     { "+search terms", "((Zsearch:(pos=1) OR find:(pos=1)) AND_MAYBE Zterm:(pos=2))" },
    1575     { "-search terms", "(Zterm:(pos=2) AND_NOT (Zsearch:(pos=1) OR find:(pos=1)))" },
    1576     { "search terms", "(Zsearch:(pos=1) OR find:(pos=1) OR Zterm:(pos=2))" },
     1587    { "searching OR terms", "((Zsearch:(pos=1) SYNONYM Zfind:(pos=1) SYNONYM Zlocate:(pos=1)) OR Zterm:(pos=2))" },
     1588    { "search OR terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) OR Zterm:(pos=2))" },
     1589    { "search +terms", "(Zterm:(pos=2) AND_MAYBE (Zsearch:(pos=1) SYNONYM find:(pos=1)))" },
     1590    { "search -terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) AND_NOT Zterm:(pos=2))" },
     1591    { "+search terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) AND_MAYBE Zterm:(pos=2))" },
     1592    { "-search terms", "(Zterm:(pos=2) AND_NOT (Zsearch:(pos=1) SYNONYM find:(pos=1)))" },
     1593    { "search terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) OR Zterm:(pos=2))" },
    15771594    // Shouldn't trigger synonyms:
    15781595    { "\"search terms\"", "(search:(pos=1) PHRASE 2 terms:(pos=2))" },
    15791596    { NULL, NULL }
     
    16131630
    16141631static test test_multi_synonym_queries[] = {
    16151632    { "sun OR tan OR cream", "(Zsun:(pos=1) OR Ztan:(pos=2) OR Zcream:(pos=3))" },
    1616     { "sun tan", "(Zsun:(pos=1) OR Ztan:(pos=2) OR bathe:(pos=1))" },
    1617     { "sun tan cream", "(Zsun:(pos=1) OR Ztan:(pos=2) OR Zcream:(pos=3) OR lotion:(pos=1))" },
    1618     { "beach sun tan holiday", "(Zbeach:(pos=1) OR Zsun:(pos=2) OR Ztan:(pos=3) OR bathe:(pos=2) OR Zholiday:(pos=4))" },
    1619     { "sun tan sun tan cream", "(Zsun:(pos=1) OR Ztan:(pos=2) OR bathe:(pos=1) OR Zsun:(pos=3) OR Ztan:(pos=4) OR Zcream:(pos=5) OR lotion:(pos=3))" },
    1620     { "single", "(Zsingl:(pos=1) OR record:(pos=1))" },
     1633    { "sun tan", "((Zsun:(pos=1) OR Ztan:(pos=2)) SYNONYM bathe:(pos=1))" },
     1634    { "sun tan cream", "((Zsun:(pos=1) OR Ztan:(pos=2) OR Zcream:(pos=3)) SYNONYM lotion:(pos=1))" },
     1635    { "beach sun tan holiday", "(Zbeach:(pos=1) OR ((Zsun:(pos=2) OR Ztan:(pos=3)) SYNONYM bathe:(pos=2)) OR Zholiday:(pos=4))" },
     1636    { "sun tan sun tan cream", "(((Zsun:(pos=1) OR Ztan:(pos=2)) SYNONYM bathe:(pos=1)) OR ((Zsun:(pos=3) OR Ztan:(pos=4) OR Zcream:(pos=5)) SYNONYM lotion:(pos=3)))" },
     1637    { "single", "(Zsingl:(pos=1) SYNONYM record:(pos=1))" },
    16211638    { NULL, NULL }
    16221639};
    16231640
     
    16561673
    16571674static test test_synonym_op_queries[] = {
    16581675    { "searching", "Zsearch:(pos=1)" },
    1659     { "~searching", "(Zsearch:(pos=1) OR Zfind:(pos=1) OR Zlocate:(pos=1))" },
    1660     { "~search", "(Zsearch:(pos=1) OR find:(pos=1))" },
    1661     { "~Search", "(search:(pos=1) OR find:(pos=1))" },
     1676    { "~searching", "(Zsearch:(pos=1) SYNONYM Zfind:(pos=1) SYNONYM Zlocate:(pos=1))" },
     1677    { "~search", "(Zsearch:(pos=1) SYNONYM find:(pos=1))" },
     1678    { "~Search", "(search:(pos=1) SYNONYM find:(pos=1))" },
    16621679    { "~Searching", "searching:(pos=1)" },
    1663     { "~searching OR terms", "(Zsearch:(pos=1) OR Zfind:(pos=1) OR Zlocate:(pos=1) OR Zterm:(pos=2))" },
    1664     { "~search OR terms", "(Zsearch:(pos=1) OR find:(pos=1) OR Zterm:(pos=2))" },
    1665     { "~search +terms", "(Zterm:(pos=2) AND_MAYBE (Zsearch:(pos=1) OR find:(pos=1)))" },
    1666     { "~search -terms", "((Zsearch:(pos=1) OR find:(pos=1)) AND_NOT Zterm:(pos=2))" },
    1667     { "+~search terms", "((Zsearch:(pos=1) OR find:(pos=1)) AND_MAYBE Zterm:(pos=2))" },
    1668     { "-~search terms", "(Zterm:(pos=2) AND_NOT (Zsearch:(pos=1) OR find:(pos=1)))" },
    1669     { "~search terms", "(Zsearch:(pos=1) OR find:(pos=1) OR Zterm:(pos=2))" },
     1680    { "~searching OR terms", "((Zsearch:(pos=1) SYNONYM Zfind:(pos=1) SYNONYM Zlocate:(pos=1)) OR Zterm:(pos=2))" },
     1681    { "~search OR terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) OR Zterm:(pos=2))" },
     1682    { "~search +terms", "(Zterm:(pos=2) AND_MAYBE (Zsearch:(pos=1) SYNONYM find:(pos=1)))" },
     1683    { "~search -terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) AND_NOT Zterm:(pos=2))" },
     1684    { "+~search terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) AND_MAYBE Zterm:(pos=2))" },
     1685    { "-~search terms", "(Zterm:(pos=2) AND_NOT (Zsearch:(pos=1) SYNONYM find:(pos=1)))" },
     1686    { "~search terms", "((Zsearch:(pos=1) SYNONYM find:(pos=1)) OR Zterm:(pos=2))" },
    16701687    // FIXME: should look for multi-term synonym...
    16711688    { "~\"search terms\"", "(search:(pos=1) PHRASE 2 terms:(pos=2))" },
    16721689    { NULL, NULL }
  • xapian-core/tests/Makefile.am

     
    116116 api_db.cc \
    117117 api_generated.cc \
    118118 api_nodb.cc \
     119 api_opsynonym.cc \
    119120 api_percentages.cc \
    120121 api_posdb.cc \
    121122 api_query.cc \
  • xapian-core/include/xapian/query.h

    Property changes on: xapian-core/tests
    ___________________________________________________________________
    Modified: svn:ignore
       - .*.sw?
    *.lo
    *.pyc
    Makefile.in
    Makefile
    .deps
    .libs
    apitest
    internaltest
    perftest
    queryparsertest
    runsrv
    runtest
    stemtest
    termgentest
    apitest.exe
    internaltest.exe
    perftest.exe
    queryparsertest.exe
    stemtest.exe
    termgentest.exe
    .chert
    .flint
    .multi
    .multichert
    .multiflint
    .stub
    api_all.h
    api_anydb.h
    api_backend.h
    api_closedb.h
    api_collapse.h
    api_collated.h
    api_collated.stamp
    api_db.h
    api_generated.cc
    api_generated.h
    api_nodb.h
    api_percentages.h
    api_posdb.h
    api_replicate.h
    api_query.h
    api_scalability.h
    api_serialise.h
    api_sorting.h
    api_spelling.h
    api_transdb.h
    api_unicode.h
    api_valuestats.h
    api_valuestream.h
    api_wrdb.h
    perftest_all.h
    perftest_collated.h
    perftest_collated.stamp
    perftest_matchdecider.h
    perftest_randomidx.h
    perflog.xml
    submitperftest
    
       + .*.sw?
    *.lo
    *.pyc
    Makefile.in
    Makefile
    .deps
    .libs
    apitest
    internaltest
    perftest
    queryparsertest
    runsrv
    runtest
    stemtest
    termgentest
    apitest.exe
    internaltest.exe
    perftest.exe
    queryparsertest.exe
    stemtest.exe
    termgentest.exe
    .chert
    .flint
    .multi
    .multichert
    .multiflint
    .stub
    api_all.h
    api_anydb.h
    api_backend.h
    api_closedb.h
    api_collapse.h
    api_collated.h
    api_collated.stamp
    api_db.h
    api_generated.cc
    api_generated.h
    api_nodb.h
    api_opsynonym.h
    api_percentages.h
    api_posdb.h
    api_replicate.h
    api_query.h
    api_scalability.h
    api_serialise.h
    api_sorting.h
    api_spelling.h
    api_transdb.h
    api_unicode.h
    api_valuestats.h
    api_valuestream.h
    api_wrdb.h
    perftest_all.h
    perftest_collated.h
    perftest_collated.stamp
    perftest_matchdecider.h
    perftest_randomidx.h
    perflog.xml
    submitperftest
    
    
     
    119119            OP_VALUE_GE,
    120120
    121121            /** Filter by a less-than-or-equal test on a document value. */
    122             OP_VALUE_LE
     122            OP_VALUE_LE,
     123
     124            /** Treat a set of queries as synonyms.
     125             *
     126             *  This returns all results which match at least one of the
     127             *  queries, but weights them as if all the subqueries were
     128             *  instances of the same term: so multiple matching terms for a
     129             *  document increase the wdf value used, and the term frequency is
     130             *  based on the number of documents which would match an OR of all
     131             *  the subqueries.
     132             *
     133             *  The term frequency used will usually be an approximation,
     134             *  because calculating the precise combined term frequency would
     135             *  be overly expensive.
     136             *
     137             *  Identical to OP_OR, except for the weightings returned.
     138             */
     139            OP_SYNONYM
    123140        } op;
    124141
    125142        /** Copy constructor. */
  • xapian-core/include/xapian/weight.h

     
    22 * @brief Weighting scheme API.
    33 */
    44/* Copyright (C) 2007,2008,2009 Olly Betts
     5 * Copyright (C) 2009 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or
    78 * modify it under the terms of the GNU General Public License as
     
    212213               const std::string & term, Xapian::termcount wqf_,
    213214               double factor);
    214215
     216    /** @private @internal Initialise this object to calculate weights for a
     217     *  synonym.
     218     *
     219     *  @param stats       Source of statistics.
     220     *  @param query_len_  Query length.
     221     *  @param factor      Any scaling factor (e.g. from OP_SCALE_WEIGHT).
     222     *  @param termfreq    The termfreq to use.
     223     *  @param reltermfreq The reltermfreq to use.
     224     */
     225    void init_(const Internal & stats, Xapian::termcount query_len_,
     226               double factor, Xapian::doccount termfreq,
     227               Xapian::doccount reltermfreq);
     228
    215229    /** @private @internal Initialise this object to calculate the extra weight
    216230     *  component.
    217231     *
     
    230244        return stats_needed & DOC_LENGTH;
    231245    }
    232246
     247    /** @private @internal Return true if the WDF is needed.
     248     *
     249     *  If this method returns true, then the WDF will be fetched and passed to
     250     *  @a get_sumpart().  Otherwise 0 may be passed for the wdf.
     251     */
     252    bool get_sumpart_needs_wdf_() const {
     253        return stats_needed & WDF;
     254    }
     255
    233256  protected:
    234257    /// Only allow subclasses to copy us.
    235258    Weight(const Weight &);
     
    373396        need_stat(RELTERMFREQ);
    374397        need_stat(WDF);
    375398        need_stat(WDF_MAX);
     399        need_stat(WDF);
    376400        if (param_k2 != 0 || (param_k1 != 0 && param_b != 0)) {
    377401            need_stat(DOC_LENGTH_MIN);
    378402            need_stat(AVERAGE_LENGTH);
     
    392416        need_stat(RELTERMFREQ);
    393417        need_stat(WDF);
    394418        need_stat(WDF_MAX);
     419        need_stat(WDF);
    395420        need_stat(DOC_LENGTH_MIN);
    396421        need_stat(AVERAGE_LENGTH);
    397422        need_stat(DOC_LENGTH);
     
    455480        need_stat(DOC_LENGTH_MIN);
    456481        need_stat(WDF);
    457482        need_stat(WDF_MAX);
     483        need_stat(WDF);
    458484    }
    459485
    460486    std::string name() const;
  • xapian-core/net/serialise.cc

     
    128128    result += encode_length(stats.collection_size);
    129129    result += encode_length(stats.rset_size);
    130130
    131     map<string, Xapian::doccount>::const_iterator i;
     131    map<string, TermFreqs>::const_iterator i;
    132132
    133     result += encode_length(stats.termfreq.size());
    134     for (i = stats.termfreq.begin(); i != stats.termfreq.end(); ++i) {
     133    // FIXME - next time we're breaking the protocol API, do the freqs and
     134    // relfreqs term-by-term in a single pass.
     135
     136    result += encode_length(stats.termfreqs.size());
     137    for (i = stats.termfreqs.begin(); i != stats.termfreqs.end(); ++i) {
    135138        result += encode_length(i->first.size());
    136139        result += i->first;
    137         result += encode_length(i->second);
     140        result += encode_length(i->second.termfreq);
    138141    }
    139142
    140     for (i = stats.reltermfreq.begin(); i != stats.reltermfreq.end(); ++i) {
     143    for (i = stats.termfreqs.begin(); i != stats.termfreqs.end(); ++i) {
    141144        result += encode_length(i->first.size());
    142145        result += i->first;
    143         result += encode_length(i->second);
     146        result += encode_length(i->second.reltermfreq);
    144147    }
    145148
    146149    return result;
     
    163166        size_t len = decode_length(&p, p_end, true);
    164167        string term(p, len);
    165168        p += len;
    166         stat.termfreq.insert(make_pair(term, decode_length(&p, p_end, false)));
     169        stat.termfreqs.insert(make_pair(term, TermFreqs(decode_length(&p, p_end, false), 0)));
    167170    }
    168171
    169172    while (p != p_end) {
    170173        size_t len = decode_length(&p, p_end, true);
    171174        string term(p, len);
    172175        p += len;
    173         stat.reltermfreq.insert(make_pair(term, decode_length(&p, p_end, false)));
     176        stat.termfreqs[term].reltermfreq = decode_length(&p, p_end, false);
    174177    }
    175178
    176179    return stat;
  • xapian-core/common/leafpostlist.h

     
    22 * @brief Abstract base class for leaf postlists.
    33 */
    44/* Copyright (C) 2007 Olly Betts
     5 * Copyright (C) 2009 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or
    78 * modify it under the terms of the GNU General Public License as
     
    2324
    2425#include "postlist.h"
    2526
     27#include <string>
     28
    2629namespace Xapian {
    2730    class Weight;
    2831}
     
    7679    Xapian::weight recalc_maxweight();
    7780};
    7881
     82/// Abstract base class for leaf postlists based on a term.
     83class TermBasedLeafPostList : public LeafPostList {
     84    /// Don't allow assignment.
     85    void operator=(const TermBasedLeafPostList &);
     86
     87    /// Don't allow copying.
     88    TermBasedLeafPostList(const TermBasedLeafPostList &);
     89
     90  protected:
     91    /// The term name for this postlist ("" for an alldocs postlist).
     92    std::string tname;
     93
     94    /// Only constructable as a base class for derived classes.
     95    TermBasedLeafPostList(const std::string & tname_)
     96            : LeafPostList(), tname(tname_) {}
     97
     98  public:
     99    TermFreqs get_termfreq_est_using_stats(
     100        const Xapian::Weight::Internal & stats) const;
     101};
     102
    79103#endif // XAPIAN_INCLUDED_LEAFPOSTLIST_H
  • xapian-core/common/postlist.h

     
    22 * @brief Abstract base class for postlists.
    33 */
    44/* Copyright (C) 2007,2008,2009 Olly Betts
     5 * Copyright (C) 2009 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or
    78 * modify it under the terms of the GNU General Public License as
     
    2829#include <xapian/postingiterator.h>
    2930
    3031#include "positionlist.h"
     32#include "weightinternal.h"
    3133
    3234/// Abstract base class for postlists.
    3335class Xapian::PostingIterator::Internal : public Xapian::Internal::RefCntBase {
     
    6062     */
    6163    virtual Xapian::doccount get_termfreq_est() const = 0;
    6264
     65    /** Get an estimate for the termfreq and reltermfreq, given the stats.
     66     *
     67     *  The frequencies may be for a combination of databases, or for just the
     68     *  relevant documents, so the results need not lie in the bounds given by
     69     *  get_termfreq_min() and get_termfreq_max().
     70     */
     71    virtual TermFreqs get_termfreq_est_using_stats(
     72        const Xapian::Weight::Internal & stats) const = 0;
     73
    6374    /// Return an upper bound on what get_weight() can return.
    6475    virtual Xapian::weight get_maxweight() const = 0;
    6576
  • xapian-core/common/contiguousalldocspostlist.h

     
    22 * @brief Iterate all document ids when they form a contiguous range.
    33 */
    44/* Copyright (C) 2007,2008,2009 Olly Betts
     5 * Copyright (C) 2009 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or modify
    78 * it under the terms of the GNU General Public License as published by
     
    2728#include "leafpostlist.h"
    2829
    2930/// A PostList iterating all docids when they form a contiguous range.
    30 class ContiguousAllDocsPostList : public LeafPostList {
     31class ContiguousAllDocsPostList : public TermBasedLeafPostList {
    3132    /// Don't allow assignment.
    3233    void operator=(const ContiguousAllDocsPostList &);
    3334
     
    4748    /// Constructor.
    4849    ContiguousAllDocsPostList(Xapian::Internal::RefCntPtr<const Xapian::Database::Internal> db_,
    4950                              Xapian::doccount doccount_)
    50         : db(db_), did(0), doccount(doccount_) { }
     51        : TermBasedLeafPostList(std::string()),
     52          db(db_), did(0), doccount(doccount_) { }
    5153
    5254    /** Return the term frequency.
    5355     *
  • xapian-core/common/rset.h

     
    4646        const Xapian::Database root;
    4747        const Xapian::Database::Internal *dbroot;
    4848
    49         Xapian::TermFreqMap reltermfreqs;
     49        std::map<std::string, Xapian::doccount> reltermfreqs;
    5050        bool calculated_reltermfreqs;
    5151
    5252        /** Calculate the statistics.
  • xapian-core/common/weightinternal.h

     
    3131#include <map>
    3232#include <string>
    3333
    34 namespace Xapian {
     34/// A pair holding a term frequency (termfreq) and a relevant term frequency (reltermfreq).
     35struct TermFreqs {
     36    Xapian::doccount termfreq;  ///< Term frequency.
     37    Xapian::doccount reltermfreq;  ///< Relevant term frequency.
     38
     39    TermFreqs() : termfreq(0), reltermfreq(0) {}  // Both counts start at zero.
     40    TermFreqs(Xapian::doccount termfreq_, Xapian::doccount reltermfreq_)
     41            : termfreq(termfreq_), reltermfreq(reltermfreq_) {}
     42
     43    void operator +=(const TermFreqs & other) {  // Adds other's counts member-wise.
     44        termfreq += other.termfreq;
     45        reltermfreq += other.reltermfreq;
     46    }
     47
     48    /// Return a std::string describing this object.
     49    std::string get_description() const;
     50};
    3551
    36 /** A mapping from term to term frequency. */
    37 typedef std::map<std::string, Xapian::doccount> TermFreqMap;
     52namespace Xapian {
    3853
    3954/** Class to hold statistics for a given collection. */
    4055class Weight::Internal {
     
    5166    /** Database to get the bounds on doclength and wdf from. */
    5267    Xapian::Database db;
    5368
    54     /** Map of term frequencies for the collection. */
    55     TermFreqMap termfreq;
    56 
    57     /** Map of relevant term frequencies for the collection. */
    58     TermFreqMap reltermfreq;
     69    /** Map of term frequencies and relevant term frequencies for the
     70     *  collection. */
     71    std::map<std::string, TermFreqs> termfreqs;
    5972
    6073    /** Create a Weight::Internal object with global statistics.
    6174     *
  • xapian-core/common/output.h

     
    8181XAPIAN_OUTPUT_FUNCTION(Xapian::DatabaseReplica)
    8282
    8383#include "weightinternal.h"
     84XAPIAN_OUTPUT_FUNCTION(TermFreqs)
    8485XAPIAN_OUTPUT_FUNCTION(Xapian::Weight::Internal)
    8586
    8687#endif /* XAPIAN_INCLUDED_OUTPUT_H */
  • xapian-core/common/remoteprotocol.h

     
    4040// 30.5: New MSG_GETMSET which expects MSet's percent_factor to be returned.
    4141// 30.6: Support for OP_VALUE_GE and OP_VALUE_LE in query serialisation
    4242// 31: Clean up for Xapian 1.1.0
     43
     44// NOTE: when next breaking compatibility, address the FIXME in
     45// net/serialise.cc in serialise_stats() regarding serialising the termfreq and
     46// reltermfreqs together, rather than as separate lists.
     47
    4348#define XAPIAN_REMOTE_PROTOCOL_MAJOR_VERSION 31
    4449#define XAPIAN_REMOTE_PROTOCOL_MINOR_VERSION 0
    4550
  • xapian-core/common/emptypostlist.h

     
    22 *
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002,2003,2007,2009 Olly Betts
     5 * Copyright 2009 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or
    78 * modify it under the terms of the GNU General Public License as
     
    2829class EmptyPostList : public LeafPostList {
    2930    public:
    3031        Xapian::doccount get_termfreq() const { return 0; }
     32        TermFreqs get_termfreq_est_using_stats(
     33                const Xapian::Weight::Internal &) const { return TermFreqs(); }
    3134
    3235        Xapian::docid  get_docid() const;
    3336        Xapian::weight get_weight() const;
  • xapian-core/api/leafpostlist.cc

    Property changes on: xapian-core/m4-macros/xapian-1.1.m4
    ___________________________________________________________________
    Deleted: svn:mergeinfo
    
     
    22 * @brief Abstract base class for leaf postlists.
    33 */
    44/* Copyright (C) 2007,2009 Olly Betts
     5 * Copyright (C) 2009 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or
    78 * modify it under the terms of the GNU General Public License as
     
    2425
    2526#include "leafpostlist.h"
    2627#include "omassert.h"
     28#include "debuglog.h"
    2729
    2830LeafPostList::~LeafPostList()
    2931{
     
    7981{
    8082    return LeafPostList::get_maxweight();
    8183}
     84
     85TermFreqs
     86TermBasedLeafPostList::get_termfreq_est_using_stats(
     87        const Xapian::Weight::Internal & stats) const
     88{
     89    LOGCALL(MATCH, TermFreqs,
     90            "TermBasedLeafPostList::get_termfreq_est_using_stats", stats);
     91    if (tname.empty()) {
     92        RETURN(TermFreqs(stats.collection_size, stats.rset_size));
     93    }
     94    std::map<std::string, TermFreqs>::const_iterator i =
     95            stats.termfreqs.find(tname);
     96    RETURN(i->second);
     97}
  • xapian-core/api/omqueryinternal.cc

     
    6565        case Xapian::Query::OP_VALUE_RANGE:
    6666        case Xapian::Query::OP_VALUE_GE:
    6767        case Xapian::Query::OP_VALUE_LE:
     68        case Xapian::Query::OP_SYNONYM:
    6869            return 0;
    6970        case Xapian::Query::OP_SCALE_WEIGHT:
    7071            return 1;
     
    100101        case Xapian::Query::OP_NEAR:
    101102        case Xapian::Query::OP_PHRASE:
    102103        case Xapian::Query::OP_ELITE_SET:
     104        case Xapian::Query::OP_SYNONYM:
    103105            return UINT_MAX;
    104106        default:
    105107            Assert(false);
     
    221223                result += ".";
    222224                result += str_parameter; // serialise_double(get_dbl_parameter());
    223225                break;
     226            case Xapian::Query::OP_SYNONYM:
     227                result += "=";
     228                break;
    224229        }
    225230    }
    226231    return result;
     
    251256        case Xapian::Query::OP_VALUE_GE:        name = "VALUE_GE"; break;
    252257        case Xapian::Query::OP_VALUE_LE:        name = "VALUE_LE"; break;
    253258        case Xapian::Query::OP_SCALE_WEIGHT:    name = "SCALE_WEIGHT"; break;
     259        case Xapian::Query::OP_SYNONYM:         name = "SYNONYM"; break;
    254260    }
    255261    return name;
    256262}
     
    584590                    return qint_from_vector(Xapian::Query::OP_SCALE_WEIGHT,
    585591                                            subqs, 0, param);
    586592                }
    587                 default:
     593                case '=': {
     594                    return qint_from_vector(Xapian::Query::OP_SYNONYM, subqs);
     595                }
     596                default:
    588597                    LOGLINE(UNKNOWN, "Can't parse remainder `" << p - 1 << "'");
    589598                    throw Xapian::InvalidArgumentError("Invalid query string");
    590599            }
     
    809818        case OP_ELITE_SET:
    810819        case OP_OR:
    811820        case OP_XOR:
     821        case OP_SYNONYM:
    812822            // Doing an "OR" type operation - if we've got any MatchNothing
    813823            // subnodes, drop them; except that we mustn't become an empty
    814824            // node due to this, so we never drop a MatchNothing subnode
     
    900910                }
    901911            }
    902912            break;
    903         case OP_OR: case OP_AND: case OP_XOR:
     913        case OP_OR: case OP_AND: case OP_XOR: case OP_SYNONYM:
    904914            // Remove duplicates if we can.
    905915            if (subqs.size() > 1) collapse_subqs();
    906916            break;
     
    944954void
    945955Xapian::Query::Internal::collapse_subqs()
    946956{
    947     Assert(op == OP_OR || op == OP_AND || op == OP_XOR);
     957    Assert(op == OP_OR || op == OP_AND || op == OP_XOR || op == OP_SYNONYM);
    948958    typedef set<Xapian::Query::Internal *, SortPosName> subqtable;
    949959    subqtable sqtab;
    950960
     
    10381048    Assert(!is_leaf(op));
    10391049    if (subq == 0) {
    10401050        subqs.push_back(0);
    1041     } else if (op == subq->op && (op == OP_AND || op == OP_OR || op == OP_XOR)) {
     1051    } else if (op == subq->op && (op == OP_AND || op == OP_OR || op == OP_XOR || op == OP_SYNONYM)) {
    10421052        // Distribute the subquery.
    10431053        for (subquery_list::const_iterator i = subq->subqs.begin();
    10441054             i != subq->subqs.end(); i++) {
     
    10551065    Assert(!is_leaf(op));
    10561066    if (subq == 0) {
    10571067        subqs.push_back(0);
    1058     } else if (op == subq->op && (op == OP_AND || op == OP_OR || op == OP_XOR)) {
     1068    } else if (op == subq->op && (op == OP_AND || op == OP_OR || op == OP_XOR || op == OP_SYNONYM)) {
    10591069        // Distribute the subquery.
    10601070        for (subquery_list::const_iterator i = subq->subqs.begin();
    10611071             i != subq->subqs.end(); i++) {
  • xapian-core/backends/multi/multi_postlist.cc

     
    22 *
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002,2003,2004,2005,2007,2008,2009 Olly Betts
     5 * Copyright 2009 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or
    78 * modify it under the terms of the GNU General Public License as
     
    5960    return 0;
    6061}
    6162
     63TermFreqs
     64MultiPostList::get_termfreq_est_using_stats(
     65        const Xapian::Weight::Internal &) const
     66{
     67    // Should never get called.
     68    Assert(false);
     69    return TermFreqs();
     70}
     71
    6272Xapian::docid
    6373MultiPostList::get_docid() const
    6474{
  • xapian-core/backends/multi/multi_postlist.h

     
    22 *
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2003,2005,2007,2009 Olly Betts
     5 * Copyright 2009 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or
    78 * modify it under the terms of the GNU General Public License as
     
    4344        ~MultiPostList();
    4445
    4546        Xapian::doccount get_termfreq() const;
     47        TermFreqs get_termfreq_est_using_stats(
     48                const Xapian::Weight::Internal & stats) const;
    4649
    4750        Xapian::docid  get_docid() const;     // Gets current docid
    4851        Xapian::termcount get_doclength() const; // Get length of current document
  • xapian-core/backends/remote/net_postlist.cc

     
    33 */
    44/* Copyright (C) 2007 Lemur Consulting Ltd
    55 * Copyright (C) 2007,2008,2009 Olly Betts
     6 * Copyright (C) 2009 Lemur Consulting Ltd
    67 *
    78 * This program is free software; you can redistribute it and/or
    89 * modify it under the terms of the GNU General Public License as
     
    5556PositionList *
    5657NetworkPostList::read_position_list()
    5758{
    58     lastposlist = db->open_position_list(lastdocid, term);
     59    lastposlist = db->open_position_list(lastdocid, tname);
    5960    return lastposlist.get();
    6061}
    6162
    6263PositionList *
    6364NetworkPostList::open_position_list() const
    6465{
    65     return db->open_position_list(lastdocid, term);
     66    return db->open_position_list(lastdocid, tname);
    6667}
    6768
    6869PostList *
     
    104105string
    105106NetworkPostList::get_description() const
    106107{
    107     return "NetworkPostList(" + term + ")";
     108    return "NetworkPostList(" + tname + ")";
    108109}
  • xapian-core/backends/remote/net_postlist.h

     
    11/** @file net_postlist.h
    22 *  @brief Postlists for remote databases
    33 */
    4 /* Copyright (C) 2007 Lemur Consulting Ltd
     4/* Copyright (C) 2007,2009 Lemur Consulting Ltd
    55 * Copyright (C) 2007,2008,2009 Olly Betts
    66 *
    77 * This program is free software; you can redistribute it and/or
     
    3333
    3434/** A postlist in a remote database.
    3535 */
    36 class NetworkPostList : public LeafPostList {
     36class NetworkPostList : public TermBasedLeafPostList {
    3737    friend class RemoteDatabase;
    3838
    3939    Xapian::Internal::RefCntPtr<const RemoteDatabase> db;
    40     string term;
    4140
    4241    string postings;
    4342    bool started;
     
    6160    /// Constructor.
    6261    NetworkPostList(Xapian::Internal::RefCntPtr<const RemoteDatabase> db_,
    6362                    const string & term_)
    64         : db(db_), term(term_), started(false), pos(NULL), pos_end(NULL),
     63        : TermBasedLeafPostList(term_),
     64          db(db_), started(false), pos(NULL), pos_end(NULL),
    6565          lastdocid(0), lastwdf(0), termfreq(0)
    6666    {
    67         termfreq = db->read_post_list(term, *this);
     67        termfreq = db->read_post_list(tname, *this);
    6868    }
    6969
    7070    /// Get number of documents indexed by this term.
  • xapian-core/backends/inmemory/inmemory_database.cc

     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002 Ananova Ltd
    55 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009 Olly Betts
    6  * Copyright 2006 Richard Boulton
     6 * Copyright 2006,2009 Lemur Consulting Ltd
    77 *
    88 * This program is free software; you can redistribute it and/or
    99 * modify it under the terms of the GNU General Public License as
     
    7878//////////////
    7979
    8080InMemoryPostList::InMemoryPostList(Xapian::Internal::RefCntPtr<const InMemoryDatabase> db_,
    81                                    const InMemoryTerm & term)
    82         : pos(term.docs.begin()),
     81                                   const InMemoryTerm & term,
     82                                   const std::string & tname_)
     83        : TermBasedLeafPostList(tname_),
     84          pos(term.docs.begin()),
    8385          end(term.docs.end()),
    8486          termfreq(term.term_freq),
    8587          started(false),
     
    280282/////////////////////////////
    281283
    282284InMemoryAllDocsPostList::InMemoryAllDocsPostList(Xapian::Internal::RefCntPtr<const InMemoryDatabase> db_)
    283         : did(0), db(db_)
     285        : TermBasedLeafPostList(std::string()), did(0), db(db_)
    284286{
    285287}
    286288
     
    415417        return new EmptyPostList;
    416418
    417419    Xapian::Internal::RefCntPtr<const InMemoryDatabase> ptrtothis(this);
    418     LeafPostList * pl = new InMemoryPostList(ptrtothis, i->second);
     420    LeafPostList * pl = new InMemoryPostList(ptrtothis, i->second, tname);
    419421    Assert(!pl->at_end());
    420422    return pl;
    421423}
  • xapian-core/backends/inmemory/inmemory_database.h

     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002 Ananova Ltd
    55 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009 Olly Betts
    6  * Copyright 2006 Richard Boulton
     6 * Copyright 2006,2009 Lemur Consulting Ltd
    77 *
    88 * This program is free software; you can redistribute it and/or
    99 * modify it under the terms of the GNU General Public License as
     
    132132
    133133/** A PostList in an inmemory database.
    134134 */
    135 class InMemoryPostList : public LeafPostList {
     135class InMemoryPostList : public TermBasedLeafPostList {
    136136    friend class InMemoryDatabase;
    137137    private:
    138138        vector<InMemoryPosting>::const_iterator pos;
     
    148148        Xapian::Internal::RefCntPtr<const InMemoryDatabase> db;
    149149
    150150        InMemoryPostList(Xapian::Internal::RefCntPtr<const InMemoryDatabase> db,
    151                          const InMemoryTerm & term);
     151                         const InMemoryTerm & term, const std::string & tname);
    152152    public:
    153153        Xapian::doccount get_termfreq() const;
    154154
     
    170170
    171171/** A PostList over all docs in an inmemory database.
    172172 */
    173 class InMemoryAllDocsPostList : public LeafPostList {
     173class InMemoryAllDocsPostList : public TermBasedLeafPostList {
    174174    friend class InMemoryDatabase;
    175175    private:
    176176        Xapian::docid did;
  • xapian-core/backends/chert/chert_postlist.h

     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002 Ananova Ltd
    55 * Copyright 2002,2003,2004,2005,2007,2008,2009 Olly Betts
    6  * Copyright 2007 Lemur Consulting Ltd
     6 * Copyright 2007,2009 Lemur Consulting Ltd
    77 *
    88 * This program is free software; you can redistribute it and/or
    99 * modify it under the terms of the GNU General Public License as
     
    121121
    122122/** A postlist in a chert database.
    123123 */
    124 class ChertPostList : public LeafPostList {
     124class ChertPostList : public TermBasedLeafPostList {
    125125    protected: // ChertModifiedPostList needs to access these.
    126126        /** The database we are searching.  This pointer is held so that the
    127127         *  database doesn't get deleted before us, and also to give us access
     
    129129         */
    130130        Xapian::Internal::RefCntPtr<const ChertDatabase> this_db;
    131131
    132         /// The termname for this postlist.
    133         string tname;
    134 
    135132        /// Whether we've started reading the list yet.
    136133        bool have_started;
    137134
  • xapian-core/backends/chert/chert_postlist.cc

     
    22 *
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002,2003,2004,2005,2007,2008,2009 Olly Betts
    5  * Copyright 2007,2008 Lemur Consulting Ltd
     5 * Copyright 2007,2008,2009 Lemur Consulting Ltd
    66 *
    77 * This program is free software; you can redistribute it and/or
    88 * modify it under the terms of the GNU General Public License as
     
    660660ChertPostList::ChertPostList(Xapian::Internal::RefCntPtr<const ChertDatabase> this_db_,
    661661                             const string & tname_,
    662662                             bool keep_reference)
    663         : this_db(keep_reference ? this_db_ : NULL),
    664           tname(tname_),
     663        : TermBasedLeafPostList(tname_),
     664          this_db(keep_reference ? this_db_ : NULL),
    665665          have_started(false),
    666666          cursor(this_db_->postlist_table.cursor_get()),
    667667          is_at_end(false)
  • xapian-core/backends/flint/flint_postlist.cc

     
    22 *
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002,2003,2004,2005,2007,2008,2009 Olly Betts
    5  * Copyright 2007 Lemur Consulting Ltd
     5 * Copyright 2007,2009 Lemur Consulting Ltd
    66 *
    77 * This program is free software; you can redistribute it and/or
    88 * modify it under the terms of the GNU General Public License as
     
    654654 */
    655655FlintPostList::FlintPostList(Xapian::Internal::RefCntPtr<const FlintDatabase> this_db_,
    656656                             const string & tname_)
    657         : this_db(this_db_),
    658           tname(tname_),
     657        : TermBasedLeafPostList(tname_),
     658          this_db(this_db_),
    659659          have_started(false),
    660660          cursor(this_db->postlist_table.cursor_get()),
    661661          is_at_end(false)
  • xapian-core/backends/flint/flint_postlist.h

     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002 Ananova Ltd
    55 * Copyright 2002,2003,2004,2005,2007,2008,2009 Olly Betts
    6  * Copyright 2007 Lemur Consulting Ltd
     6 * Copyright 2007,2009 Lemur Consulting Ltd
    77 *
    88 * This program is free software; you can redistribute it and/or
    99 * modify it under the terms of the GNU General Public License as
     
    102102
    103103/** A postlist in a flint database.
    104104 */
    105 class FlintPostList : public LeafPostList {
     105class FlintPostList : public TermBasedLeafPostList {
    106106   protected: // FlintModifiedPostList needs to access these.
    107107        /** The database we are searching.  This pointer is held so that the
    108108         *  database doesn't get deleted before us, and also to give us access
     
    110110         */
    111111        Xapian::Internal::RefCntPtr<const FlintDatabase> this_db;
    112112
    113         /// The termname for this postlist.
    114         string tname;
    115 
    116113        /// Whether we've started reading the list yet.
    117114        bool have_started;
    118115
  • xapian-core/backends/flint/flint_alldocspostlist.h

     
    22 * @brief A PostList which iterates over all documents in a FlintDatabase.
    33 */
    44/* Copyright (C) 2006,2007,2008,2009 Olly Betts
     5 * Copyright (C) 2009 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or modify
    78 * it under the terms of the GNU General Public License as published by
     
    2526
    2627#include "leafpostlist.h"
    2728
    28 class FlintAllDocsPostList : public LeafPostList {
     29class FlintAllDocsPostList : public TermBasedLeafPostList {
    2930    /// Don't allow assignment.
    3031    void operator=(const FlintAllDocsPostList &);
    3132
     
    5051  public:
    5152    FlintAllDocsPostList(Xapian::Internal::RefCntPtr<const FlintDatabase> db_,
    5253                         Xapian::doccount doccount_)
    53       : db(db_), doccount(doccount_), cursor(db->termlist_table.cursor_get()),
     54      : TermBasedLeafPostList(std::string()),
     55        db(db_), doccount(doccount_), cursor(db->termlist_table.cursor_get()),
    5456        current_did(0)
    5557    {
    5658        cursor->find_entry("");
  • xapian-bindings/python/smoketest2.py

     
    213213    qp.set_stemming_strategy(qp.STEM_SOME)
    214214    qp.set_stemmer(xapian.Stem('en'))
    215215    expect_query(qp.parse_query("foo o", qp.FLAG_PARTIAL),
    216                  "(Zfoo:(pos=1) AND (out:(pos=2) OR outsid:(pos=2) OR Zo:(pos=2)))")
     216                 "(Zfoo:(pos=1) AND ((out:(pos=2) SYNONYM outsid:(pos=2)) OR Zo:(pos=2)))")
    217217
    218218    expect_query(qp.parse_query("foo outside", qp.FLAG_PARTIAL),
    219219                 "(Zfoo:(pos=1) AND Zoutsid:(pos=2))")
  • xapian-bindings/python/smoketest3.py

    Property changes on: xapian-bindings/python/generate-python-exceptions
    ___________________________________________________________________
    Deleted: svn:mergeinfo
    
     
    153153
    154154    # Feature test for Document.values
    155155    count = 0
    156     for term in doc.values():
     156    for term in list(doc.values()):
    157157        count += 1
    158158    expect(count, 0, "Unexpected number of entries in doc.values")
    159159
     
    213213    qp.set_stemming_strategy(qp.STEM_SOME)
    214214    qp.set_stemmer(xapian.Stem('en'))
    215215    expect_query(qp.parse_query("foo o", qp.FLAG_PARTIAL),
    216                  "(Zfoo:(pos=1) AND (out:(pos=2) OR outsid:(pos=2) OR Zo:(pos=2)))")
     216                 "(Zfoo:(pos=1) AND ((out:(pos=2) SYNONYM outsid:(pos=2)) OR Zo:(pos=2)))")
    217217
    218218    expect_query(qp.parse_query("foo outside", qp.FLAG_PARTIAL),
    219219                 "(Zfoo:(pos=1) AND Zoutsid:(pos=2))")