Ticket #199: matchspy3.patch

File matchspy3.patch, 90.2 KB (added by Richard Boulton, 15 years ago)

Patch as applied to trunk

  • xapian-maintainer-tools/win32msvc/win32_api.mak

     
    2323    $(INTDIR)/errorhandler.obj \
    2424    $(INTDIR)/expanddecider.obj \
    2525    $(INTDIR)/leafpostlist.obj \
     26    $(INTDIR)/matchspy.obj \
    2627    $(INTDIR)/omdatabase.obj \
    2728    $(INTDIR)/omdocument.obj \
    2829    $(INTDIR)/omenquire.obj \
     
    5152    $(INTDIR)/errorhandler.cc\
    5253    $(INTDIR)/expanddecider.cc\
    5354    $(INTDIR)/leafpostlist.cc\
     55    $(INTDIR)/matchspy.cc \
    5456    $(INTDIR)/omdatabase.cc\
    5557    $(INTDIR)/omdocument.cc\
    5658    $(INTDIR)/omenquire.cc\
  • xapian-core/matcher/multimatch.cc

     
    55 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009 Olly Betts
    66 * Copyright 2003 Orange PCS Ltd
    77 * Copyright 2003 Sam Liddicott
    8  * Copyright 2007,2008 Lemur Consulting Ltd
     8 * Copyright 2007,2008,2009 Lemur Consulting Ltd
    99 *
    1010 * This program is free software; you can redistribute it and/or
    1111 * modify it under the terms of the GNU General Public License as
     
    4747#include "weightinternal.h"
    4848
    4949#include <xapian/errorhandler.h>
     50#include <xapian/matchspy.h>
    5051#include <xapian/version.h> // For XAPIAN_HAS_REMOTE_BACKEND
    5152
    5253#ifdef XAPIAN_HAS_REMOTE_BACKEND
     
    174175    }
    175176}
    176177
     178/// Class which applies several match spies in turn.
     179class MultipleMatchSpy : public Xapian::MatchSpy {
     180  private:
     181    /// List of match spies to call, in order.
     182    const std::vector<Xapian::MatchSpy *> & spies;
     183
     184  public:
     185    MultipleMatchSpy(const std::vector<Xapian::MatchSpy *> & spies_)
     186            : spies(spies_) {}
     187
     188    /** Implementation of virtual operator().
     189     *
     190     *  This implementation calls all the spies in turn.
     191     */
     192    void operator()(const Xapian::Document &doc, Xapian::weight wt);
     193};
     194
     195void
     196MultipleMatchSpy::operator()(const Xapian::Document &doc, Xapian::weight wt) {
     197    LOGCALL_VOID(MATCH, "MultipleMatchSpy::operator()", doc << ", " << wt);
     198    vector<Xapian::MatchSpy *>::const_iterator i;
     199    for (i = spies.begin(); i != spies.end(); ++i) {
     200        (**i)(doc, wt);
     201    }
     202}
     203
    177204////////////////////////////////////
    178205// Initialisation and cleaning up //
    179206////////////////////////////////////
     
    191218                       const Xapian::Sorter * sorter_,
    192219                       Xapian::ErrorHandler * errorhandler_,
    193220                       Xapian::Weight::Internal & stats,
    194                        const Xapian::Weight * weight_)
     221                       const Xapian::Weight * weight_,
     222                       const vector<Xapian::MatchSpy *> & matchspies_)
    195223        : db(db_), query(query_),
    196224          collapse_max(collapse_max_), collapse_key(collapse_key_),
    197225          percent_cutoff(percent_cutoff_), weight_cutoff(weight_cutoff_),
     
    199227          sort_key(sort_key_), sort_by(sort_by_),
    200228          sort_value_forward(sort_value_forward_), sorter(sorter_),
    201229          errorhandler(errorhandler_), weight(weight_),
    202           is_remote(db.internal.size())
     230          is_remote(db.internal.size()),
     231          matchspies(matchspies_)
    203232{
    204233    DEBUGCALL(MATCH, void, "MultiMatch", db_ << ", " << query_ << ", " <<
    205234              qlen << ", " << (omrset ? *omrset : Xapian::RSet()) << ", " <<
     
    229258                rem_db->set_query(query, qlen, collapse_max, collapse_key,
    230259                                  order, sort_key, sort_by, sort_value_forward,
    231260                                  percent_cutoff, weight_cutoff, weight,
    232                                   subrsets[i]);
     261                                  subrsets[i], matchspies);
    233262                bool decreasing_relevance =
    234263                    (sort_by == REL || sort_by == REL_VAL);
    235                 smatch = new RemoteSubMatch(rem_db, decreasing_relevance);
     264                smatch = new RemoteSubMatch(rem_db, decreasing_relevance, matchspies);
    236265                is_remote[i] = true;
    237266            } else {
    238267#endif /* XAPIAN_HAS_REMOTE_BACKEND */
     
    277306                     Xapian::MSet & mset,
    278307                     const Xapian::Weight::Internal & stats,
    279308                     const Xapian::MatchDecider *mdecider,
    280                      const Xapian::MatchDecider *matchspy)
     309                     const Xapian::MatchDecider *matchspy_legacy)
    281310{
    282311    DEBUGCALL(MATCH, void, "MultiMatch::get_mset", first << ", " << maxitems
    283312              << ", " << check_at_least << ", ...");
     
    403432    Xapian::doccount matches_lower_bound = 0;
    404433    Xapian::doccount matches_estimated   = pl->get_termfreq_est();
    405434
    406     if (mdecider == NULL && matchspy == NULL) {
     435    if (mdecider == NULL && matchspy_legacy == NULL) {
    407436        // If we have a matcher decider or match spy, the lower bound must be
    408437        // set to 0 as we could discard all hits.  Otherwise set it to the
    409438        // minimum number of entries which the postlist could return.
    410439        matches_lower_bound = pl->get_termfreq_min();
    411440    }
    412441
     442    // Prepare the matchspy
     443    Xapian::MatchSpy *matchspy = NULL;
     444    MultipleMatchSpy multispy(matchspies);
     445    if (!matchspies.empty()) {
     446        if (matchspies.size() == 1) {
     447            matchspy = matchspies[0];
     448        } else {
     449            matchspy = &multispy;
     450        }
     451    }
     452
    413453    // Check if any results have been asked for (might just be wanting
    414454    // maxweight).
    415455    if (check_at_least == 0) {
     
    437477        return;
    438478    }
    439479
    440     // Number of documents considered by a decider or matchspy.
     480    // Number of documents considered by a decider or matchspy_legacy.
    441481    Xapian::doccount decider_considered = 0;
    442     // Number of documents denied by the decider or matchspy.
     482    // Number of documents denied by the decider or matchspy_legacy.
    443483    Xapian::doccount decider_denied = 0;
    444484
    445485    // Set max number of results that we want - this is used to decide
     
    545585            // VAL, then new_item.wt won't yet be set, but that doesn't
    546586            // matter since it's not used by the sort function.
    547587            if (!mcmp(new_item, min_item)) {
    548                 if (matchspy == NULL && mdecider == NULL && !collapser) {
     588                if (mdecider == NULL && !collapser && matchspy_legacy == NULL) {
    549589                    // Document was definitely suitable for mset - no more
    550590                    // processing needed.
    551591                    LOGLINE(MATCH, "Making note of match item which sorts lower than min_item");
     
    563603                    continue;
    564604                }
    565605                // We can't drop the item, because we need to show it
    566                 // to the matchspy, test whether the mdecider would
     606                // to the matchspy_legacy, test whether the mdecider would
    567607                // accept it, and/or test whether it would be collapsed.
    568608                LOGLINE(MATCH, "Keeping candidate which sorts lower than min_item for further investigation");
    569609            }
    570610        }
    571611
    572612        // Use the match spy and/or decision functors (if specified).
    573         if (matchspy != NULL || mdecider != NULL) {
     613        if (matchspy != NULL || mdecider != NULL || matchspy_legacy != NULL) {
    574614            const unsigned int multiplier = db.internal.size();
    575615            Assert(multiplier != 0);
    576616            Xapian::doccount n = (did - 1) % multiplier; // which actual database
     
    584624                Xapian::Document mydoc(doc.get());
    585625
    586626                ++decider_considered;
    587                 if (matchspy && !matchspy->operator()(mydoc)) {
     627                if (matchspy_legacy && !matchspy_legacy->operator()(mydoc)) {
    588628                    ++decider_denied;
    589629                    continue;
    590630                }
     
    592632                    ++decider_denied;
    593633                    continue;
    594634                }
     635                if (matchspy) {
     636                    if (!calculated_weight) {
     637                        wt = pl->get_weight();
     638                        new_item.wt = wt;
     639                        calculated_weight = true;
     640                    }
     641                    matchspy->operator()(mydoc, wt);
     642                }
    595643            }
    596644        }
    597645
     
    882930                    ", matches_upper_bound=" << matches_upper_bound);
    883931        }
    884932
    885         if (matchspy || mdecider) {
     933        if (mdecider || matchspy_legacy) {
    886934            if (!percent_cutoff) {
    887935                if (!collapser) {
    888936                    // We're not collapsing or doing a percentage cutoff, so
     
    946994                matches_estimated = matches_lower_bound;
    947995        }
    948996
    949         if (collapser || matchspy || mdecider) {
     997        if (collapser || mdecider || matchspy_legacy) {
    950998            LOGLINE(MATCH, "Clamping estimate between bounds: "
    951999                    "matches_lower_bound = " << matches_lower_bound <<
    9521000                    ", matches_estimated = " << matches_estimated <<
     
    9621010                matches_estimated = docs_matched;
    9631011        }
    9641012
    965         if (collapser && !matchspy && !mdecider && !percent_cutoff) {
     1013        if (collapser && !mdecider && !percent_cutoff && !matchspy_legacy) {
    9661014            AssertRel(docs_matched,<=,uncollapsed_upper_bound);
    9671015            if (docs_matched > uncollapsed_lower_bound)
    9681016                uncollapsed_lower_bound = docs_matched;
  • xapian-core/matcher/remotesubmatch.cc

     
    2727#include "remote-database.h"
    2828#include "weightinternal.h"
    2929
    30 RemoteSubMatch::RemoteSubMatch(RemoteDatabase *db_, bool decreasing_relevance_)
    31         : db(db_), decreasing_relevance(decreasing_relevance_)
     30RemoteSubMatch::RemoteSubMatch(RemoteDatabase *db_,
     31                               bool decreasing_relevance_,
     32                               const vector<Xapian::MatchSpy *> & matchspies_)
     33        : db(db_),
     34          decreasing_relevance(decreasing_relevance_),
     35          matchspies(matchspies_)
    3236{
    3337    DEBUGCALL(MATCH, void, "RemoteSubMatch",
    34               db_ << ", " << decreasing_relevance_);
     38              db_ << ", " << decreasing_relevance_ << ", " <<
     39              "matchspies");
    3540}
    3641
    3742bool
     
    6469    DEBUGCALL(MATCH, PostList *, "RemoteSubMatch::get_postlist_and_term_info",
    6570              "[matcher], " << (void*)termfreqandwts << ", " << (void*)total_subqs_ptr);
    6671    Xapian::MSet mset;
    67     db->get_mset(mset);
     72    db->get_mset(mset, matchspies);
    6873    percent_factor = mset.internal->percent_factor;
    6974    if (termfreqandwts) *termfreqandwts = mset.internal->termfreqandwts;
    7075    // For remote databases we report percent_factor rather than counting the
  • xapian-core/matcher/remotesubmatch.h

     
    2626#include "remote-database.h"
    2727#include "xapian/weight.h"
    2828
     29class Xapian::MatchSpy;
     30
    2931/// Class for performing matching on a remote database.
    3032class RemoteSubMatch : public SubMatch {
    3133    /// Don't allow assignment.
     
    4648    /// The factor to use to convert weights to percentages.
    4749    double percent_factor;
    4850
     51    /// The matchspies to use.
     52    const vector<Xapian::MatchSpy *> & matchspies;
     53
    4954  public:
    5055    /// Constructor.
    51     RemoteSubMatch(RemoteDatabase *db_, bool decreasing_relevance_);
     56    RemoteSubMatch(RemoteDatabase *db_,
     57                   bool decreasing_relevance_,
     58                   const vector<Xapian::MatchSpy *> & matchspies);
    5259
    5360    /// Fetch and collate statistics.
    5461    bool prepare_match(bool nowait, Xapian::Weight::Internal & total_stats);
     
    6976    double get_percent_factor() const { return percent_factor; }
    7077
    7178    /// Short-cut for single remote match.
    72     void get_mset(Xapian::MSet & mset) { db->get_mset(mset); }
     79    void get_mset(Xapian::MSet & mset) { db->get_mset(mset, matchspies); }
    7380};
    7481
    7582#endif /* XAPIAN_INCLUDED_REMOTESUBMATCH_H */
  • xapian-core/docs/categorisation.rst

     
    11
    22.. Copyright (C) 2007 Olly Betts
     3.. Copyright (C) 2009 Lemur Consulting Ltd
    34
    45=============================
    56Xapian Categorisation Support
     
    1415lists of category values which feature in matching documents.  There are
    1516numerous potential uses this can be put to, but a common one is to offer the
    1617user the ability to narrow down their search by filtering it to only include
    17 documents with a particular value of a particular category.
     18documents with a particular value of a particular category.  This is often
     19referred to as ``faceted search``.
    1820
    1921Some categories are numeric and can take many different values (examples
    2022include price, width, and height).  The number of different values will often
     
    4345Searching
    4446---------
    4547
    46 At search time, you need to pass a ``Xapian::MatchSpy`` object to
    47 ``Xapian::Enquire::get_mset()``, like so::
     48At search time, you need to pass a ``Xapian::ValueCountMatchSpy`` object for
     49each category you want to look at to ``Xapian::Enquire::add_matchspy()``, like
     50so::
    4851
    49     Xapian::MatchSpy spy;
     52    Xapian::ValueCountMatchSpy spy0(0);
     53    Xapian::ValueCountMatchSpy spy1(1);
     54    Xapian::ValueCountMatchSpy spy3(3);
    5055
    51     spy.add_category(0);
    52     spy.add_category(1);
    53     spy.add_category(3);
    54 
    5556    Xapian::Enquire enq(db);
     57    enq.add_matchspy(&spy0);
     58    enq.add_matchspy(&spy1);
     59    enq.add_matchspy(&spy3);
    5660
    5761    enq.set_query(query);
    5862
    5963    Xapian::MSet mset = enq.get_mset(0, 10, 10000);
    6064
    61 The ``10000`` in the call to ``get_mset`` tells Xapian to check at least
     65The ``10000`` in the call to ``get_mset()`` tells Xapian to check at least
    626610000 documents, so each spy object will be passed at least 10000 documents
    63 to tally category information from (unless less than 10000 documents match
    64 the query, in which case it will see all of them).  Setting this higher will
    65 make the counts exact, but Xapian will have to do more work for most queries
    66 so searches will be slower.
     67to tally category information from (unless fewer than 10000 documents match the
     68query, in which case it will see all of them).  Setting this higher will make
     69the counts exact, but Xapian will have to do more work for most queries so
     70searches will be slower.
    6771
    68 The ``spy`` object now contains the category information.  You can find out
    69 how many documents it looked at by calling ``spy.get_total()``.  You can
    70 read the values for category ``cat_no`` like this::
     72The ``spy`` objects now contain the category information.  You can find out how
     73many documents they looked at by calling ``spy0.get_total()``.  (All the spies
     74will have looked at the same number of documents.)  You can read the values
     75from, say, ``spy0`` like this::
    7176
    72     const map<string, size_t> & cat = spy.get_categories(cat_no);
     77    const map<string, size_t> & cat = spy0.get_values();
    7378    map<string, size_t>::const_iterator i;
    7479    for (i = cat.begin(); i != cat.end(); ++i) {
    7580        cout << i->first << ": " << i->second << endl;
    7681    }
    7782
    78 You calculate the score for category ``cat_no`` like so::
     83You can calculate a score to indicate how evenly spread the values are using
     84the ``score_evenness`` function like so::
    7985
    80     double score = spy.score_categorisation(cat_num);
     86    double score = Xapian::score_evenness(spy0);
    8187
    8288Or if you prefer categories with 4 or 5 values::
    8389
    84     double score = spy.score_categorisation(cat_num, 4.5);
     90    double score = Xapian::score_evenness(spy0, 4.5);
    8591
    8692The smaller the score, the better - a perfectly even split with exactly the
    8793number of entries asked (or with no preference given for the number of entries)
     
    8995application, but to give you a rough idea, a suitable threshold is likely to be
    9096less than one.
    9197
    92 The scoring uses a sum of squared differences (currently that is - this should
     98The scoring uses a sum of squared differences (currently, that is - this should
    9399probably be regarded as an implementation detail which could change in the
    94100future if we find a better algorithm).
    95101
    96 You would build ranges from numeric values for value ``cat_no``, asking for at
    97 most ``num_ranges`` ranges like so::
     102You can build ranges from numeric values for the values returned from spy
     103``spy0``, asking for at most ``num_ranges`` ranges like so::
    98104
    99     bool result = spy.build_numeric_ranges(cat_no, num_ranges);
     105    std::map<Xapian::NumericRange, Xapian::doccount> result;
     106    Xapian::doccount values_seen;
     107    values_seen = build_numeric_ranges(result, spy0.get_values(), num_ranges);
    100108
    101 If ranges could not be built (for example, because all documents have the
    102 same value for ``cat_no``), ``false`` is returned.  Otherwise ``true`` is
    103 returned, and the spy object's category map for value ``cat_no`` is modified
    104 to consist of ranges.  Keys are now built of strings returned by
    105 ``Xapian::sortable_serialise()`` - either a single string if there is only
    106 one number in a particular range, or for a range a string padded to 9 bytes
    107 with zero bytes, with a second string appended.
     109Here, ``result`` will be filled with a set of numeric ranges (holding at most
     110``num_ranges`` ranges), and ``values_seen`` will be the count of the number of
     111values seen (note - this may be different from the number of documents seen by
     112the matchspy, since some may have no value stored in the slot).
    108113
     114If there are no values seen by the spy, ``result`` will be empty.  If all the
     115values seen by the spy are the same, ``result`` will contain a single entry,
     116with a single range with the same start and end points.
     117
    109118Restricting by category values
    110119------------------------------
    111120
    112 If you're using the categorisation to offer the user choices for narrowing
    113 down their search results, you then need to be able to apply a suitable
    114 filter.
     121If you're using the categorisation to offer the user choices for narrowing down
     122their search results, you then need to be able to apply a suitable filter.
    115123
    116 For a range, the best way is to use ``Xapian::Query::OP_VALUE_RANGE`` to
     124For a range, the easiest way is to use ``Xapian::Query::OP_VALUE_RANGE`` to
    117125build a filter query, and then combine this with the user's query using
    118126``Xapian::Query::OP_FILTER``.
    119127
    120 For a single value, you could use ``Xapian::Query::OP_VALUE_RANGE`` with
    121 the same start and end, or ``Xapian::MatchDecider``, but it's probably
    122 most efficient to also index the categories as suitably prefixed boolean
    123 terms and use those for filtering.
     128For a single value, you could use ``Xapian::Query::OP_VALUE_RANGE`` with the
     129same start and end, or ``Xapian::MatchDecider``, but it's probably most
     130efficient to also index the categories as suitably prefixed boolean terms and
     131use those for filtering.
    124132
    125133Current Limitations
    126134===================
    127135
    128 It's not currently possible to build logarithmic ranges without writing
    129 your own subclass.
    130 
    131 It's not possible to try building different ranges because the original
    132 data is overwritten.  If it's actually useful to do this, the API needs
    133 adjusting.
     136It's not currently possible to build logarithmic ranges with
     137``build_numeric_ranges``.
  • xapian-core/docs/Makefile.am

     
    1717 bm25.html code_structure.html queryparser.html \
    1818 quickstartexpand.cc.html quickstartindex.cc.html quickstartsearch.cc.html
    1919
    20 RSTDOCS = admin_notes.rst deprecation.rst glossary.rst \
     20RSTDOCS = admin_notes.rst categorisation.rst deprecation.rst glossary.rst \
    2121 postingsource.rst replication.rst replication_protocol.rst \
    2222 sorting.rst serialisation.rst spelling.rst synonyms.rst \
    2323 termgenerator.rst valueranges.rst
  • xapian-core/tests/api_matchspy.cc

     
     1/** @file api_matchspy.cc
     2 * @brief tests of MatchSpy usage
     3 */
     4/* Copyright 2007,2009 Lemur Consulting Ltd
     5 *
     6 * This program is free software; you can redistribute it and/or
     7 * modify it under the terms of the GNU General Public License as
     8 * published by the Free Software Foundation; either version 2 of the
     9 * License, or (at your option) any later version.
     10 *
     11 * This program is distributed in the hope that it will be useful,
     12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14 * GNU General Public License for more details.
     15 *
     16 * You should have received a copy of the GNU General Public License
     17 * along with this program; if not, write to the Free Software
     18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
     19 * USA
     20 */
     21
     22#include <config.h>
     23
     24#include "api_matchspy.h"
     25
     26#include <xapian.h>
     27
     28#include "str.h"
     29#include <cmath>
     30#include <map>
     31#include <vector>
     32
     33#include "backendmanager.h"
     34#include "testsuite.h"
     35#include "testutils.h"
     36#include "apitest.h"
     37
     38using namespace std;
     39
     40// #######################################################################
     41// # Tests start here
     42
     43class SimpleMatchSpy : public Xapian::MatchSpy {
     44  public:
     45    // Vector which will be filled with all the document contents seen.
     46    std::vector<std::string> seen;
     47
     48    void operator()(const Xapian::Document &doc,
     49                    Xapian::weight) {
     50        // Note that this is not recommended usage of get_data() - you
     51        // generally shouldn't call get_data() from inside a MatchSpy, because
     52        // it is (likely to be) a slow operation resulting in considerable IO.
     53        seen.push_back(doc.get_data());
     54    }
     55};
     56
     57// Basic test of a matchspy.
     58DEFINE_TESTCASE(matchspy1, backend && !remote) {
     59    Xapian::Database db(get_database("apitest_simpledata"));
     60    Xapian::Enquire enquire(db);
     61    enquire.set_query(Xapian::Query("this"));
     62
     63    SimpleMatchSpy myspy;
     64
     65    Xapian::MSet nospymset = enquire.get_mset(0, 100);
     66    enquire.add_matchspy(&myspy);
     67    Xapian::MSet spymset = enquire.get_mset(0, 100);
     68
     69    // Check that the match estimates aren't affected by the matchspy.
     70    TEST_EQUAL(nospymset, spymset);
     71
     72    vector<bool> docid_checked(db.get_lastdocid());
     73
     74    // Check that we get the expected number of matches, and that the stored
     75    // document contents are right.
     76    Xapian::MSetIterator i = spymset.begin();
     77    TEST(i != spymset.end());
     78    TEST_EQUAL(spymset.size(), 6);
     79    TEST_EQUAL(myspy.seen.size(), spymset.size());
     80
     81    std::sort(myspy.seen.begin(), myspy.seen.end());
     82
     83    std::vector<std::string> seen2;
     84    for ( ; i != spymset.end(); ++i) {
     85        const Xapian::Document doc(i.get_document());
     86        seen2.push_back(doc.get_data());
     87    }
     88    std::sort(seen2.begin(), seen2.end());
     89
     90    TEST_EQUAL(myspy.seen.size(), seen2.size());
     91    std::vector<std::string>::const_iterator j = myspy.seen.begin();
     92    std::vector<std::string>::const_iterator j2 = seen2.begin();
     93    for (; j != myspy.seen.end(); ++j, ++j2) {
     94        TEST_EQUAL(*j, *j2);
     95    }
     96
     97    return true;
     98}
     99
     100static string values_to_repr(const map<string, Xapian::doccount> & cat) {
     101    string resultrepr("|");
     102    map<string, Xapian::doccount>::const_iterator i;
     103    for (i = cat.begin(); i != cat.end(); ++i) {
     104        resultrepr += i->first;
     105        resultrepr += ':';
     106        resultrepr += str(i->second);
     107        resultrepr += '|';
     108    }
     109    return resultrepr;
     110}
     111
     112DEFINE_TESTCASE(matchspy2, writable)
     113{
     114    if (get_dbtype() == "remotetcp" || get_dbtype() == "remoteprog") {
     115        SKIP_TEST("Test not supported for remote backend");
     116    }
     117
     118    Xapian::WritableDatabase db = get_writable_database("");
     119    for (int c = 1; c <= 25; ++c) {
     120        Xapian::Document doc;
     121        doc.set_data("Document " + str(c));
     122        int factors = 0;
     123        for (int factor = 1; factor <= c; ++factor) {
     124            doc.add_term("all");
     125            if (c % factor == 0) {
     126                doc.add_term("XFACT" + str(factor));
     127                ++factors;
     128            }
     129        }
     130
     131        // Number of factors.
     132        doc.add_value(0, str(factors));
     133        // Units digits.
     134        doc.add_value(1, str(c % 10));
     135        // Constant.
     136        doc.add_value(2, "fish");
     137        // Number of digits.
     138        doc.add_value(3, str(str(c).size()));
     139
     140        db.add_document(doc);
     141    }
     142
     143    Xapian::ValueCountMatchSpy spy0(0);
     144    Xapian::ValueCountMatchSpy spy1(1);
     145    Xapian::ValueCountMatchSpy spy3(3);
     146
     147    Xapian::Enquire enq(db);
     148
     149    enq.set_query(Xapian::Query("all"));
     150
     151    enq.add_matchspy(&spy0);
     152    enq.add_matchspy(&spy1);
     153    enq.add_matchspy(&spy3);
     154    Xapian::MSet mset = enq.get_mset(0, 10);
     155
     156    TEST_EQUAL(spy0.get_total(), 25);
     157    TEST_EQUAL(spy1.get_total(), 25);
     158    TEST_EQUAL(spy3.get_total(), 25);
     159
     160    static const char * results[] = {
     161        "|1:1|2:9|3:3|4:7|5:1|6:3|8:1|",
     162        "|0:2|1:3|2:3|3:3|4:3|5:3|6:2|7:2|8:2|9:2|",
     163        "|1:9|2:16|",
     164    };
     165    TEST_STRINGS_EQUAL(values_to_repr(spy0.get_values()), results[0]);
     166    TEST_STRINGS_EQUAL(values_to_repr(spy1.get_values()), results[1]);
     167    TEST_STRINGS_EQUAL(values_to_repr(spy3.get_values()), results[2]);
     168                       
     169    {
     170        // Test scoring evenness returns scores with the natural ordering.
     171        double score0 = Xapian::score_evenness(spy0);
     172        tout << "score0 = " << score0 << endl;
     173        double score1 = Xapian::score_evenness(spy1);
     174        tout << "score1 = " << score1 << endl;
     175        double score3 = Xapian::score_evenness(spy3);
     176        tout << "score3 = " << score3 << endl;
     177        // 1 is obviously best, and 0 obviously worst.
     178        TEST(score1 < score3);
     179        TEST(score3 < score0);
     180
     181        // Check that using the expanded form gives the same results.
     182        double score0_check = Xapian::score_evenness(spy0.get_values(), spy0.get_total());
     183        tout << "score0_check = " << score0_check << endl;
     184        TEST_EQUAL(score0, score0_check);
     185    }
     186
     187    {
     188        // Test scoring evenness and about 7 categories returns scores with the
     189        // natural ordering.
     190        double score0 = Xapian::score_evenness(spy0, 7);
     191        tout << "score0 = " << score0 << endl;
     192        double score1 = Xapian::score_evenness(spy1, 7);
     193        tout << "score1 = " << score1 << endl;
     194        double score3 = Xapian::score_evenness(spy3, 7);
     195        tout << "score3 = " << score3 << endl;
     196        // 3 is clearly worst - 0 is arguably a little better than 1 (0 is the
     197        // requested size, but 1 has a much more even split).
     198        TEST(score0 < score1);
     199        TEST(score1 < score3);
     200
     201        // Check that using the expanded form gives the same results.
     202        double score0_check = Xapian::score_evenness(spy0.get_values(), spy0.get_total());
     203        tout << "score0_check = " << score0_check << endl;
     204        TEST_EQUAL(score0, score0_check);
     205    }
     206
     207    return true;
     208}
     209
     210DEFINE_TESTCASE(matchspy3, writable)
     211{
            // Exercises Xapian::build_numeric_ranges() on values tallied by
            // ValueCountMatchSpy: 25 documents carry sortable_serialise()d numbers
            // in slots 0-3, and the ranges built from each slot (at most 7 per
            // slot) are compared against a fixed textual representation.
     212    if (get_dbtype() == "remotetcp" || get_dbtype() == "remoteprog") {
     213        SKIP_TEST("Test not supported for remote backend");
     214    }
     215
     216    Xapian::WritableDatabase db = get_writable_database("");
     217    for (int c = 1; c <= 25; ++c) {
     218        Xapian::Document doc;
     219        doc.set_data("Document " + str(c));
     220        int factors = 0;
     221        for (int factor = 1; factor <= c; ++factor) {
                    // The loop runs at least once (c >= 1), so every document is
                    // indexed by "all", the term which the query below searches for.
     222            doc.add_term("all");
     223            if (c % factor == 0) {
     224                doc.add_term("XFACT" + str(factor));
     225                ++factors;
     226            }
     227        }
     228
     229        // Number of factors.
     230        doc.add_value(0, Xapian::sortable_serialise(factors));
     231        // Units digits.
     232        doc.add_value(1, Xapian::sortable_serialise(c % 10));
     233        // (x + 1/3)*(x + 1/3).
     234        doc.add_value(2, Xapian::sortable_serialise((c + 1.0/3.0) * (c + 1.0/3.0)));
     235        // floor(100 / x), i.e. a scaled and truncated reciprocal.
     236        doc.add_value(3, Xapian::sortable_serialise(floor(100.0 / c)));
     237
     238        db.add_document(doc);
     239    }
     240
            // One spy per value slot.
     241    Xapian::ValueCountMatchSpy spy0(0);
     242    Xapian::ValueCountMatchSpy spy1(1);
     243    Xapian::ValueCountMatchSpy spy2(2);
     244    Xapian::ValueCountMatchSpy spy3(3);
     245
     246    Xapian::Enquire enq(db);
     247
     248    enq.set_query(Xapian::Query("all"));
     249
     250    enq.add_matchspy(&spy0);
     251    enq.add_matchspy(&spy1);
     252    enq.add_matchspy(&spy2);
     253    enq.add_matchspy(&spy3);
     254    Xapian::MSet mset = enq.get_mset(0, 10);
     255
            // Each spy is expected to have been shown all 25 documents.
     256    TEST_EQUAL(spy0.get_total(), 25);
     257    TEST_EQUAL(spy1.get_total(), 25);
     258    TEST_EQUAL(spy2.get_total(), 25);
     259    TEST_EQUAL(spy3.get_total(), 25);
     260
            // Expected ranges for slots 0..3, written as "|lower..upper:count|" (or
            // "|value:count|" when lower == upper), with the bounds multiplied by
            // 100 and floored.  The empty string terminates the list.
     261    static const string results[] = {
     262        "|100:1|200:9|300:3|400:7|500:1|600:3|800:1|",
     263        "|0..200:8|300..400:6|500..700:7|800..900:4|",
     264        "|177..8711:9|10677..17777:4|20544..26677:3|30044..37377:3|41344..49877:3|54444..59211:2|64177:1|",
     265        "|400..900:15|1000..1600:5|2000..2500:2|3300:1|5000:1|10000:1|",
     266        ""
     267    };
            // spies[v] is the spy attached to slot v.
     268    std::vector<Xapian::ValueCountMatchSpy *> spies;
     269    spies.push_back(&spy0);
     270    spies.push_back(&spy1);
     271    spies.push_back(&spy2);
     272    spies.push_back(&spy3);
     273    for (Xapian::valueno v = 0; !results[v].empty(); ++v) {
     274        Xapian::doccount total_seen;
     275        std::map<Xapian::NumericRange, Xapian::doccount> ranges;
     276        total_seen = Xapian::build_numeric_ranges(ranges, spies[v]->get_values(), 7);
                // An expected value of just "|" would mean that no values were seen;
                // none of the entries in results currently use this.
     277        if (results[v] == "|") {
     278            TEST_EQUAL(total_seen, 0);
     279            continue;
     280        }
     281        TEST_NOT_EQUAL(total_seen, 0);
     282        TEST(ranges.size() <= 7);
                // Build the textual representation of the ranges, in map order.
     283        string resultrepr("|");
     284        map<Xapian::NumericRange, Xapian::doccount>::const_iterator i;
     285        for (i = ranges.begin(); i != ranges.end(); ++i) {
     286            if (i->first.get_lower() != i->first.get_upper()) {
     287                resultrepr += str(floor(i->first.get_lower() * 100));
     288                resultrepr += "..";
     289                resultrepr += str(floor(i->first.get_upper() * 100));
     290            } else {
     291                double start = floor(i->first.get_lower() * 100);
     292                resultrepr += str(start);
     293            }
     294            resultrepr += ':';
     295            resultrepr += str(i->second);
     296            resultrepr += '|';
     297        }
     298        tout << "value " << v << endl;
     299        TEST_STRINGS_EQUAL(resultrepr, results[v]);
     300    }
     301
     302    return true;
     303}
     304
     305DEFINE_TESTCASE(matchspy4, writable)
     306{
            // Exercises ValueCountMatchSpy::get_top_values() on plain string values:
            // 25 documents, with spies on slots 0, 1 and 3 (slot 2 is populated but
            // has no spy attached).
     307    if (get_dbtype() == "remotetcp" || get_dbtype() == "remoteprog") {
     308        SKIP_TEST("Test not supported for remote backend");
     309    }
     310
     311    Xapian::WritableDatabase db = get_writable_database("");
     312    for (int c = 1; c <= 25; ++c) {
     313        Xapian::Document doc;
     314        doc.set_data("Document " + str(c));
     315        int factors = 0;
     316        for (int factor = 1; factor <= c; ++factor) {
                    // The loop runs at least once (c >= 1), so every document is
                    // indexed by "all", the term which the query below searches for.
     317            doc.add_term("all");
     318            if (c % factor == 0) {
     319                doc.add_term("XFACT" + str(factor));
     320                ++factors;
     321            }
     322        }
     323
     324        // Number of factors.
     325        doc.add_value(0, str(factors));
     326        // Units digits.
     327        doc.add_value(1, str(c % 10));
     328        // Constant.
     329        doc.add_value(2, "fish");
     330        // Number of digits.
     331        doc.add_value(3, str(str(c).size()));
     332
     333        db.add_document(doc);
     334    }
     335
     336    Xapian::ValueCountMatchSpy spy0(0);
     337    Xapian::ValueCountMatchSpy spy1(1);
     338    Xapian::ValueCountMatchSpy spy3(3);
     339
     340    Xapian::Enquire enq(db);
     341
     342    enq.set_query(Xapian::Query("all"));
     343
     344    enq.add_matchspy(&spy0);
     345    enq.add_matchspy(&spy1);
     346    enq.add_matchspy(&spy3);
     347    Xapian::MSet mset = enq.get_mset(0, 10);
     348
            // Each spy is expected to have been shown all 25 documents.
     349    TEST_EQUAL(spy0.get_total(), 25);
     350    TEST_EQUAL(spy1.get_total(), 25);
     351    TEST_EQUAL(spy3.get_total(), 25);
     352
            // Expected get_top_values() output for slots 0..3, written as
            // "|value:frequency|...".  Slot 2 has no spy, so only the leading "|"
            // is expected for it.  NULL terminates the list.
     353    static const char * results[] = {
     354        "|2:9|4:7|3:3|6:3|1:1|5:1|8:1|",
     355        "|1:3|2:3|3:3|4:3|5:3|0:2|6:2|7:2|8:2|9:2|",
     356        "|",
     357        "|2:16|1:9|",
     358        NULL
     359    };
     360    std::vector<Xapian::ValueCountMatchSpy *> spies;
     361    spies.push_back(&spy0);
     362    spies.push_back(&spy1);
            // Placeholder, so that spies[v] lines up with slot v.
     363    spies.push_back(NULL);
     364    spies.push_back(&spy3);
     365    for (Xapian::valueno v = 0; results[v]; ++v) {
     366        tout << "value " << v << endl;
     367        std::vector<Xapian::StringAndFrequency> allvals;
     368
                // Fetch up to 100 values, which is more than any slot here holds.
     369        Xapian::ValueCountMatchSpy * spy = spies[v];
     370        if (spy != NULL)
     371            spy->get_top_values(allvals, 100);
     372        string allvals_str("|");
     373        for (size_t i = 0; i < allvals.size(); ++i) {
     374            allvals_str += allvals[i].get_string();
     375            allvals_str += ':';
     376            allvals_str += str(allvals[i].get_frequency());
     377            allvals_str += '|';
     378        }
     379        tout << allvals_str << endl;
     380        TEST_STRINGS_EQUAL(allvals_str, results[v]);
     381
                // Ask for the top i values for each i below the full count, and
                // check that whatever is returned matches the start of the full list.
                // NOTE(review): the number of entries returned is not itself
                // asserted - confirm whether vals.size() == i should be checked too.
     382        std::vector<Xapian::StringAndFrequency> vals;
     383        for (size_t i = 0; i < allvals.size(); ++i) {
     384            tout << "i " << i << endl;
     385            if (spy != NULL)
     386                spy->get_top_values(vals, i);
     387            for (size_t j = 0; j < vals.size(); j++) {
     388                tout << "j " << j << endl;
     389                TEST_EQUAL(vals[j].get_string(), allvals[j].get_string());
     390                TEST_EQUAL(vals[j].get_frequency(), allvals[j].get_frequency());
     391            }
     392        }
     393    }
     394
     395    return true;
     396}
     397
     398// Test builtin match spies.
        //
        // Two ValueCountMatchSpy objects are attached for the same slot (1) of the
        // same query; both are checked against the same expected tallies.
     399DEFINE_TESTCASE(matchspy5, backend)
     400{
     401    Xapian::Database db(get_database("apitest_simpledata"));
     402    Xapian::Enquire enquire(db);
     403    enquire.set_query(Xapian::Query("this"));
     404
     405    Xapian::ValueCountMatchSpy myspy1(1);
     406    Xapian::ValueCountMatchSpy myspy2(1);
     407
     408    enquire.add_matchspy(&myspy1);
     409    enquire.add_matchspy(&myspy2);
     410    Xapian::MSet mymset = enquire.get_mset(0, 100);
     411    TEST_EQUAL(mymset.size(), 6);
     412
     413    const std::map<std::string, Xapian::doccount> & vals1 = myspy1.get_values();
     414    const std::map<std::string, Xapian::doccount> & vals2 = myspy2.get_values();
     415
            // Expect exactly two distinct values: "h" (5 documents), "n" (1 document).
     416    TEST_EQUAL(vals1.size(), 2);
     417    TEST(vals1.find("h") != vals1.end());
     418    TEST(vals1.find("n") != vals1.end());
     419    TEST_EQUAL(vals1.find("h")->second, 5);
     420    TEST_EQUAL(vals1.find("n")->second, 1);
     421
            // The second spy must report the same tallies as the first.
     422    TEST_EQUAL(vals2.size(), 2);
     423    TEST(vals2.find("h") != vals2.end());
     424    TEST(vals2.find("n") != vals2.end());
     425    TEST_EQUAL(vals2.find("h")->second, 5);
     426    TEST_EQUAL(vals2.find("n")->second, 1);
     427
     428    return true;
     429}
     430
     431class MySpy : public Xapian::MatchSpy {
            // Minimal MatchSpy subclass: only the pure virtual operator() is
            // provided (as a no-op); every other method is inherited unchanged
            // from Xapian::MatchSpy.
     432    void operator()(const Xapian::Document &, Xapian::weight) {
     433    }
     434};
     435
     436// Test exceptions from matchspy base class, and get_description method.
        //
        // MySpy overrides none of the optional methods, so each call below reaches
        // the Xapian::MatchSpy default implementation, which is expected to throw
        // Xapian::UnimplementedError (or, for get_description(), to return the
        // generic description).
     437DEFINE_TESTCASE(matchspy6, !backend)
     438{
     439    MySpy spy;
     440
     441    TEST_EXCEPTION(Xapian::UnimplementedError, spy.clone());
     442    TEST_EXCEPTION(Xapian::UnimplementedError, spy.name());
     443    TEST_EXCEPTION(Xapian::UnimplementedError, spy.serialise());
     444    TEST_EXCEPTION(Xapian::UnimplementedError,
     445                   spy.unserialise(std::string(),
     446                                   Xapian::SerialisationContext()));
     447    TEST_EXCEPTION(Xapian::UnimplementedError, spy.serialise_results());
     448    TEST_EXCEPTION(Xapian::UnimplementedError,
     449                   spy.merge_results(std::string()));
     450    TEST_EQUAL(spy.get_description(), "Xapian::MatchSpy()");
     451
     452    return true;
     453}
     454
     455/// Test that NumericRange comparisons work correctly.
        ///
        /// Ranges are expected to order by lower bound first, then by upper bound.
        /// n4 has its lower bound above its upper bound; it is still expected to
        /// sort after n3 because its lower bound is larger.
     456DEFINE_TESTCASE(numericrange1, !backend)
     457{
     458    Xapian::NumericRange n1(0, 0);
     459    Xapian::NumericRange n2(0, 1);
     460    Xapian::NumericRange n3(1, 1);
     461    Xapian::NumericRange n4(2, 1);
     462
            // A range is never less than itself.
     463    TEST(!(n1 < n1));
            // Each adjacent pair must compare strictly in one direction only.
     464    TEST(n1 < n2);
     465    TEST(!(n2 < n1));
     466    TEST(n2 < n3);
     467    TEST(!(n3 < n2));
     468    TEST(n3 < n4);
     469    TEST(!(n4 < n3));
     470    return true;
     471}
  • xapian-core/tests/Makefile.am

    Property changes on: xapian-core/tests/api_matchspy.cc
    ___________________________________________________________________
    Added: svn:eol-style
       + native
    
     
    115115 api_collapse.cc \
    116116 api_db.cc \
    117117 api_generated.cc \
     118 api_matchspy.cc \
    118119 api_metadata.cc \
    119120 api_nodb.cc \
    120121 api_opsynonym.cc \
  • xapian-core/include/xapian/enquire.h

     
    44/* Copyright 1999,2000,2001 BrightStation PLC
    55 * Copyright 2001,2002 Ananova Ltd
    66 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009 Olly Betts
     7 * Copyright 2009 Lemur Consulting Ltd
    78 *
    89 * This program is free software; you can redistribute it and/or
    910 * modify it under the terms of the GNU General Public License as
     
    3940class Document;
    4041class ErrorHandler;
    4142class ExpandDecider;
     43class MatchSpy;
    4244class MSetIterator;
    4345class Query;
    4446class Weight;
     
    689691         */
    690692        const Xapian::Query & get_query() const;
    691693
     694        /** Add a matchspy.
     695         *
     696         *  This matchspy will be called with some of the documents which match
     697         *  the query, during the match process.  Exactly which of the matching
     698         *  documents are passed to it depends on exactly when certain
     699         *  optimisations occur during the match process, but it can be
     700         *  controlled to some extent by setting the @a checkatleast parameter
     701         *  to @a get_mset().
     702         *
     703         *  In particular, if there are enough matching documents, at least the
     704         *  number specified by @a checkatleast will be passed to the matchspy.
     705         *  This means that you can force the matchspy to be shown all matching
     706         *  documents by setting @a checkatleast to the number of documents in
     707         *  the database.
     708         *
     709         *  @param spy       The MatchSpy subclass to add.  The caller must
     710         *                   ensure that this remains valid while the Enquire
     711         *                   object remains active, or until @a
     712         *                   clear_matchspies() is called.
     713         */
     714        void add_matchspy(MatchSpy * spy);
     715
     716        /** Remove all the matchspies.
     717         */
     718        void clear_matchspies();
     719
    692720        /** Set the weighting scheme to use for queries.
    693721         *
    694722         *  @param weight_  the new weighting scheme.  If no weighting scheme
  • xapian-core/include/xapian/serialisationcontext.h

     
    3131// Forward declarations.
    3232class Weight;
    3333class PostingSource;
     34class MatchSpy;
    3435
    3536/** A context for serialisation.
    3637 *
     
    9293     */
    9394    const Xapian::PostingSource *
    9495            get_posting_source(const std::string & name) const;
     96
     97    /// Register a user-defined match spy class.
     98    void register_match_spy(const Xapian::MatchSpy &spy);
     99
     100    /** Get a match spy given a name.
     101     *
     102     *  The returned match spy is owned by the context object.
     103     *
     104     *  Returns NULL if the match spy could not be found.
     105     */
     106    const Xapian::MatchSpy *
     107            get_match_spy(const std::string & name) const;
    95108};
    96109
    97110}
  • xapian-core/include/xapian/matchspy.h

     
     1/** @file matchspy.h
     2 * @brief MatchSpy implementation.
     3 */
     4/* Copyright (C) 2007,2008 Olly Betts
     5 * Copyright (C) 2007,2009 Lemur Consulting Ltd
     6 *
     7 * This program is free software; you can redistribute it and/or modify
     8 * it under the terms of the GNU General Public License as published by
     9 * the Free Software Foundation; either version 2 of the License, or
     10 * (at your option) any later version.
     11 *
     12 * This program is distributed in the hope that it will be useful,
     13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     15 * GNU General Public License for more details.
     16 *
     17 * You should have received a copy of the GNU General Public License
     18 * along with this program; if not, write to the Free Software
     19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
     20 */
     21
     22#ifndef XAPIAN_INCLUDED_MATCHSPY_H
     23#define XAPIAN_INCLUDED_MATCHSPY_H
     24
     25#include <xapian/enquire.h>
     26#include <xapian/visibility.h>
     27
     28#include <string>
     29#include <map>
     30#include <set>
     31#include <string>
     32#include <vector>
     33
     34namespace Xapian {
     35
     36class Document;
     37class SerialisationContext;
     38
     39/** Abstract base class for match spies.
     40 *
     41 *  The subclasses will generally accumulate information seen during the match,
     42 *  to calculate aggregate functions, or other profiles of the matching
     43 *  documents.
        *
        *  Only operator() is pure virtual and so must be implemented by every
        *  subclass; the remaining virtual methods are documented below as having
        *  default implementations.
     44 */
     45class XAPIAN_VISIBILITY_DEFAULT MatchSpy {
     46  private:
     47    /// Don't allow assignment (declared private, with no body given here).
     48    void operator=(const MatchSpy &);
     49
     50    /// Don't allow copying (declared private, with no body given here).
     51    MatchSpy(const MatchSpy &);
     52
     53  protected:
     54    /// Default constructor, needed by subclass constructors.
     55    MatchSpy() {}
     56
     57  public:
     58    /** Virtual destructor, because we have virtual methods. */
     59    virtual ~MatchSpy();
     60
     61    /** Register a document with the match spy.
     62     *
     63     *  This is called by the matcher once with each document seen by the
     64     *  matcher during the match process.  Note that the matcher will often not
     65     *  see all the documents which match the query, due to optimisations which
     66     *  allow low-weighted documents to be skipped, and allow the match process
     67     *  to be terminated early.
     68     *
     69     *  @param doc The document seen by the match spy.
     70     *  @param wt The weight of the document.
     71     */
     72    virtual void operator()(const Xapian::Document &doc,
     73                            Xapian::weight wt) = 0;
     74
     75    /** Clone the match spy.
     76     *
     77     *  The clone should inherit the configuration of the parent, but need not
     78     *  inherit the state.  ie, the clone does not need to be passed
     79     *  information about the results seen by the parent.
     80     *
     81     *  If you don't want to support the remote backend in your match spy, you
     82     *  can use the default implementation which simply throws
     83     *  Xapian::UnimplementedError.
     84     *
     85     *  Note that the returned object will be deallocated by Xapian after use
     86     *  with "delete".  It must therefore have been allocated with "new".
     87     */
     88    virtual MatchSpy * clone() const;
     89
     90    /** Return the name of this match spy.
     91     *
     92     *  This name is used by the remote backend.  It is passed with the
     93     *  serialised parameters to the remote server so that it knows which class
     94     *  to create.
     95     *
     96     *  Return the full namespace-qualified name of your class here - if your
     97     *  class is called MyApp::FooMatchSpy, return "MyApp::FooMatchSpy" from
     98     *  this method.
     99     *
     100     *  If you don't want to support the remote backend in your match spy, you
     101     *  can use the default implementation which simply throws
     102     *  Xapian::UnimplementedError.
     103     */
     104    virtual std::string name() const;
     105
     106    /** Return this object's parameters serialised as a single string.
     107     *
     108     *  If you don't want to support the remote backend in your match spy, you
     109     *  can use the default implementation which simply throws
     110     *  Xapian::UnimplementedError.
     111     */
     112    virtual std::string serialise() const;
     113
     114    /** Unserialise parameters.
     115     *
     116     *  This method unserialises parameters serialised by the @a serialise()
     117     *  method and allocates and returns a new object initialised with them.
     118     *
     119     *  If you don't want to support the remote backend in your match spy, you
     120     *  can use the default implementation which simply throws
     121     *  Xapian::UnimplementedError.
     122     *
     123     *  Note that the returned object will be deallocated by Xapian after use
     124     *  with "delete".  It must therefore have been allocated with "new".
             *
             *  @param s        The serialised parameters, as returned by
             *                  @a serialise().
             *  @param context  The serialisation context supplied by the caller.
     125     */
     126    virtual MatchSpy * unserialise(const std::string & s,
     127                                   const SerialisationContext & context) const;
     128
     129    /** Serialise the results of this match spy.
     130     *
     131     *  If you don't want to support the remote backend in your match spy, you
     132     *  can use the default implementation which simply throws
     133     *  Xapian::UnimplementedError.
     134     */
     135    virtual std::string serialise_results() const;
     136
     137    /** Unserialise some results, and merge them into this matchspy.
     138     *
     139     *  The order in which results are merged should not be significant, since
     140     *  this order is not specified (and will vary depending on the speed of
     141     *  the search in each sub-database).
     142     *
     143     *  If you don't want to support the remote backend in your match spy, you
     144     *  can use the default implementation which simply throws
     145     *  Xapian::UnimplementedError.
             *
             *  @param s  The serialised results to merge.
     146     */
     147    virtual void merge_results(const std::string & s);
     148
     149    /** Return a string describing this object.
     150     *
     151     *  This default implementation returns a generic answer, so that those
     152     *  deriving their own MatchSpy subclasses are not forced to implement
     153     *  this (they may not care what get_description() gives for their
     154     *  subclass).
     155     */
     156    virtual std::string get_description() const;
     157};
     158
     159
     160/** A string with a corresponding frequency.
         *
         *  This is the element type of the vector filled in by
         *  ValueCountMatchSpy::get_top_values().
     161 */
     162class XAPIAN_VISIBILITY_DEFAULT StringAndFrequency {
            /// The string.
     163    std::string str;
            /// The frequency associated with the string.
     164    Xapian::doccount frequency;
     165  public:
            /// Construct from a string and its frequency.
     166    StringAndFrequency(std::string str_, Xapian::doccount frequency_)
     167            : str(str_), frequency(frequency_) {}
     168
            /// Return the string (by value, so the caller receives a copy).
     169    std::string get_string() const { return str; }
            /// Return the frequency.
     170    Xapian::doccount get_frequency() const { return frequency; }
     171};
     172
     173
     174/// Class for counting the frequencies of values in the matching documents.
     175class XAPIAN_VISIBILITY_DEFAULT ValueCountMatchSpy : public MatchSpy {
     176  protected:
     177    /// The slot to count.
     178    Xapian::valueno slot;
     179
     180    /// Total number of documents seen by the match spy.
     181    Xapian::doccount total;
     182
     183    /// The values seen so far, together with their frequency.
     184    std::map<std::string, Xapian::doccount> values;
     185
     186  public:
     187    /// Construct an empty ValueCountMatchSpy.
            ///
            /// The slot is set to Xapian::BAD_VALUENO and the total to 0.
     188    ValueCountMatchSpy() : slot(Xapian::BAD_VALUENO), total(0) {}
     189
     190    /** Construct a MatchSpy which counts the values in a particular slot.
     191     *
     192     *  @param slot_ The value slot whose values should be counted.
     193     */
     194    ValueCountMatchSpy(Xapian::valueno slot_)
     195            : slot(slot_), total(0) {
     196    }
     197
     198    /// Return the values seen in the slot.
            ///
            /// The map is keyed by value, and gives the frequency of each value.
     199    const std::map<std::string, Xapian::doccount> & get_values() const {
     200        return values;
     201    }
     202
     203    /** Return the total number of documents tallied. */
            // Note: the count is stored as Xapian::doccount but returned as size_t.
     204    size_t get_total() const {
     205        return total;
     206    }
     207
     208    /** Get the most frequent values in the slot.
     209     *
     210     *  @param result A vector which will be filled with the most frequent
     211     *                values, in descending order of frequency.  Values with
     212     *                the same frequency will be sorted in ascending
     213     *                alphabetical order.
     214     *
     215     *  @param maxvalues The maximum number of values to return.
     216     */
     217    void get_top_values(std::vector<StringAndFrequency> & result,
     218                        size_t maxvalues) const;
     219
     220    /** Implementation of virtual operator().
     221     *
     222     *  This implementation tallies values for a matching document.
     223     */
     224    void operator()(const Xapian::Document &doc, Xapian::weight wt);
     225
            // Overrides of the remaining MatchSpy virtual methods; see the
            // documentation of each method in the MatchSpy base class.
     226    virtual MatchSpy * clone() const;
     227    virtual std::string name() const;
     228    virtual std::string serialise() const;
     229    virtual MatchSpy * unserialise(const std::string & s,
     230                                   const SerialisationContext & context) const;
     231    virtual std::string serialise_results() const;
     232    virtual void merge_results(const std::string & s);
     233    virtual std::string get_description() const;
     234};
     235
     236
     237/** A numeric range.
     238 *
     239 *  This is used to represent ranges of values returned by the match spies.
         *
         *  Ranges are ordered by their lower value, and then by their upper value,
         *  so they can be used as keys in a std::map.
     240 */
     241class XAPIAN_VISIBILITY_DEFAULT NumericRange {
     242    /// The lower value in the range.
     243    double lower;
     244
     245    /// The upper value in the range.
     246    double upper;
     247
     248  public:
            /// Construct a range from its two bounds.
            ///
            /// The bounds are stored as given: no check is made that
            /// lower_ <= upper_.
     249    NumericRange(double lower_, double upper_)
     250            : lower(lower_), upper(upper_) {}
     251
            /// Return the lower value in the range.
     252    double get_lower() const { return lower; }
            /// Return the upper value in the range.
     253    double get_upper() const { return upper; }
     254
            /// Order ranges by lower value, using the upper value to break ties.
     255    bool operator<(const NumericRange & other) const {
     256        if (lower < other.lower) return true;
     257        if (lower > other.lower) return false;
     258        return (upper < other.upper);
     259    }
     260};
     261
     262
     263/** Return a score reflecting how evenly divided a set of values is.
     264 *
     265 *  If you don't want to show a poor categorisation, or have multiple
     266 *  categories and only space in your user interface to show a few, you want to
     267 *  be able to decide how "good" a categorisation is.  One definition of "good"
     268 *  is that it offers a fairly even split of the available values, and
     269 *  (optionally) about a specified number of options.
     270 *
     271 *  @param values The values making up the categorisation, together with their
     272 *  frequencies.
     273 *
     274 *  @param total The total number of documents seen.
     275 *
     276 *  @param desired_no_of_categories The desired number of categories - this is
     277 *  a floating point value, so you can ask for 5.5 if you'd like "about 5 or 6
     278 *  categories".  The default is to desire the number of categories that there
     279 *  actually are, so the score then only reflects how even the split is.
     280 *
         *  @param spy A ValueCountMatchSpy to score, for the overload which takes
         *  no @a values or @a total (presumably these are taken from the spy's
         *  get_values() and get_total() - confirm against the implementation).
         *
     281 *  @return A score for the categorisation for the value - lower is better,
     282 *  with a perfectly even split across the right number of categories scoring
     283 *  0.
     284 */
     285//@{
     286double XAPIAN_VISIBILITY_DEFAULT score_evenness(
     287        const std::map<std::string, Xapian::doccount> & values,
     288        Xapian::doccount total,
     289        double desired_no_of_categories = 0.0);
     290double XAPIAN_VISIBILITY_DEFAULT score_evenness(
     291        const std::map<Xapian::NumericRange, Xapian::doccount> & values,
     292        Xapian::doccount total,
     293        double desired_no_of_categories = 0.0);
     294double XAPIAN_VISIBILITY_DEFAULT score_evenness(
     295        const ValueCountMatchSpy & spy,
     296        double desired_no_of_categories = 0.0);
     297//@}
     298
     299
     300/** Turn a category containing sort-encoded numeric values into a set of
     301 *  ranges.
     302 *
     303 *  For "continuous" values (such as price, height, weight, etc), there will
     304 *  usually be too many different values to offer the user, and the user won't
     305 *  want to restrict to an exact value anyway.
     306 *
     307 *  This function produces a set of NumericRange objects from the values seen
     308 *  in a particular value slot.
     309 *
     310 *  @param result     Used to return the resulting ranges, each mapped to a
         *                    document count.
     311 *  @param values     The sort-encoded values representing the initial
         *                    numbers, each mapped to its frequency (for example
         *                    the map returned by ValueCountMatchSpy::get_values()).
     312 *  @param max_ranges Group into at most this many ranges.
     313 *
     314 *  @return The number of values seen.
     315 */
     316doccount XAPIAN_VISIBILITY_DEFAULT build_numeric_ranges(
     317        std::map<Xapian::NumericRange, Xapian::doccount> & result,
     318        const std::map<std::string, Xapian::doccount> & values,
     319        size_t max_ranges);
     320
     321}
     322
     323#endif // XAPIAN_INCLUDED_MATCHSPY_H
  • xapian-core/include/Makefile.mk

    Property changes on: xapian-core/include/xapian/matchspy.h
    ___________________________________________________________________
    Added: svn:eol-style
       + native
    
     
    1919        include/xapian/enquire.h\
    2020        include/xapian/errorhandler.h\
    2121        include/xapian/expanddecider.h\
     22        include/xapian/matchspy.h\
    2223        include/xapian/positioniterator.h\
    2324        include/xapian/postingiterator.h\
    2425        include/xapian/postingsource.h\
  • xapian-core/include/xapian.h

     
    4545// Searching
    4646#include <xapian/enquire.h>
    4747#include <xapian/expanddecider.h>
     48#include <xapian/matchspy.h>
    4849#include <xapian/postingsource.h>
    4950#include <xapian/query.h>
    5051#include <xapian/queryparser.h>
  • xapian-core/net/serialise.cc

     
    203203    const map<string, Xapian::MSet::Internal::TermFreqAndWeight> &termfreqandwts
    204204        = mset.internal->termfreqandwts;
    205205
     206    result += encode_length(termfreqandwts.size());
    206207    map<string, Xapian::MSet::Internal::TermFreqAndWeight>::const_iterator j;
    207208    for (j = termfreqandwts.begin(); j != termfreqandwts.end(); ++j) {
    208209        result += encode_length(j->first.size());
     
    215216}
    216217
    217218Xapian::MSet
    218 unserialise_mset(const string &s)
     219unserialise_mset(const char ** p, const char * p_end)
    219220{
    220     const char * p = s.data();
    221     const char * p_end = p + s.size();
     221    Xapian::doccount firstitem = decode_length(p, p_end, false);
     222    Xapian::doccount matches_lower_bound = decode_length(p, p_end, false);
     223    Xapian::doccount matches_estimated = decode_length(p, p_end, false);
     224    Xapian::doccount matches_upper_bound = decode_length(p, p_end, false);
     225    Xapian::doccount uncollapsed_lower_bound = decode_length(p, p_end, false);
     226    Xapian::doccount uncollapsed_estimated = decode_length(p, p_end, false);
     227    Xapian::doccount uncollapsed_upper_bound = decode_length(p, p_end, false);
     228    Xapian::weight max_possible = unserialise_double(p, p_end);
     229    Xapian::weight max_attained = unserialise_double(p, p_end);
    222230
    223     Xapian::doccount firstitem = decode_length(&p, p_end, false);
    224     Xapian::doccount matches_lower_bound = decode_length(&p, p_end, false);
    225     Xapian::doccount matches_estimated = decode_length(&p, p_end, false);
    226     Xapian::doccount matches_upper_bound = decode_length(&p, p_end, false);
    227     Xapian::doccount uncollapsed_lower_bound = decode_length(&p, p_end, false);
    228     Xapian::doccount uncollapsed_estimated = decode_length(&p, p_end, false);
    229     Xapian::doccount uncollapsed_upper_bound = decode_length(&p, p_end, false);
    230     Xapian::weight max_possible = unserialise_double(&p, p_end);
    231     Xapian::weight max_attained = unserialise_double(&p, p_end);
     231    double percent_factor = unserialise_double(p, p_end);
    232232
    233     double percent_factor = unserialise_double(&p, p_end);
    234 
    235233    vector<Xapian::Internal::MSetItem> items;
    236     size_t msize = decode_length(&p, p_end, false);
     234    size_t msize = decode_length(p, p_end, false);
    237235    while (msize-- > 0) {
    238         Xapian::weight wt = unserialise_double(&p, p_end);
    239         Xapian::docid did = decode_length(&p, p_end, false);
    240         size_t len = decode_length(&p, p_end, true);
    241         string key(p, len);
    242         p += len;
     236        Xapian::weight wt = unserialise_double(p, p_end);
     237        Xapian::docid did = decode_length(p, p_end, false);
     238        size_t len = decode_length(p, p_end, true);
     239        string key(*p, len);
     240        *p += len;
    243241        items.push_back(Xapian::Internal::MSetItem(wt, did, key,
    244                                                    decode_length(&p, p_end, false)));
     242                                                   decode_length(p, p_end, false)));
    245243    }
    246244
     245    size_t terminfosize = decode_length(p, p_end, false);
    247246    map<string, Xapian::MSet::Internal::TermFreqAndWeight> terminfo;
    248     while (p != p_end) {
     247    while (terminfosize-- > 0) {
    249248        Xapian::MSet::Internal::TermFreqAndWeight tfaw;
    250         size_t len = decode_length(&p, p_end, true);
    251         string term(p, len);
    252         p += len;
    253         tfaw.termfreq = decode_length(&p, p_end, false);
    254         tfaw.termweight = unserialise_double(&p, p_end);
     249        size_t len = decode_length(p, p_end, true);
     250        string term(*p, len);
     251        *p += len;
     252        tfaw.termfreq = decode_length(p, p_end, false);
     253        tfaw.termweight = unserialise_double(p, p_end);
    255254        terminfo.insert(make_pair(term, tfaw));
    256255    }
    257256
  • xapian-core/net/remoteserver.cc

     
    2525#include "xapian/database.h"
    2626#include "xapian/enquire.h"
    2727#include "xapian/error.h"
     28#include "xapian/matchspy.h"
    2829#include "xapian/valueiterator.h"
    2930
    3031#include "safeerrno.h"
     
    354355    send_message(REPLY_UPDATE, message);
    355356}
    356357
     358/** Structure holding a list of match spies.
     359 *
     360 *  The main reason for the existence of this structure is to make it easy to
     361 *  ensure that the match spies are all deleted after use.
     362 */
     363struct MatchSpyList {
     364    vector<Xapian::MatchSpy *> spies;
     365
     366    ~MatchSpyList() {
     367        vector<Xapian::MatchSpy *>::const_iterator i;
     368        for (i = spies.begin(); i != spies.end(); ++i) {
     369            delete *i;
     370        }
     371    }
     372};
     373
    357374void
    358375RemoteServer::msg_query(const string &message_in)
    359376{
     
    405422
    406423    // Unserialise the Weight object.
    407424    len = decode_length(&p, p_end, true);
    408     const Xapian::Weight * wttype = ctx.get_weighting_scheme(string(p, len));
     425    string wtname(p, len);
     426    p += len;
     427
     428    const Xapian::Weight * wttype = ctx.get_weighting_scheme(wtname);
    409429    if (wttype == NULL) {
    410430        // Note: user weighting schemes should be registered by adding them to
    411431        // a SerialisationContext, and setting the context using
    412432        // RemoteServer::set_context().
    413433        throw Xapian::InvalidArgumentError("Weighting scheme " +
    414                                            string(p, len) + " not registered");
     434                                           wtname + " not registered");
    415435    }
    416     p += len;
    417436
    418437    len = decode_length(&p, p_end, true);
    419438    AutoPtr<Xapian::Weight> wt(wttype->unserialise(string(p, len)));
    420439    p += len;
    421440
    422441    // Unserialise the RSet object.
    423     Xapian::RSet rset = unserialise_rset(string(p, p_end - p));
     442    len = decode_length(&p, p_end, true);
     443    Xapian::RSet rset = unserialise_rset(string(p, len));
     444    p += len;
    424445
     446    // Unserialise the MatchSpy objects.
     447    vector<Xapian::MatchSpy *>::size_type spycount = decode_length(&p, p_end, false);
     448    MatchSpyList matchspies;
     449    while (spycount != 0) {
     450        len = decode_length(&p, p_end, true);
     451        string spytype(p, len);
     452        const Xapian::MatchSpy * spyclass = ctx.get_match_spy(spytype);
     453        if (spyclass == NULL) {
     454            throw Xapian::InvalidArgumentError("Match spy " + spytype +
     455                                               " not registered");
     456        }
     457        p += len;
     458
     459        len = decode_length(&p, p_end, true);
     460        matchspies.spies.push_back(spyclass->unserialise(string(p, len), ctx));
     461        p += len;
     462
     463        --spycount;
     464    }
     465
    425466    Xapian::Weight::Internal local_stats;
    426467    MultiMatch match(*db, query.get(), qlen, &rset, collapse_max, collapse_key,
    427468                     percent_cutoff, weight_cutoff, order,
    428469                     sort_key, sort_by, sort_value_forward, NULL,
    429                      NULL, local_stats, wt.get());
     470                     NULL, local_stats, wt.get(), matchspies.spies);
    430471
    431472    send_message(REPLY_STATS, serialise_stats(local_stats));
    432473
     
    448489    Xapian::MSet mset;
    449490    match.get_mset(first, maxitems, check_at_least, mset, total_stats, 0, 0);
    450491
    451     send_message(REPLY_RESULTS, serialise_mset(mset));
     492    message = serialise_mset(mset);
     493
     494    for (vector<Xapian::MatchSpy *>::const_iterator i = matchspies.spies.begin();
     495         i != matchspies.spies.end(); ++i) {
     496        string spy_results = (*i)->serialise_results();
     497        message += encode_length(spy_results.size());
     498        message += spy_results;
     499    }
     500    send_message(REPLY_RESULTS, message);
    452501}
    453502
    454503void
  • xapian-core/common/omenquireinternal.h

     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2001,2002 Ananova Ltd
    55 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009 Olly Betts
     6 * Copyright 2009 Lemur Consulting Ltd
    67 *
    78 * This program is free software; you can redistribute it and/or
    89 * modify it under the terms of the GNU General Public License as
     
    159160         */
    160161        mutable Weight * weight;
    161162
     163        vector<MatchSpy *> spies;
     164
    162165        Internal(const Xapian::Database &databases, ErrorHandler * errorhandler_);
    163166        ~Internal();
    164167
     
    174177        const Query & get_query();
    175178        MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
    176179                      Xapian::doccount check_at_least,
    177                       const RSet *omrset, const MatchDecider *mdecider,
    178                       const MatchDecider *matchspy) const;
     180                      const RSet *omrset,
     181                      const MatchDecider *mdecider,
     182                      const MatchDecider *matchspy_legacy) const;
    179183        ESet get_eset(Xapian::termcount maxitems, const RSet & omrset, int flags,
    180184                      double k, const ExpandDecider *edecider) const;
    181185
  • xapian-core/common/multimatch.h

     
    22 *
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002,2003,2004,2005,2006,2007,2009 Olly Betts
     5 * Copyright 2009 Lemur Consulting Ltd
    56 *
    67 * This program is free software; you can redistribute it and/or
    78 * modify it under the terms of the GNU General Public License as
     
    7172        /** Is each sub-database remote? */
    7273        vector<bool> is_remote;
    7374
     75        /// The matchspies to use.
     76        const vector<Xapian::MatchSpy *> & matchspies;
     77
    7478        /** get the maxweight that the postlist pl may return, calling
    7579         *  recalc_maxweight if recalculate_w_max is set, and unsetting it.
    7680         *  Must only be called on the top of the postlist tree.
     
    110114                   const Xapian::Sorter * sorter_,
    111115                   Xapian::ErrorHandler * errorhandler,
    112116                   Xapian::Weight::Internal & stats,
    113                    const Xapian::Weight *wtscheme);
     117                   const Xapian::Weight *wtscheme,
     118                   const vector<Xapian::MatchSpy *> & matchspies_);
    114119
    115120        void get_mset(Xapian::doccount first,
    116121                      Xapian::doccount maxitems,
     
    118123                      Xapian::MSet & mset,
    119124                      const Xapian::Weight::Internal & stats,
    120125                      const Xapian::MatchDecider * mdecider,
    121                       const Xapian::MatchDecider * matchspy);
     126                      const Xapian::MatchDecider * matchspy_legacy);
    122127
    123128        /** Called by postlists to indicate that they've rearranged themselves
    124129         *  and the maxweight now possible is smaller.
  • xapian-core/common/remote-database.h

     
    143143     * @param weight_cutoff             Weight cutoff.
    144144     * @param wtscheme                  Weighting scheme.
    145145     * @param omrset                    The rset.
     146     * @param matchspies                The matchspies to use (may be empty).
    146147     */
    147148    void set_query(const Xapian::Query::Internal *query,
    148149                   Xapian::termcount qlen,
     
    154155                   bool sort_value_forward,
    155156                   int percent_cutoff, Xapian::weight weight_cutoff,
    156157                   const Xapian::Weight *wtscheme,
    157                    const Xapian::RSet &omrset);
     158                   const Xapian::RSet &omrset,
     159                   const vector<Xapian::MatchSpy *> & matchspies);
    158160
    159161    /** Get the stats from the remote server.
    160162     *
     
    169171                           const Xapian::Weight::Internal &stats);
    170172
    171173    /// Get the MSet from the remote server.
    172     void get_mset(Xapian::MSet &mset);
     174    void get_mset(Xapian::MSet &mset,
     175                  const vector<Xapian::MatchSpy *> & matchspies);
    173176
    174177    /// Get remote termlist.
    175178    TermList * open_term_list(Xapian::docid did) const;
  • xapian-core/common/remoteprotocol.h

     
    4141// 30.6: Support for OP_VALUE_GE and OP_VALUE_LE in query serialisation
    4242// 31: Clean up for Xapian 1.1.0
    4343// 32: Serialise termfreq and reltermfreqs together in serialise_stats.
    44 #define XAPIAN_REMOTE_PROTOCOL_MAJOR_VERSION 32
     44// 33: Support for passing matchspies over the remote connection.
     45#define XAPIAN_REMOTE_PROTOCOL_MAJOR_VERSION 33
    4546#define XAPIAN_REMOTE_PROTOCOL_MINOR_VERSION 0
    4647
    4748/** Message types (client -> server).
  • xapian-core/common/serialisationcontextinternal.h

     
    3131namespace Xapian {
    3232    class Weight;
    3333    class PostingSource;
     34    class MatchSpy;
    3435}
    3536
    3637class Xapian::SerialisationContext::Internal
     
    4142    /// Registered external posting sources.
    4243    std::map<std::string, Xapian::PostingSource *> postingsources;
    4344
     45    /// Registered match spies.
     46    std::map<std::string, Xapian::MatchSpy *> matchspies;
     47
    4448    /// Add the standard default weighting schemes and posting sources.
    4549    void add_defaults();
    4650
     
    5054    /// Clear all registered posting sources from the context.
    5155    void clear_posting_sources();
    5256
     57    /// Clear all registered match spies from the context.
     58    void clear_match_spies();
     59
    5360  public:
    5461    Internal();
    5562    ~Internal();
     
    7784     */
    7885    const Xapian::PostingSource *
    7986            get_posting_source(const std::string & name) const;
     87
     88    /// Register a user-defined match spy class.
     89    void register_match_spy(const Xapian::MatchSpy &spy);
     90
     91    /** Get a match spy given a name.
     92     *
     93     *  The returned match spy is owned by the context object.
     94     *
     95     *  Returns NULL if the match spy could not be found.
     96     */
     97    const Xapian::MatchSpy *
     98            get_match_spy(const std::string & name) const;
     99
    80100};
    81101
    82102#endif // XAPIAN_INCLUDED_SERIALISATIONCONTEXTINTERNAL_H
  • xapian-core/common/serialise.h

     
    132132
    133133/** Unserialise a serialised Xapian::MSet object.
    134134 *
    135  *  @param s    The string to unserialise.
     135 *  @param p     Pointer to pointer to start of the string to unserialise.
     136 *  @param p_end Pointer to end of the string to unserialise.
    136137 *
    137138 *  @return     The unserialised Xapian::MSet object.
    138139 */
    139 Xapian::MSet unserialise_mset(const std::string &s);
     140Xapian::MSet unserialise_mset(const char ** p, const char * p_end);
    140141
    141142/** Serialise a Xapian::RSet object.
    142143 *
  • xapian-core/api/Makefile.mk

     
    1616        api/errorhandler.cc\
    1717        api/expanddecider.cc\
    1818        api/leafpostlist.cc\
     19        api/matchspy.cc\
    1920        api/omdatabase.cc\
    2021        api/omdocument.cc\
    2122        api/omenquire.cc\
  • xapian-core/api/omenquire.cc

     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2001,2002 Ananova Ltd
    55 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009 Olly Betts
    6  * Copyright 2007 Lemur Consulting Ltd
     6 * Copyright 2007,2009 Lemur Consulting Ltd
    77 *
    88 * This program is free software; you can redistribute it and/or
    99 * modify it under the terms of the GNU General Public License as
     
    642642Enquire::Internal::get_mset(Xapian::doccount first, Xapian::doccount maxitems,
    643643                            Xapian::doccount check_at_least, const RSet *rset,
    644644                            const MatchDecider *mdecider,
    645                             const MatchDecider *matchspy) const
     645                            const MatchDecider *matchspy_legacy) const
    646646{
    647647    DEBUGCALL(API, MSet, "Enquire::Internal::get_mset", first << ", " <<
    648648              maxitems << ", " << check_at_least << ", " << rset << ", " <<
    649               mdecider << ", " << matchspy);
     649              mdecider << ", " << matchspy_legacy);
    650650
    651651    if (percent_cutoff && (sort_by == VAL || sort_by == VAL_REL)) {
    652652        throw Xapian::UnimplementedError("Use of a percentage cutoff while sorting primary by value isn't currently supported");
     
    661661                       collapse_max, collapse_key,
    662662                       percent_cutoff, weight_cutoff,
    663663                       order, sort_key, sort_by, sort_value_forward, sorter,
    664                        errorhandler, stats, weight);
     664                       errorhandler, stats, weight, spies);
    665665    // Run query and put results into supplied Xapian::MSet object.
    666666    MSet retval;
    667667    match.get_mset(first, maxitems, check_at_least, retval,
    668                    stats, mdecider, matchspy);
     668                   stats, mdecider, matchspy_legacy);
    669669
    670670    Assert(weight->name() != "bool" || retval.get_max_possible() == 0);
    671671
     
    893893}
    894894
    // Append @a spy to the list of match spies held by this Enquire's internal
    // object.  Only the pointer is stored by this function; nothing here copies
    // or deletes the spy.
    895895void
     896Enquire::add_matchspy(MatchSpy * spy) {
     897    DEBUGAPICALL(void, "Xapian::Enquire::add_matchspy", spy);
     898    internal->spies.push_back(spy);
     899}
     900
     901void
     902Enquire::clear_matchspies() {
     903    DEBUGAPICALL(const Xapian::Query &, "Xapian::Enquire::clear_matchspies", "");
     904    internal->spies.clear();
     905}
     906
     907void
    896908Enquire::set_weighting_scheme(const Weight &weight_)
    897909{
    898910    DEBUGAPICALL(void, "Xapian::Enquire::set_weighting_scheme", "[Weight]");
  • xapian-core/api/serialisationcontext.cc

     
    2424#include "xapian/serialisationcontext.h"
    2525
    2626#include "xapian/error.h"
     27#include "xapian/matchspy.h"
    2728#include "xapian/postingsource.h"
    2829#include "xapian/weight.h"
    2930
     
    7879    RETURN(internal->get_weighting_scheme(name));
    7980}
    8081
    81 
    8282void
    8383SerialisationContext::register_posting_source(const Xapian::PostingSource &source)
    8484{
     
    9393    RETURN(internal->get_posting_source(name));
    9494}
    9595
     // Register a match spy with this context by forwarding to the internal
     // implementation.  Note that spy.name() is evaluated for the log call
     // before the internal object is reached.
     96void
     97SerialisationContext::register_match_spy(const Xapian::MatchSpy &spy)
     98{
     99    LOGCALL_VOID(API, "Xapian::SerialisationContext::register_match_spy", spy.name());
     100    internal->register_match_spy(spy);
     101}
    96102
     // Look up a registered match spy by name, forwarding to the internal
     // implementation and returning whatever it returns.
     103const Xapian::MatchSpy *
     104SerialisationContext::get_match_spy(const string & name) const
     105{
     106    LOGCALL(API, const Xapian::MatchSpy *, "Xapian::SerialisationContext::get_match_spy", name);
     107    RETURN(internal->get_match_spy(name));
     108}
     109
     110
    97111SerialisationContext::Internal::Internal()
    98112        : Xapian::Internal::RefCntBase(),
    99113          wtschemes(),
     
    106120{
    107121    clear_weighting_schemes();
    108122    clear_posting_sources();
     123    clear_match_spies();
    109124}
    110125
    111126void
     
    128143    postingsources[source->name()] = source;
    129144    source = new Xapian::FixedWeightPostingSource(0.0);
    130145    postingsources[source->name()] = source;
     146
     147    Xapian::MatchSpy * spy;
     148    spy = new Xapian::ValueCountMatchSpy();
     149    matchspies[spy->name()] = spy;
    131150}
    132151
    133152void
     
    149168}
    150169
    151170void
     171SerialisationContext::Internal::clear_match_spies()
     172{
     173    map<string, Xapian::MatchSpy *>::const_iterator i;
     174    for (i = matchspies.begin(); i != matchspies.end(); ++i) {
     175        delete i->second;
     176    }
     177}
     178
     179void
    152180SerialisationContext::Internal::register_weighting_scheme(const Xapian::Weight &wt)
    153181{
    154182    string wtname = wt.name();
     
    220248    return i->second;
    221249}
    222250
     251void
     252SerialisationContext::Internal::register_match_spy(const Xapian::MatchSpy &spy)
     253{
     254    string spyname = spy.name();
     255    if (spyname.empty()) {
     256        throw Xapian::InvalidOperationError("Unable to register match spy - name() method returns empty string.");
     257    }
     258
     259    map<string, Xapian::MatchSpy *>::const_iterator i;
     260    i = matchspies.find(spyname);
     261    if (i != matchspies.end()) {
     262        delete i->second;
     263    }
     264
     265    Xapian::MatchSpy * spyclone = spy.clone();
     266    if (!spyclone) {
     267        matchspies.erase(spyname);
     268        throw Xapian::InvalidOperationError("Unable to register match spy - clone() method returns NULL.");
     269    }
     270    try {
     271        matchspies[spyname] = spyclone;
     272    } catch(...) {
     273        delete spyclone;
     274        matchspies.erase(spyname);
     275        throw;
     276    }
    223277}
     278
     279const Xapian::MatchSpy *
     280SerialisationContext::Internal::get_match_spy(const string & name) const
     281{
     282    map<string, Xapian::MatchSpy *>::const_iterator i;
     283    i = matchspies.find(name);
     284    if (i == matchspies.end()) {
     285        return NULL;
     286    }
     287    return i->second;
     288}
     289
     290}
  • xapian-core/api/matchspy.cc

     
     1/** @file matchspy.cc
     2 * @brief MatchSpy implementation.
     3 */
     4/* Copyright (C) 2007,2008,2009 Olly Betts
     5 * Copyright (C) 2007,2009 Lemur Consulting Ltd
     6 *
     7 * This program is free software; you can redistribute it and/or modify
     8 * it under the terms of the GNU General Public License as published by
     9 * the Free Software Foundation; either version 2 of the License, or
     10 * (at your option) any later version.
     11 *
     12 * This program is distributed in the hope that it will be useful,
     13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     15 * GNU General Public License for more details.
     16 *
     17 * You should have received a copy of the GNU General Public License
     18 * along with this program; if not, write to the Free Software
     19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
     20 */
     21
     22#include <config.h>
     23#include <xapian/matchspy.h>
     24
     25#include <xapian/document.h>
     26#include <xapian/error.h>
     27#include <xapian/queryparser.h>
     28#include <xapian/serialisationcontext.h>
     29
     30#include <map>
     31#include <string>
     32#include <vector>
     33
     34#include "autoptr.h"
     35#include "debuglog.h"
     36#include "omassert.h"
     37#include "serialise.h"
     38#include "stringutils.h"
     39#include "str.h"
     40
     41#include <float.h>
     42#include <math.h>
     43
     44
     45using namespace std;
     46
     47namespace Xapian {
     48
     // Default implementations for the MatchSpy base class.  Every method
     // below except the destructor and get_description() unconditionally
     // throws UnimplementedError with a message naming the unimplemented
     // method.
     49MatchSpy::~MatchSpy() {}
     50
     // Default clone(): always throws UnimplementedError.
     51MatchSpy *
     52MatchSpy::clone() const {
     53    throw UnimplementedError("MatchSpy not suitable for use with remote searches - clone() method unimplemented");
     54}
     55
     // Default name(): always throws UnimplementedError.
     56string
     57MatchSpy::name() const {
     58    throw UnimplementedError("MatchSpy not suitable for use with remote searches - name() method unimplemented");
     59}
     60
     // Default serialise(): always throws UnimplementedError.
     61string
     62MatchSpy::serialise() const {
     63    throw UnimplementedError("MatchSpy not suitable for use with remote searches - serialise() method unimplemented");
     64}
     65
     // Default unserialise(): ignores both arguments and always throws
     // UnimplementedError.
     66MatchSpy *
     67MatchSpy::unserialise(const string &, const SerialisationContext &) const {
     68    throw UnimplementedError("MatchSpy not suitable for use with remote searches - unserialise() method unimplemented");
     69}
     70
     // Default serialise_results(): always throws UnimplementedError.
     71string
     72MatchSpy::serialise_results() const {
     73    throw UnimplementedError("MatchSpy not suitable for use with remote searches - serialise_results() method unimplemented");
     74}
     75
     // Default merge_results(): ignores its argument and always throws
     // UnimplementedError.
     76void
     77MatchSpy::merge_results(const string &) {
     78    throw UnimplementedError("MatchSpy not suitable for use with remote searches - merge_results() method unimplemented");
     79}
     80
     // Default description: a fixed string naming the base class.
     81string
     82MatchSpy::get_description() const {
     83    return "Xapian::MatchSpy()";
     84}
     85
     86
     87/** Compare two StringAndFrequency objects.
     88 *
     89 *  The comparison is firstly by frequency (higher is better), then by string
     90 *  (earlier lexicographic sort is better).
     91 */
     92class StringAndFreqCmpByFreq {
     93  public:
     94    /// Default constructor
     95    StringAndFreqCmpByFreq() {}
     96
     97    /// Return true if a has a higher frequency than b.
     98    /// If equal, compare by the str, to provide a stable sort order.
     99    bool operator()(const StringAndFrequency &a,
     100                    const StringAndFrequency &b) const {
     101        if (a.get_frequency() > b.get_frequency()) return true;
     102        if (a.get_frequency() < b.get_frequency()) return false;
     103        if (a.get_string() > b.get_string()) return false;
     104        return true;
     105    }
     106};
     107
     108/** Get the most frequent items from a map from string to frequency.
     109 *
     110 *  This takes input such as that returned by @a
     111 *  ValueCountMatchSpy::get_values(), and returns a vector of the most
     112 *  frequent items in the input.
     113 *
     114 *  @param result A vector which will be filled with the most frequent
     115 *                items, in descending order of frequency.  Items with
     116 *                the same frequency will be sorted in ascending
     117 *                alphabetical order.
     118 *
     119 *  @param items The map from string to frequency, from which the most
     120 *               frequent items will be selected.
     121 *
     122 *  @param maxitems The maximum number of items to return.
     123 */
     124static void
     125get_most_frequent_items(vector<StringAndFrequency> & result,
     126                        const map<string, doccount> & items,
     127                        size_t maxitems)
     128{
     129    result.clear();
     130    result.reserve(maxitems);
     131    StringAndFreqCmpByFreq cmpfn;
     132    bool is_heap(false);
     133
     134    for (map<string, doccount>::const_iterator i = items.begin();
     135         i != items.end(); i++) {
     136        Assert(result.size() <= maxitems);
     137        result.push_back(StringAndFrequency(i->first, i->second));
     138        if (result.size() > maxitems) {
     139            // Make the list back into a heap.
     140            if (is_heap) {
     141                // Only the new element isn't in the right place.
     142                push_heap(result.begin(), result.end(), cmpfn);
     143            } else {
     144                // Need to build heap from scratch.
     145                make_heap(result.begin(), result.end(), cmpfn);
     146                is_heap = true;
     147            }
     148            pop_heap(result.begin(), result.end(), cmpfn);
     149            result.pop_back();
     150        }
     151    }
     152
     153    if (is_heap) {
     154        sort_heap(result.begin(), result.end(), cmpfn);
     155    } else {
     156        sort(result.begin(), result.end(), cmpfn);
     157    }
     158}
     159
     // Record one matching document: bump the count of documents seen, then
     // bump the count for the value stored in this spy's slot.  Documents whose
     // value in that slot is empty are counted in total but add no entry to
     // values.  The weight argument is unused.
     160void
     161ValueCountMatchSpy::operator()(const Document &doc, weight) {
     162    ++total;
     163    string val(doc.get_value(slot));
     164    if (!val.empty()) ++values[val];
     165}
     166
     // Fill result with at most maxvalues of the values seen so far, chosen and
     // ordered by get_most_frequent_items().
     167void
     168ValueCountMatchSpy::get_top_values(vector<StringAndFrequency> & result,
     169                                   size_t maxvalues) const
     170{
     171    get_most_frequent_items(result, values, maxvalues);
     172}
     173
     // Return a newly allocated ValueCountMatchSpy constructed from this spy's
     // slot only; the counts gathered so far are not passed to the new object.
     174MatchSpy *
     175ValueCountMatchSpy::clone() const {
     176    return new ValueCountMatchSpy(slot);
     177}
     178
     // Return the fixed name identifying this match spy class.
     179string
     180ValueCountMatchSpy::name() const {
     181    return "Xapian::ValueCountMatchSpy";
     182}
     183
     // Serialise this spy's parameters: just the slot number, written with
     // encode_length().  The gathered counts are not included.
     184string
     185ValueCountMatchSpy::serialise() const {
     186    string result;
     187    result += encode_length(slot);
     188    return result;
     189}
     190
     191MatchSpy *
     192ValueCountMatchSpy::unserialise(const string & s,
     193                                const SerialisationContext &) const{
     194    const char * p = s.data();
     195    const char * end = p + s.size();
     196
     197    valueno new_slot = decode_length(&p, end, false);
     198    if (p != end) {
     199        throw NetworkError("Junk at end of serialised ValueCountMatchSpy");
     200    }
     201
     202    return new ValueCountMatchSpy(new_slot);
     203}
     204
     // Serialise the counts gathered so far.  The layout written is: the total
     // number of documents seen, the number of distinct values, then for each
     // value its length, its bytes and its frequency (each number written with
     // encode_length()).
     205string
     206ValueCountMatchSpy::serialise_results() const {
     207    LOGCALL(REMOTE, string, "ValueCountMatchSpy::serialise_results", "");
     208    string result;
     209    result += encode_length(total);
     210    result += encode_length(values.size());
     211    for (map<string, doccount>::const_iterator i = values.begin();
     212         i != values.end(); ++i) {
     213        result += encode_length(i->first.size());
     214        result += i->first;
     215        result += encode_length(i->second);
     216    }
     217    RETURN(result);
     218}
     219
     220void
     221ValueCountMatchSpy::merge_results(const string & s) {
     222    LOGCALL_VOID(REMOTE, "ValueCountMatchSpy::merge_results", s);
     223    const char * p = s.data();
     224    const char * end = p + s.size();
     225
     226    total += decode_length(&p, end, false);
     227
     228    map<string, doccount>::size_type items = decode_length(&p, end, false);
     229    while (p != end) {
     230        while(items != 0) {
     231            size_t vallen = decode_length(&p, end, true);
     232            string val(p, vallen);
     233            p += vallen;
     234            doccount freq = decode_length(&p, end, false);
     235            values[val] += freq;
     236            --items;
     237        }
     238    }
     239}
     240
     // Return a human-readable description containing the number of documents
     // seen and the number of entries in values.
     // NOTE(review): the text says "slots", but the number reported is
     // values.size(), i.e. the count of distinct values seen - confirm whether
     // the wording is intended before relying on it.
     241string
     242ValueCountMatchSpy::get_description() const {
     243    return "Xapian::ValueCountMatchSpy(" + str(total) +
     244            " docs seen, looking in " + str(values.size()) + " slots)";
     245}
     246
     247
     /// Return the square of x.
     248inline double sqrd(double x) { return x * x; }
     249
     250/** Calculate a score based on how evenly distributed the frequencies of a set
     251 *  of values are.
     252 */
     253template<class T> double
     254do_score_evenness(const map<T, doccount> & values,
     255                  doccount total,
     256                  double desired_no_of_categories)
     257{
     258    if (total == 0) return 0.0;
     259
     260    size_t total_unset = total;
     261    double score = 0.0;
     262
     263    if (desired_no_of_categories <= 0.0)
     264        desired_no_of_categories = values.size();
     265
     266    double avg = double(total) / desired_no_of_categories;
     267
     268    typename map<T, doccount>::const_iterator i;
     269    for (i = values.begin(); i != values.end(); ++i) {
     270        size_t count = i->second;
     271        total_unset -= count;
     272        score += sqrd(count - avg);
     273    }
     274    if (total_unset) score += sqrd(total_unset - avg);
     275
     276    // Scale down so the total number of items doesn't make a difference.
     277    score /= sqrd(total);
     278
     279    // Bias towards returning the number of categories requested.
     280    score += 0.01 * sqrd(desired_no_of_categories - values.size());
     281
     282    return score;
     283}
     284
     // Overload for a map keyed by string: forwards to do_score_evenness().
     285double score_evenness(const map<string, doccount> & values,
     286                      doccount total,
     287                      double desired_no_of_categories) {
     288    return do_score_evenness(values, total, desired_no_of_categories);
     289}
     290
     // Overload for a map keyed by NumericRange: forwards to
     // do_score_evenness().
     291double score_evenness(const map<NumericRange, doccount> & values,
     292                      doccount total,
     293                      double desired_no_of_categories) {
     294    return do_score_evenness(values, total, desired_no_of_categories);
     295}
     296
     // Overload taking a ValueCountMatchSpy: scores the spy's own values
     // against the spy's own total.
     297double score_evenness(const ValueCountMatchSpy & spy,
     298                      double desired_no_of_categories) {
     299    return do_score_evenness(spy.get_values(), spy.get_total(),
     300                             desired_no_of_categories);
     301}
     302
     303
     304/** A bucket, used when building numeric ranges.
     305 */
     306struct bucketval {
     307    size_t count;
     308    double min, max;
     309
     310    bucketval() : count(0), min(DBL_MAX), max(-DBL_MAX) { }
     311
     312    void update(size_t n, double value) {
     313        count += n;
     314        if (value < min) min = value;
     315        if (value > max) max = value;
     316    }
     317};
     318
     319doccount build_numeric_ranges(map<NumericRange, doccount> & result,
     320                              const map<string, doccount> & values,
     321                              size_t max_ranges)
     322{
     323    double lo = DBL_MAX, hi = -DBL_MAX;
     324    result.clear();
     325
     326    map<double, doccount> histo;
     327    doccount total_set = 0;
     328    map<string, doccount>::const_iterator i;
     329    for (i = values.begin(); i != values.end(); ++i) {
     330        if (i->first.size() == 0) continue;
     331        double v = sortable_unserialise(i->first.c_str());
     332        if (v < lo) lo = v;
     333        if (v > hi) hi = v;
     334        doccount count = i->second;
     335        histo[v] = count;
     336        total_set += count;
     337    }
     338
     339    if (total_set == 0) {
     340        // No set values.
     341        return total_set;
     342    }
     343    if (lo == hi) {
     344        // All set values are the same.
     345        NumericRange range(lo, hi);
     346        result[range] = total_set;
     347        return total_set;
     348    }
     349
     350    double sizeby = max(fabs(hi), fabs(lo));
     351    // E.g. if sizeby = 27.4 and max_ranges = 7, we want to split into units of
     352    // width 1.0 which we may then coalesce if there are too many used buckets.
     353    double unit = pow(10.0, floor(log10(sizeby / max_ranges) - 0.2));
     354    double start = floor(lo / unit) * unit;
     355    // Can happen due to FP rounding (e.g. lo = 11.95, unit = 0.01).
     356    if (start > lo) start = lo;
     357    size_t n_buckets = size_t(ceil(hi / unit) - floor(lo / unit));
     358
     359    bool scaleby2 = true;
     360    vector<bucketval> bucket(n_buckets + 1);
     361    while (true) {
     362        size_t n_used = 0;
     363        map<double, doccount>::const_iterator j;
     364        for (j = histo.begin(); j != histo.end(); ++j) {
     365            double v = j->first;
     366            size_t b = size_t(floor((v - start) / unit));
     367            if (b > n_buckets) b = n_buckets; // FIXME - Hacky workaround to ensure that b is in range.
     368            if (bucket[b].count == 0) ++n_used;
     369            bucket[b].update(j->second, v);
     370        }
     371
     372        if (n_used <= max_ranges) break;
     373
     374        unit *= scaleby2 ? 2.0 : 2.5;
     375        scaleby2 = !scaleby2;
     376        start = floor(lo / unit) * unit;
     377        // Can happen due to FP rounding (e.g. lo = 11.95, unit = 0.01).
     378        if (start > lo) start = lo;
     379        n_buckets = size_t(ceil(hi / unit) - floor(lo / unit));
     380        bucket.resize(0);
     381        bucket.resize(n_buckets + 1);
     382    }
     383
     384    map<string, doccount> discrete_categories;
     385    for (size_t b = 0; b < bucket.size(); ++b) {
     386        if (bucket[b].count == 0) continue;
     387        NumericRange range(bucket[b].min, bucket[b].max);
     388        result[range] = bucket[b].count;
     389    }
     390
     391    return total_set;
     392}
     393
     394}
  • xapian-core/backends/remote/remote-database.cc

    Property changes on: xapian-core/api/matchspy.cc
    ___________________________________________________________________
    Added: svn:eol-style
       + native
    
     
    4343#include <vector>
    4444
    4545#include "xapian/error.h"
     46#include "xapian/matchspy.h"
    4647
    4748using namespace std;
    4849
     
    536537                         bool sort_value_forward,
    537538                         int percent_cutoff, Xapian::weight weight_cutoff,
    538539                         const Xapian::Weight *wtscheme,
    539                          const Xapian::RSet &omrset)
     540                         const Xapian::RSet &omrset,
     541                         const vector<Xapian::MatchSpy *> & matchspies)
    540542{
    541543    string tmp = query->serialise();
    542544    string message = encode_length(tmp.size());
     
    561563    message += encode_length(tmp.size());
    562564    message += tmp;
    563565
    564     message += serialise_rset(omrset);
     566    tmp = serialise_rset(omrset);
     567    message += encode_length(tmp.size());
     568    message += tmp;
    565569
     570    message += encode_length(matchspies.size());
     571    for (vector<Xapian::MatchSpy *>::const_iterator i = matchspies.begin();
     572         i != matchspies.end(); ++i) {
     573
     574        tmp = (*i)->name();
     575        if (tmp.size() == 0) {
     576            throw Xapian::UnimplementedError("MatchSpy not suitable for use with remote searches - name() method returned empty string");
     577        }
     578        message += encode_length(tmp.size());
     579        message += tmp;
     580
     581        tmp = (*i)->serialise();
     582        message += encode_length(tmp.size());
     583        message += tmp;
     584    }
     585
    566586    send_message(MSG_QUERY, message);
    567587}
    568588
     
    592612}
    593613
    594614void
    595 RemoteDatabase::get_mset(Xapian::MSet &mset)
     615RemoteDatabase::get_mset(Xapian::MSet &mset,
     616                         const vector<Xapian::MatchSpy *> & matchspies)
    596617{
    597618    string message;
    598619    get_message(message, REPLY_RESULTS);
    599     mset = unserialise_mset(message);
     620    const char * p = message.data();
     621    const char * p_end = p + message.size();
     622    mset = unserialise_mset(&p, p_end);
     623
     624    for (vector<Xapian::MatchSpy *>::const_iterator i = matchspies.begin();
     625         i != matchspies.end(); ++i) {
     626        if (p == p_end)
     627            throw Xapian::NetworkError("Expected serialised matchspy");
     628        size_t len = decode_length(&p, p_end, true);
     629        string spyresults = string(p, len);
     630        p += len;
     631        (*i)->merge_results(spyresults);
     632    }
     633    if (p != p_end)
     634        throw Xapian::NetworkError("Junk at end of mset");
    600635}
    601636
    602637void
  • xapian-bindings/csharp/Makefile.am

     
    2626        Flint.cs \
    2727        InMemory.cs \
    2828        MatchDecider.cs \
     29        MatchSpy.cs \
    2930        MSet.cs \
    3031        MSetIterator.cs \
    3132        MultiValueSorter.cs \
     
    3738        QueryParser.cs \
    3839        Remote.cs \
    3940        RSet.cs \
     41        SWIGTYPE_p_std__mapT_Xapian__NumericRange_unsigned_int_t.cs \
     42        SWIGTYPE_p_std__mapT_std__string_unsigned_int_t.cs \
    4043        SWIGTYPE_p_std__string.cs \
    4144        SWIGTYPE_p_std__vectorT_std__string_t.cs \
    4245        SWIGTYPE_p_std__vectorT_Xapian__Query_t.cs \
     46        SWIGTYPE_p_std__vectorT_Xapian__StringAndFrequency_t.cs \
    4347        SerialisationContext.cs \
    4448        SimpleStopper.cs \
    4549        Sorter.cs \
     
    4953        TermGenerator.cs \
    5054        TermIterator.cs \
    5155        TradWeight.cs \
     56        ValueCountMatchSpy.cs \
    5257        ValueIterator.cs \
    5358        ValueRangeProcessor.cs \
    5459        Version.cs \
  • xapian-bindings/python/pythontest2.py

     
    12891289    enq.set_query(xapian.Query('foo'))
    12901290    enq.get_mset(0, 10)
    12911291
     1292def test_matchspy():
     1293    """Test use of matchspies.
     1294
     1295    """
     1296    db = setup_database()
     1297    query = xapian.Query(xapian.Query.OP_OR, "was", "it")
     1298    enq = xapian.Enquire(db)
     1299    enq.set_query(query)
     1300
     1301    def set_matchspy_deref(enq):
     1302        """Set a matchspy, and then drop the reference, to check that it
     1303        doesn't get deleted too soon.
     1304        """
     1305        spy = xapian.ValueCountMatchSpy(0)
     1306        enq.add_matchspy(spy)
     1307        del spy
     1308    set_matchspy_deref(enq)
     1309    mset = enq.get_mset(0, 10)
     1310    expect(len(mset), 5)
     1311
     1312    spy = xapian.ValueCountMatchSpy(0)
     1313    enq.add_matchspy(spy)
     1314    mset = enq.get_mset(0, 10)
     1315    expect(spy.get_values_as_dict(), {'zero': 1})
     1316    expect(spy.get_total(), 5)
     1317    expect(spy.get_top_values(10), [('zero', 1)])
     1318
    12921319# Run all tests (ie, callables with names starting "test_").
    12931320if not runtests(globals(), sys.argv[1:]):
    12941321    sys.exit(1)
  • xapian-bindings/python/util.i

     
    172172    }
    173173}
    174174
     175%{
     176/* Typemap for returning a map of ints keyed by strings: converts to a dict.
     177 * This is used for @a ValueCountMatchSpy::get_values().
     178 * The GIL must be held when this is called.
     179 */
     180PyObject *
     181value_map_to_dict(const std::map<std::string, Xapian::doccount> & vals)
     182{
     183    PyObject * result = PyDict_New();
     184    if (result == 0) {
     185        return NULL;
     186    }
     187
     188    std::map<std::string, Xapian::doccount>::const_iterator i;
     189    for (i = vals.begin(); i != vals.end(); ++i) {
     190        PyObject * str = PyString_FromStringAndSize((*i).first.data(),
     191                                                    (*i).first.size());
     192        if (str == 0) {
     193            Py_DECREF(result);
     194            result = NULL;
     195            return NULL;
     196        }
     197
     198        PyObject * l = PyInt_FromLong((*i).second);
     199        if (l == 0) {
     200            Py_DECREF(str);
     201            Py_DECREF(result);
     202            result = NULL;
     203            return NULL;
     204        }
     205
     206        if (PyDict_SetItem(result, str, l) == -1) {
     207            Py_DECREF(result);
     208            result = NULL;
     209            return NULL;
     210        }
     211        Py_DECREF(str);
     212        Py_DECREF(l);
     213    }
     214    return result;
     215}
     216%}
     217
     218/** Typemap pair for getting the return value from @a ValueCountMatchSpy::get_top_values().
     219 */
     220%typemap(in, numinputs=0) std::vector<Xapian::StringAndFrequency> & result (std::vector<Xapian::StringAndFrequency> temp) {
     221    $1 = &temp;
     222}
     223%typemap(argout) std::vector<Xapian::StringAndFrequency> & result {
     224    Py_DECREF($result);
     225    $result = PyList_New($1->size());
     226    size_t pos = 0;
     227    for (std::vector<Xapian::StringAndFrequency>::const_iterator i = $1->begin();
     228         i != $1->end(); ++i) {
     229        PyObject * str = PyString_FromStringAndSize((*i).get_string().data(),
     230                                                    (*i).get_string().size());
     231        if (str == 0) {
     232            Py_DECREF($result);
     233            $result = NULL;
     234            SWIG_fail;
     235        }
     236
     237        PyObject * l = PyInt_FromLong((*i).get_frequency());
     238        if (l == 0) {
     239            Py_DECREF($result);
     240            Py_DECREF(str);
     241            $result = NULL;
     242            SWIG_fail;
     243        }
     244
     245        PyObject *t = PyTuple_New(2);
     246        if (t == 0) {
     247            Py_DECREF($result);
     248            Py_DECREF(str);
     249            Py_DECREF(l);
     250            $result = NULL;
     251            SWIG_fail;
     252        }
     253        PyTuple_SetItem(t, 0, str);
     254        PyTuple_SetItem(t, 1, l);
     255
     256        PyList_SetItem($result, pos++, t);
     257    }
     258}
     259
    175260%typedef PyObject *LangSpecificListType;
    176261
    177262%inline %{
  • xapian-bindings/python/extra.i

     
    2222 */
    2323%}
    2424
     25%extend ValueCountMatchSpy {
     26    %feature("nothread") get_values_as_dict;
     27    %exception get_values_as_dict {
     28        try {
     29            $action
     30        } catch (...) {
     31            Xapian::SetPythonException();
     32            SWIG_fail;
     33        }
     34    }
     35    PyObject * get_values_as_dict() {
     36        return value_map_to_dict($self->get_values());
     37    }
     38}
     39
    2540%pythoncode %{
    2641
    2742# Set the documentation format - this is used by tools like "epydoc" to decide
     
    10761091__all__ = tuple(__all__)
    10771092
    10781093
     1094# Fix up Enquire so that it keeps a python reference to the deciders supplied
     1095# to it so that they won't be deleted before the Enquire object.  This hack can
     1096# probably be removed once xapian bug #186 is fixed.
     1097_enquire_add_matchspy_orig = Enquire.add_matchspy
     1098def _enquire_match_spy_add(self, decider):
     1099    if not hasattr(self, '_deciders'):
     1100        self._deciders = []
     1101    self._deciders.append(decider)
     1102    _enquire_add_matchspy_orig(self, decider)
     1103_enquire_match_spy_add.__doc__ = Enquire.add_matchspy.__doc__
     1104Enquire.add_matchspy = _enquire_match_spy_add
     1105
     1106_enquire_clear_matchspies_orig = Enquire.clear_matchspies
     1107def _enquire_match_spies_clear(self):
     1108    _enquire_clear_matchspies_orig(self, decider)
     1109    if hasattr(self, '_deciders'):
     1110        del self._deciders
     1111_enquire_match_spies_clear.__doc__ = Enquire.clear_matchspies.__doc__
     1112Enquire.clear_matchspies = _enquire_match_spies_clear
     1113
     1114
     1115
    10791116# Remove static methods which shouldn't be in the API.
    10801117del Document_unserialise
    10811118del Query_unserialise
  • xapian-bindings/xapian.i

     
    356356#endif
    357357
    358358class Database;
     359class MatchSpy;
    359360class Query;
    360361class Sorter;
    361362
     
    367368    void set_query(const Query & query, termcount qlen = 0);
    368369    const Query& get_query();
    369370
     371    void add_matchspy(MatchSpy * spy);
     372    void clear_matchspies();
     373
    370374    void set_weighting_scheme(const Weight& weight);
    371375    void set_collapse_key(Xapian::valueno collapse_key,
    372376                          Xapian::doccount collapse_max = 1);
     
    440444
    441445}
    442446
     447%ignore Xapian::SerialisationContext::operator=;
     448%include <xapian/serialisationcontext.h>
     449
    443450/* Generated code won't compile if directors are enabled.  Disable for now
    444451 * while we investigate.
    445452 *
     
    466473%warnfilter(842) Xapian::TradWeight::unserialise;
    467474%include <xapian/weight.h>
    468475
     476%ignore Xapian::NumericRange::operator<;
     477%include <xapian/matchspy.h>
     478
    469479namespace Xapian {
    470480
    471481// xapian/database.h
     
    747757%include <xapian/replication.h>
    748758%include <xapian/valuesetmatchdecider.h>
    749759
    750 %ignore Xapian::SerialisationContext::operator=;
    751 %include <xapian/serialisationcontext.h>
    752 
    753760namespace Xapian {
    754761
    755762#if defined SWIGPYTHON