Ticket #394: phrase-settling-pond-update-20120913.patch

File phrase-settling-pond-update-20120913.patch, 46.3 KB (added by Olly Betts, 12 years ago)

internally now works with PostList* objects rather than turning them back into strings

  • xapian-core/api/postlist.cc

    diff --git a/xapian-core/api/postlist.cc b/xapian-core/api/postlist.cc
    index 2684913..4fb825c 100644
    a b  
    11/** @file postlist.cc
    22 * @brief Abstract base class for postlists.
    33 */
    4 /* Copyright (C) 2007,2009,2011 Olly Betts
     4/* Copyright (C) 2007,2009,2011,2012 Olly Betts
    55 *
    66 * This program is free software; you can redistribute it and/or
    77 * modify it under the terms of the GNU General Public License as
    PostingIterator::Internal::get_collapse_key() const  
    5353}
    5454
    5555PositionList *
    56 PostList::read_position_list()
     56PostList::read_position_list(Xapian::docid)
    5757{
    5858    throw Xapian::UnimplementedError("OP_NEAR and OP_PHRASE only currently support terms as subqueries");
    5959}
  • xapian-core/api/postlist.h

    diff --git a/xapian-core/api/postlist.h b/xapian-core/api/postlist.h
    index 0c7ca1f..85d4ff8 100644
    a b  
    11/** @file postlist.h
    22 * @brief Abstract base class for postlists.
    33 */
    4 /* Copyright (C) 2007,2008,2009,2011 Olly Betts
     4/* Copyright (C) 2007,2008,2009,2011,2012 Olly Betts
    55 * Copyright (C) 2009 Lemur Consulting Ltd
    66 *
    77 * This program is free software; you can redistribute it and/or
    class Xapian::PostingIterator::Internal : public Xapian::Internal::intrusive_bas  
    113113     */
    114114    virtual double recalc_maxweight() = 0;
    115115
    116     /** Read the position list for the term in the current document and
    117      *  return a pointer to it (owned by the PostList).
     116    /** Read the position list for the term and return a pointer to it (owned
     117     *  by the PostList).
     118     *
     119     *  The position list is for the specified document id, or the current
     120     *  document if no document id is specified.
    118121     *
    119122     *  The default implementation throws Xapian::UnimplementedError.
    120123     */
    121     virtual PositionList * read_position_list();
     124    virtual PositionList * read_position_list(Xapian::docid other_did = 0);
    122125
    123126    /** Read the position list for the term in the current document and
    124127     *  return a pointer to it (not owned by the PostList).
  • xapian-core/api/queryinternal.cc

    diff --git a/xapian-core/api/queryinternal.cc b/xapian-core/api/queryinternal.cc
    index 9359e48..6b7c70c 100644
    a b class AndContext : public Context {  
    248248                  Xapian::termcount window_)
    249249            : op_(op__), begin(begin_), end(end_), window(window_) { }
    250250
    251         PostList * postlist(PostList * pl, const vector<PostList*>& pls) const;
     251        PostList * postlist(PostList * pl, const vector<PostList*>& pls,
     252                            QueryOptimiser * qopt) const;
    252253    };
    253254
    254255    list<PosFilter> pos_filters;
    class AndContext : public Context {  
    264265};
    265266
    266267PostList *
    267 AndContext::PosFilter::postlist(PostList * pl, const vector<PostList*>& pls) const
     268AndContext::PosFilter::postlist(PostList * pl, const vector<PostList*>& pls,
     269                                QueryOptimiser * qopt) const
    268270try {
    269271    vector<PostList *>::const_iterator terms_begin = pls.begin() + begin;
    270272    vector<PostList *>::const_iterator terms_end = pls.begin() + end;
    try {  
    273275        pl = new NearPostList(pl, window, terms_begin, terms_end);
    274276    } else if (window == end - begin) {
    275277        AssertEq(op_, Xapian::Query::OP_PHRASE);
    276         pl = new ExactPhrasePostList(pl, terms_begin, terms_end);
     278        if (qopt->top_and) {
     279            vector<PostList *>::const_iterator j;
     280            for (j = terms_begin; j != terms_end; ++j) {
     281                qopt->pool_terms.push_back(*j);
     282            }
     283            // We can currently only handle hoisting out one phrase check.
     284            // FIXME: Gather a list of checks, not a list of the subqueries in
     285            // one check.
     286            qopt->top_and = false;
     287        } else {
     288            pl = new ExactPhrasePostList(pl, terms_begin, terms_end);
     289        }
    277290    } else {
    278291        AssertEq(op_, Xapian::Query::OP_PHRASE);
    279292        pl = new PhrasePostList(pl, window, terms_begin, terms_end);
    AndContext::postlist(QueryOptimiser* qopt)  
    308321    list<PosFilter>::const_iterator i;
    309322    for (i = pos_filters.begin(); i != pos_filters.end(); ++i) {
    310323        const PosFilter & filter = *i;
    311         pl.reset(filter.postlist(pl.release(), pls));
     324        pl.reset(filter.postlist(pl.release(), pls, qopt));
    312325    }
    313326
    314327    // Empty pls so our destructor doesn't delete them all!
    Query::Internal::postlist_sub_or_like(OrContext& ctx,  
    491504                                      QueryOptimiser * qopt,
    492505                                      double factor) const
    493506{
     507    bool top_and = qopt->top_and;
     508    qopt->top_and = false;
    494509    ctx.add_postlist(postlist(qopt, factor));
     510    qopt->top_and = top_and;
    495511}
    496512
    497513void
    Query::Internal::postlist_sub_xor(XorContext& ctx,  
    499515                                  QueryOptimiser * qopt,
    500516                                  double factor) const
    501517{
     518    bool top_and = qopt->top_and;
     519    qopt->top_and = false;
    502520    ctx.add_postlist(postlist(qopt, factor));
     521    qopt->top_and = top_and;
    503522}
    504523
    505524namespace Internal {
    QueryAndNot::postlist(QueryOptimiser * qopt, double factor) const  
    11471166    LOGCALL(QUERY, PostingIterator::Internal *, "QueryAndNot::postlist", qopt | factor);
    11481167    // FIXME: Combine and-like side with and-like stuff above.
    11491168    AutoPtr<PostList> l(subqueries[0].internal->postlist(qopt, factor));
     1169    bool top_and = qopt->top_and;
     1170    qopt->top_and = false;
    11501171    OrContext ctx(subqueries.size() - 1);
    11511172    do_or_like(ctx, qopt, 0.0, 0, 1);
    11521173    AutoPtr<PostList> r(ctx.postlist(qopt));
     1174    qopt->top_and = top_and;
    11531175    RETURN(new AndNotPostList(l.release(), r.release(),
    11541176                              qopt->matcher, qopt->db_size));
    11551177}
    QueryAndMaybe::postlist(QueryOptimiser * qopt, double factor) const  
    11801202    LOGCALL(QUERY, PostingIterator::Internal *, "QueryAndMaybe::postlist", qopt | factor);
    11811203    // FIXME: Combine and-like side with and-like stuff above.
    11821204    AutoPtr<PostList> l(subqueries[0].internal->postlist(qopt, factor));
     1205    bool top_and = qopt->top_and;
     1206    qopt->top_and = false;
    11831207    OrContext ctx(subqueries.size() - 1);
    11841208    do_or_like(ctx, qopt, factor, 0, 1);
    11851209    AutoPtr<PostList> r(ctx.postlist(qopt));
     1210    qopt->top_and = top_and;
    11861211    RETURN(new AndMaybePostList(l.release(), r.release(),
    11871212                                qopt->matcher, qopt->db_size));
    11881213}
  • xapian-core/backends/brass/brass_alldocspostlist.cc

    diff --git a/xapian-core/backends/brass/brass_alldocspostlist.cc b/xapian-core/backends/brass/brass_alldocspostlist.cc
    index 888f77c..12fc093 100644
    a b BrassAllDocsPostList::get_wdf() const  
    6464}
    6565
    6666PositionList *
    67 BrassAllDocsPostList::read_position_list()
     67BrassAllDocsPostList::read_position_list(Xapian::docid)
    6868{
    69     LOGCALL(DB, PositionList *, "BrassAllDocsPostList::read_position_list", NO_ARGS);
     69    LOGCALL(DB, PositionList *, "BrassAllDocsPostList::read_position_list", "[docid]");
    7070    throw Xapian::InvalidOperationError("BrassAllDocsPostList::read_position_list() not meaningful");
    7171}
    7272
  • xapian-core/backends/brass/brass_alldocspostlist.h

    diff --git a/xapian-core/backends/brass/brass_alldocspostlist.h b/xapian-core/backends/brass/brass_alldocspostlist.h
    index b077e46..3d80951 100644
    a b class BrassAllDocsPostList : public BrassPostList {  
    4646
    4747    Xapian::termcount get_wdf() const;
    4848
    49     PositionList *read_position_list();
     49    PositionList *read_position_list(Xapian::docid other_did = 0);
    5050
    5151    PositionList *open_position_list() const;
    5252
  • xapian-core/backends/brass/brass_postlist.cc

    diff --git a/xapian-core/backends/brass/brass_postlist.cc b/xapian-core/backends/brass/brass_postlist.cc
    index 96d0f17..ac2a16b 100644
    a b BrassPostList::next_chunk()  
    785785}
    786786
    787787PositionList *
    788 BrassPostList::read_position_list()
     788BrassPostList::read_position_list(Xapian::docid other_did)
    789789{
    790     LOGCALL(DB, PositionList *, "BrassPostList::read_position_list", NO_ARGS);
     790    LOGCALL(DB, PositionList *, "BrassPostList::read_position_list", other_did);
     791    if (other_did == 0)
     792        other_did = did;
    791793    Assert(this_db.get());
    792     positionlist.read_data(&this_db->position_table, did, term);
     794    positionlist.read_data(&this_db->position_table, other_did, term);
    793795    RETURN(&positionlist);
    794796}
    795797
  • xapian-core/backends/brass/brass_postlist.h

    diff --git a/xapian-core/backends/brass/brass_postlist.h b/xapian-core/backends/brass/brass_postlist.h
    index efd5268..a38b8f4 100644
    a b class BrassPostList : public LeafPostList {  
    252252         */
    253253        Xapian::termcount get_wdf() const { Assert(have_started); return wdf; }
    254254
    255         /** Get the list of positions of the term in the current document.
     255        /** Get the list of positions of the term.
    256256         */
    257         PositionList *read_position_list();
     257        PositionList * read_position_list(Xapian::docid other_did = 0);
    258258
    259259        /** Get the list of positions of the term in the current document.
    260260         */
  • xapian-core/backends/chert/chert_alldocspostlist.cc

    diff --git a/xapian-core/backends/chert/chert_alldocspostlist.cc b/xapian-core/backends/chert/chert_alldocspostlist.cc
    index ad2dc99..8af180c 100644
    a b ChertAllDocsPostList::get_wdf() const  
    6464}
    6565
    6666PositionList *
    67 ChertAllDocsPostList::read_position_list()
     67ChertAllDocsPostList::read_position_list(Xapian::docid)
    6868{
    69     LOGCALL(DB, PositionList *, "ChertAllDocsPostList::read_position_list", NO_ARGS);
     69    LOGCALL(DB, PositionList *, "ChertAllDocsPostList::read_position_list", "[docid]");
    7070    throw Xapian::InvalidOperationError("ChertAllDocsPostList::read_position_list() not meaningful");
    7171}
    7272
  • xapian-core/backends/chert/chert_alldocspostlist.h

    diff --git a/xapian-core/backends/chert/chert_alldocspostlist.h b/xapian-core/backends/chert/chert_alldocspostlist.h
    index bb2aaaa..18bf56a 100644
    a b class ChertAllDocsPostList : public ChertPostList {  
    4646
    4747    Xapian::termcount get_wdf() const;
    4848
    49     PositionList *read_position_list();
     49    PositionList *read_position_list(Xapian::docid other_did = 0);
    5050
    5151    PositionList *open_position_list() const;
    5252
  • xapian-core/backends/chert/chert_modifiedpostlist.cc

    diff --git a/xapian-core/backends/chert/chert_modifiedpostlist.cc b/xapian-core/backends/chert/chert_modifiedpostlist.cc
    index 387fb6e..b3d0122 100644
    a b  
    2424#include "chert_database.h"
    2525#include "debuglog.h"
    2626
    27 ChertModifiedPostList::~ChertModifiedPostList()
    28 {
    29     delete poslist;
    30 }
    31 
    3227void
    3328ChertModifiedPostList::skip_deletes(double w_min)
    3429{
    ChertModifiedPostList::get_wdf() const  
    8277}
    8378
    8479PositionList *
    85 ChertModifiedPostList::read_position_list()
     80ChertModifiedPostList::read_position_list(Xapian::docid other_did)
    8681{
     82    if (other_did != 0) {
     83        return ChertPostList::read_position_list(other_did);
     84    }
    8785    if (it != mods.end() && (ChertPostList::at_end() || it->first <= ChertPostList::get_docid())) {
    88         if (poslist) {
    89             delete poslist;
    90             poslist = NULL;
    91         }
    92         poslist = this_db->open_position_list(it->first, term);
    93         return poslist;
     86        return ChertPostList::read_position_list(it->first);
    9487    }
    9588    return ChertPostList::read_position_list();
    9689}
  • xapian-core/backends/chert/chert_modifiedpostlist.h

    diff --git a/xapian-core/backends/chert/chert_modifiedpostlist.h b/xapian-core/backends/chert/chert_modifiedpostlist.h
    index 68b1609..0d0fde5 100644
    a b class ChertModifiedPostList : public ChertPostList {  
    3636    map<Xapian::docid, pair<char, Xapian::termcount> >::const_iterator it;
    3737    //@}
    3838
    39     /// Pointer to PositionList returned from read_position_list to be deleted.
    40     PositionList * poslist;
    41 
    4239    /// Skip over deleted documents after a next() or skip_to().
    4340    void skip_deletes(double w_min);
    4441
    class ChertModifiedPostList : public ChertPostList {  
    4845                          const string & term_,
    4946                          const map<Xapian::docid, pair<char, Xapian::termcount> > & mods_)
    5047        : ChertPostList(this_db_, term_, true),
    51           mods(mods_), it(mods.begin()), poslist(0)
     48          mods(mods_), it(mods.begin())
    5249    { }
    5350
    54     ~ChertModifiedPostList();
    55 
    5651    Xapian::doccount get_termfreq() const;
    5752
    5853    Xapian::docid get_docid() const;
    class ChertModifiedPostList : public ChertPostList {  
    6156
    6257    Xapian::termcount get_wdf() const;
    6358
    64     PositionList *read_position_list();
     59    PositionList *read_position_list(Xapian::docid other_did = 0);
    6560
    6661    PositionList *open_position_list() const;
    6762
  • xapian-core/backends/chert/chert_postlist.cc

    diff --git a/xapian-core/backends/chert/chert_postlist.cc b/xapian-core/backends/chert/chert_postlist.cc
    index 6816821..59af639 100644
    a b ChertPostList::next_chunk()  
    785785}
    786786
    787787PositionList *
    788 ChertPostList::read_position_list()
     788ChertPostList::read_position_list(Xapian::docid other_did)
    789789{
    790     LOGCALL(DB, PositionList *, "ChertPostList::read_position_list", NO_ARGS);
     790    LOGCALL(DB, PositionList *, "ChertPostList::read_position_list", other_did);
     791    if (other_did == 0)
     792        other_did = did;
    791793    Assert(this_db.get());
    792     positionlist.read_data(&this_db->position_table, did, term);
     794    positionlist.read_data(&this_db->position_table, other_did, term);
    793795    RETURN(&positionlist);
    794796}
    795797
  • xapian-core/backends/chert/chert_postlist.h

    diff --git a/xapian-core/backends/chert/chert_postlist.h b/xapian-core/backends/chert/chert_postlist.h
    index 534fe3a..5824793 100644
    a b class ChertPostList : public LeafPostList {  
    251251         */
    252252        Xapian::termcount get_wdf() const { Assert(have_started); return wdf; }
    253253
    254         /** Get the list of positions of the term in the current document.
     254        /** Get the list of positions of the term.
    255255         */
    256         PositionList *read_position_list();
     256        PositionList * read_position_list(Xapian::docid other_did = 0);
    257257
    258258        /** Get the list of positions of the term in the current document.
    259259         */
  • xapian-core/backends/contiguousalldocspostlist.cc

    diff --git a/xapian-core/backends/contiguousalldocspostlist.cc b/xapian-core/backends/contiguousalldocspostlist.cc
    index 1bca70f..c045895 100644
    a b ContiguousAllDocsPostList::get_wdf() const  
    6060}
    6161
    6262PositionList *
    63 ContiguousAllDocsPostList::read_position_list()
     63ContiguousAllDocsPostList::read_position_list(Xapian::docid)
    6464{
    6565    // Throws the same exception.
    6666    return ContiguousAllDocsPostList::open_position_list();
  • xapian-core/backends/contiguousalldocspostlist.h

    diff --git a/xapian-core/backends/contiguousalldocspostlist.h b/xapian-core/backends/contiguousalldocspostlist.h
    index ee13133..03cb9f1 100644
    a b class ContiguousAllDocsPostList : public LeafPostList {  
    6868    Xapian::termcount get_wdf() const;
    6969
    7070    /// Throws InvalidOperationError.
    71     PositionList *read_position_list();
     71    PositionList *read_position_list(Xapian::docid other_did = 0);
    7272
    7373    /// Throws InvalidOperationError.
    7474    PositionList * open_position_list() const;
  • xapian-core/backends/inmemory/inmemory_database.cc

    diff --git a/xapian-core/backends/inmemory/inmemory_database.cc b/xapian-core/backends/inmemory/inmemory_database.cc
    index 728c7e1..b5e4cfc 100644
    a b InMemoryDoc::add_posting(const InMemoryTermEntry & post)  
    7878//////////////
    7979
    8080InMemoryPostList::InMemoryPostList(intrusive_ptr<const InMemoryDatabase> db_,
    81                                    const InMemoryTerm & imterm,
     81                                   const InMemoryTerm & imterm_,
    8282                                   const std::string & term_)
    8383        : LeafPostList(term_),
     84          imterm(imterm_),
    8485          pos(imterm.docs.begin()),
    85           end(imterm.docs.end()),
    86           termfreq(imterm.term_freq),
    8786          started(false),
    8887          db(db_)
    8988{
    90     while (pos != end && !pos->valid) ++pos;
     89    while (pos != imterm.docs.end() && !pos->valid) ++pos;
    9190}
    9291
    9392Xapian::doccount
    9493InMemoryPostList::get_termfreq() const
    9594{
    96     return termfreq;
     95    return imterm.term_freq;
    9796}
    9897
    9998Xapian::docid
    InMemoryPostList::next(double /*w_min*/)  
    112111    if (started) {
    113112        Assert(!at_end());
    114113        ++pos;
    115         while (pos != end && !pos->valid) ++pos;
     114        while (pos != imterm.docs.end() && !pos->valid) ++pos;
    116115    } else {
    117116        started = true;
    118117    }
    bool  
    141140InMemoryPostList::at_end() const
    142141{
    143142    if (db->is_closed()) InMemoryDatabase::throw_database_closed();
    144     return (pos == end);
     143    return (pos == imterm.docs.end());
    145144}
    146145
    147146string
    148147InMemoryPostList::get_description() const
    149148{
    150     return "InMemoryPostList " + str(termfreq);
     149    return "InMemoryPostList " + str(imterm.term_freq);
    151150}
    152151
    153152Xapian::termcount
    InMemoryPostList::get_doclength() const  
    158157}
    159158
    160159PositionList *
    161 InMemoryPostList::read_position_list()
     160InMemoryPostList::read_position_list(Xapian::docid other_did)
    162161{
    163162    if (db->is_closed()) InMemoryDatabase::throw_database_closed();
    164     mypositions.set_data(pos->positions);
     163    if (other_did == 0) {
     164        mypositions.set_data(pos->positions);
     165    } else {
     166        vector<InMemoryPosting>::const_iterator p = imterm.docs.begin();
     167        while (p != imterm.docs.end() && (!p->valid || p->did < other_did))
     168            ++p;
     169        if (p != imterm.docs.end() && p->did == other_did)
     170            mypositions.set_data(p->positions);
     171        else
     172            mypositions.set_data(OmDocumentTerm::term_positions());
     173    }
    165174    return &mypositions;
    166175}
    167176
    InMemoryAllDocsPostList::get_wdf() const  
    329338}
    330339
    331340PositionList *
    332 InMemoryAllDocsPostList::read_position_list()
     341InMemoryAllDocsPostList::read_position_list(Xapian::docid)
    333342{
    334343    throw Xapian::UnimplementedError("Can't open position list for all docs iterator");
    335344}
  • xapian-core/backends/inmemory/inmemory_database.h

    diff --git a/xapian-core/backends/inmemory/inmemory_database.h b/xapian-core/backends/inmemory/inmemory_database.h
    index 036834d..e5f2d1e 100644
    a b class InMemoryDatabase;  
    136136class InMemoryPostList : public LeafPostList {
    137137    friend class InMemoryDatabase;
    138138    private:
     139        const InMemoryTerm & imterm;
    139140        vector<InMemoryPosting>::const_iterator pos;
    140         vector<InMemoryPosting>::const_iterator end;
    141         Xapian::doccount termfreq;
    142141        bool started;
    143142
    144143        /** List of positions of the current term.
    class InMemoryPostList : public LeafPostList {  
    156155        Xapian::docid       get_docid() const;     // Gets current docid
    157156        Xapian::termcount   get_doclength() const; // Length of current document
    158157        Xapian::termcount   get_wdf() const;       // Within Document Frequency
    159         PositionList * read_position_list();
     158        PositionList * read_position_list(Xapian::docid other_did = 0);
    160159        PositionList * open_position_list() const;
    161160
    162161        PostList *next(double w_min); // Moves to next docid
    class InMemoryAllDocsPostList : public LeafPostList {  
    185184        Xapian::docid       get_docid() const;     // Gets current docid
    186185        Xapian::termcount   get_doclength() const; // Length of current document
    187186        Xapian::termcount   get_wdf() const;       // Within Document Frequency
    188         PositionList * read_position_list();
     187        PositionList * read_position_list(Xapian::docid other_did = 0);
    189188        PositionList * open_position_list() const;
    190189
    191190        PostList *next(double w_min);      // Moves to next docid
  • xapian-core/backends/remote/net_postlist.cc

    diff --git a/xapian-core/backends/remote/net_postlist.cc b/xapian-core/backends/remote/net_postlist.cc
    index e8240d6..cf5b9aa 100644
    a b NetworkPostList::get_wdf() const  
    5252}
    5353
    5454PositionList *
    55 NetworkPostList::read_position_list()
    56 {
    57     lastposlist = db->open_position_list(lastdocid, term);
    58     return lastposlist.get();
    59 }
    60 
    61 PositionList *
    6255NetworkPostList::open_position_list() const
    6356{
    6457    return db->open_position_list(lastdocid, term);
  • xapian-core/backends/remote/net_postlist.h

    diff --git a/xapian-core/backends/remote/net_postlist.h b/xapian-core/backends/remote/net_postlist.h
    index 236af22..2bc17b5 100644
    a b class NetworkPostList : public LeafPostList {  
    4545
    4646    Xapian::docid lastdocid;
    4747    Xapian::termcount lastwdf;
    48     Xapian::Internal::intrusive_ptr<PositionList> lastposlist;
    4948
    5049    Xapian::doccount termfreq;
    5150
    class NetworkPostList : public LeafPostList {  
    7978    /// Get the Within Document Frequency of the term in the current document.
    8079    Xapian::termcount get_wdf() const;
    8180
    82     /// Read the position list for the term in the current document and
    83     /// return a pointer to it (owned by the PostList).
    84     PositionList * read_position_list();
     81    // We don't need NetworkPostList::read_position_list() since
     82    // read_position_list() is only used by the matcher and remote matches are
     83    // run as local matches on the remote end, and the results serialised.
    8584
    8685    /// Read the position list for the term in the current document and
    8786    /// return a pointer to it (not owned by the PostList).
  • xapian-core/common/submatch.h

    diff --git a/xapian-core/common/submatch.h b/xapian-core/common/submatch.h
    index c90eee0..74e3332 100644
    a b  
    11/** @file submatch.h
    22 *  @brief base class for sub-matchers
    33 */
    4 /* Copyright (C) 2006,2007,2009,2011 Olly Betts
     4/* Copyright (C) 2006,2007,2009,2011,2012 Olly Betts
    55 *
    66 * This program is free software; you can redistribute it and/or modify
    77 * it under the terms of the GNU General Public License as published by
    class SubMatch : public Xapian::Internal::intrusive_base {  
    7676    virtual PostList * get_postlist_and_term_info(MultiMatch *matcher,
    7777        std::map<std::string,
    7878                 Xapian::MSet::Internal::TermFreqAndWeight> *termfreqandwts,
    79         Xapian::termcount * total_subqs_ptr)
     79        Xapian::termcount * total_subqs_ptr,
     80        std::vector<PostList*> & pool_terms)
    8081        = 0;
    8182};
    8283
  • xapian-core/matcher/Makefile.mk

    diff --git a/xapian-core/matcher/Makefile.mk b/xapian-core/matcher/Makefile.mk
    index 0bae22c..85dc522 100644
    a b noinst_HEADERS +=\  
    44        matcher/branchpostlist.h\
    55        matcher/collapser.h\
    66        matcher/const_database_wrapper.h\
     7        matcher/exactphrasecheck.h\
    78        matcher/exactphrasepostlist.h\
    89        matcher/externalpostlist.h\
    910        matcher/extraweightpostlist.h\
    lib_src +=\  
    4243        matcher/branchpostlist.cc\
    4344        matcher/collapser.cc\
    4445        matcher/const_database_wrapper.cc\
     46        matcher/exactphrasecheck.cc\
    4547        matcher/exactphrasepostlist.cc\
    4648        matcher/externalpostlist.cc\
    4749        matcher/localsubmatch.cc\
  • new file xapian-core/matcher/exactphrasecheck.cc

    diff --git a/xapian-core/matcher/exactphrasecheck.cc b/xapian-core/matcher/exactphrasecheck.cc
    new file mode 100644
    index 0000000..ed55cea
    - +  
     1/** @file exactphrasecheck.cc
     2 * @brief Check if terms form a particular exact phrase.
     3 */
     4/* Copyright (C) 2006,2007,2009,2012 Olly Betts
     5 *
     6 * This program is free software; you can redistribute it and/or modify
     7 * it under the terms of the GNU General Public License as published by
     8 * the Free Software Foundation; either version 2 of the License, or
     9 * (at your option) any later version.
     10 *
     11 * This program is distributed in the hope that it will be useful,
     12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14 * GNU General Public License for more details.
     15 *
     16 * You should have received a copy of the GNU General Public License
     17 * along with this program; if not, write to the Free Software
     18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
     19 */
     20
     21// FIXME: this could probably share code with ExactPhrasePostList.
     22
     23#include <config.h>
     24
     25#include "exactphrasecheck.h"
     26
     27#include "debuglog.h"
     28#include "omassert.h"
     29#include "api/postlist.h"
     30#include "backends/positionlist.h"
     31
     32#include <algorithm>
     33#include <vector>
     34
     35using namespace std;
     36
     37class TermCompare {
     38    vector<PostList*> & terms;
     39
     40  public:
     41    TermCompare(vector<PostList*> & terms_) : terms(terms_) { }
     42
     43    bool operator()(unsigned a, unsigned b) const {
     44        return terms[a]->get_termfreq_est() < terms[b]->get_termfreq_est();
     45    }
     46};
     47
     48ExactPhraseCheck::ExactPhraseCheck(const Xapian::Database & db_,
     49                                   const vector<PostList*> &terms_)
     50    : db(db_), terms(terms_)
     51{
     52    if (terms.empty()) {
     53        poslists = NULL;
     54        order = NULL;
     55        return;
     56    }
     57
     58    AssertRel(terms.size(),>,1);
     59    size_t n = terms.size();
     60    poslists = new PositionList*[n];
     61    try {
     62        order = new unsigned[n];
     63    } catch (...) {
     64        delete [] poslists;
     65        throw;
     66    }
     67    for (size_t i = 0; i < n; ++i) order[i] = unsigned(i);
     68}
     69
     70ExactPhraseCheck::~ExactPhraseCheck()
     71{
     72    delete [] poslists;
     73    delete [] order;
     74}
     75
     76void
     77ExactPhraseCheck::start_position_list(unsigned i, Xapian::docid did)
     78{
     79    AssertRel(i,<,terms.size());
     80    unsigned index = order[i];
     81    poslists[i] = terms[index]->read_position_list(did);
     82    poslists[i]->index = index;
     83}
     84
     85bool
     86ExactPhraseCheck::operator()(Xapian::docid did)
     87{
     88    LOGCALL(MATCH, bool, "ExactPhraseCheck::operator()", did);
     89
     90    if (terms.size() <= 1) RETURN(true);
     91
     92    // We often don't need to read all the position lists, so rather than using
     93    // the shortest position lists first, we approximate by using the terms
     94    // with the lowest estimated term frequency first (see TermCompare).  This
     95    // will typically give the same or a very similar order.
     96    sort(order, order + terms.size(), TermCompare(terms));
     97
     98    AssertRel(terms.size(),>,1);
     99
     100    // If the first term we check only occurs too close to the start of the
     101    // document, we only need to read one term's positions.  E.g. search for
     102    // "ripe mango" when the only occurrence of 'mango' in the current document
     103    // is at position 0.
     104    start_position_list(0, did);
     105    poslists[0]->skip_to(poslists[0]->index);
     106    if (poslists[0]->at_end()) RETURN(false);
     107
     108    // If we get here, we'll need to read the positionlists for at least two
     109    // terms, so check the true positionlist length for the two terms with the
     110    // lowest termfreq and if needed swap them so the true shorter one is first.
     111    start_position_list(1, did);
     112    if (poslists[0]->get_size() < poslists[1]->get_size()) {
     113        poslists[1]->skip_to(poslists[1]->index);
     114        if (poslists[1]->at_end()) RETURN(false);
     115        swap(poslists[0], poslists[1]);
     116    }
     117
     118    {
     119        unsigned read_hwm = 1;
     120        Xapian::termpos idx0 = poslists[0]->index;
     121        do {
     122            Xapian::termpos base = poslists[0]->get_position() - idx0;
     123            unsigned i = 1;
     124            while (true) {
     125                if (i > read_hwm) {
     126                    read_hwm = i;
     127                    start_position_list(i, did);
     128                    // FIXME: consider comparing with poslist[0] and swapping
     129                    // if less common.  Should we allow for the number of positions
     130                    // we've read from poslist[0] already?
     131                }
     132                Xapian::termpos required = base + poslists[i]->index;
     133                poslists[i]->skip_to(required);
     134                if (poslists[i]->at_end()) RETURN(false);
     135                if (poslists[i]->get_position() != required) break;
     136                if (++i == terms.size()) RETURN(true);
     137            }
     138            poslists[0]->next();
     139        } while (!poslists[0]->at_end());
     140    }
     141    RETURN(false);
     142}
  • new file xapian-core/matcher/exactphrasecheck.h

    diff --git a/xapian-core/matcher/exactphrasecheck.h b/xapian-core/matcher/exactphrasecheck.h
    new file mode 100644
    index 0000000..d2e28a5
    - +  
     1/** @file exactphrasecheck.h
     2 * @brief Check if terms form a particular exact phrase.
     3 */
     4/* Copyright (C) 2006,2012 Olly Betts
     5 * Copyright (C) 2009 Lemur Consulting Ltd
     6 *
     7 * This program is free software; you can redistribute it and/or modify
     8 * it under the terms of the GNU General Public License as published by
     9 * the Free Software Foundation; either version 2 of the License, or
     10 * (at your option) any later version.
     11 *
     12 * This program is distributed in the hope that it will be useful,
     13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     15 * GNU General Public License for more details.
     16 *
     17 * You should have received a copy of the GNU General Public License
     18 * along with this program; if not, write to the Free Software
     19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
     20 */
     21
     22#ifndef XAPIAN_INCLUDED_EXACTPHRASEPOSTLIST_H
     23#define XAPIAN_INCLUDED_EXACTPHRASEPOSTLIST_H
     24
     25#include "xapian/database.h"
     26
     27#include <string>
     28#include <vector>
     29
     30typedef Xapian::PositionIterator::Internal PositionList;
     31typedef Xapian::PostingIterator::Internal PostList;
     32
     33/** Check for an exact phrase using positional information.
     34 *
     35 *  Tests if the terms occur somewhere in the document in the order given
     36 *  and at adjacent term positions.
     37 */
     38class ExactPhraseCheck {
     39    Xapian::Database db;
     40
     41    std::vector<PostList*> terms;
     42
     43    PositionList ** poslists;
     44
     45    unsigned * order;
     46
     47    /// Start reading from the i-th position list.
     48    void start_position_list(unsigned i, Xapian::docid did);
     49
     50  public:
     51    ExactPhraseCheck(const Xapian::Database & db_,
     52                     const std::vector<PostList*> &terms_);
     53
     54    ~ExactPhraseCheck();
     55
     56    /// Test if the specified document contains the terms as an exact phrase.
     57    bool operator()(Xapian::docid did);
     58};
     59
     60#endif
  • xapian-core/matcher/exactphrasepostlist.cc

    diff --git a/xapian-core/matcher/exactphrasepostlist.cc b/xapian-core/matcher/exactphrasepostlist.cc
    index 59e3b00..4427b82 100644
    a b ExactPhrasePostList::test_doc()  
    102102        swap(poslists[0], poslists[1]);
    103103    }
    104104
    105     unsigned read_hwm = 1;
    106     Xapian::termpos idx0 = poslists[0]->index;
    107     do {
    108         Xapian::termpos base = poslists[0]->get_position() - idx0;
    109         unsigned i = 1;
    110         while (true) {
    111             if (i > read_hwm) {
    112                 read_hwm = i;
    113                 start_position_list(i);
    114                 // FIXME: consider comparing with poslist[0] and swapping
    115                 // if less common.  Should we allow for the number of positions
    116                 // we've read from poslist[0] already?
     105    {
     106        unsigned read_hwm = 1;
     107        Xapian::termpos idx0 = poslists[0]->index;
     108        do {
     109            Xapian::termpos base = poslists[0]->get_position() - idx0;
     110            unsigned i = 1;
     111            while (true) {
     112                if (i > read_hwm) {
     113                    read_hwm = i;
     114                    start_position_list(i);
     115                    // FIXME: consider comparing with poslist[0] and swapping
     116                    // if less common.  Should we allow for the number of positions
     117                    // we've read from poslist[0] already?
     118                }
     119                Xapian::termpos required = base + poslists[i]->index;
     120                poslists[i]->skip_to(required);
     121                if (poslists[i]->at_end()) RETURN(false);
     122                if (poslists[i]->get_position() != required) break;
     123                if (++i == terms.size()) RETURN(true);
    117124            }
    118             Xapian::termpos required = base + poslists[i]->index;
    119             poslists[i]->skip_to(required);
    120             if (poslists[i]->at_end()) RETURN(false);
    121             if (poslists[i]->get_position() != required) break;
    122             if (++i == terms.size()) RETURN(true);
    123         }
    124         poslists[0]->next();
    125     } while (!poslists[0]->at_end());
     125            poslists[0]->next();
     126        } while (!poslists[0]->at_end());
     127    }
    126128    RETURN(false);
    127129}
    128130
  • xapian-core/matcher/externalpostlist.cc

    diff --git a/xapian-core/matcher/externalpostlist.cc b/xapian-core/matcher/externalpostlist.cc
    index 053c91a..38ba913 100644
    a b ExternalPostList::recalc_maxweight()  
    115115}
    116116
    117117PositionList *
    118 ExternalPostList::read_position_list()
     118ExternalPostList::read_position_list(Xapian::docid)
    119119{
    120120    return NULL;
    121121}
  • xapian-core/matcher/externalpostlist.h

    diff --git a/xapian-core/matcher/externalpostlist.h b/xapian-core/matcher/externalpostlist.h
    index ee97aca..5dd8e08 100644
    a b class ExternalPostList : public PostList {  
    7474
    7575    double recalc_maxweight();
    7676
    77     PositionList * read_position_list();
     77    PositionList * read_position_list(Xapian::docid other_did = 0);
    7878
    7979    PositionList * open_position_list() const;
    8080
  • xapian-core/matcher/localsubmatch.cc

    diff --git a/xapian-core/matcher/localsubmatch.cc b/xapian-core/matcher/localsubmatch.cc
    index 10c648f..4751138 100644
    a b  
    11/** @file localsubmatch.cc
    22 *  @brief SubMatch class for a local database.
    33 */
    4 /* Copyright (C) 2006,2007,2009,2010,2011 Olly Betts
     4/* Copyright (C) 2006,2007,2009,2010,2011,2012 Olly Betts
    55 * Copyright (C) 2007,2008,2009 Lemur Consulting Ltd
    66 *
    77 * This program is free software; you can redistribute it and/or modify
    LocalSubMatch::start_match(Xapian::doccount first,  
    6868PostList *
    6969LocalSubMatch::get_postlist_and_term_info(MultiMatch * matcher,
    7070        map<string, Xapian::MSet::Internal::TermFreqAndWeight> * termfreqandwts,
    71         Xapian::termcount * total_subqs_ptr)
     71        Xapian::termcount * total_subqs_ptr,
     72        std::vector<PostList*> & pool_terms)
    7273{
    7374    LOGCALL(MATCH, PostList *, "LocalSubMatch::get_postlist_and_term_info", matcher | termfreqandwts | total_subqs_ptr);
    7475    (void)matcher;
    LocalSubMatch::get_postlist_and_term_info(MultiMatch * matcher,  
    8283
    8384    PostList * pl;
    8485    {
    85         QueryOptimiser opt(*db, *this, matcher);
     86        QueryOptimiser opt(*db, *this, matcher, pool_terms);
    8687        pl = query.internal->postlist(&opt, 1.0);
    8788        *total_subqs_ptr = opt.get_total_subqs();
    8889    }
  • xapian-core/matcher/localsubmatch.h

    diff --git a/xapian-core/matcher/localsubmatch.h b/xapian-core/matcher/localsubmatch.h
    index 8e92416..1d1a7e0 100644
    a b  
    11/** @file localsubmatch.h
    22 *  @brief SubMatch class for a local database.
    33 */
    4 /* Copyright (C) 2006,2007,2009,2010,2011 Olly Betts
     4/* Copyright (C) 2006,2007,2009,2010,2011,2012 Olly Betts
    55 * Copyright (C) 2007 Lemur Consulting Ltd
    66 *
    77 * This program is free software; you can redistribute it and/or modify
    class LocalSubMatch : public SubMatch {  
    8989    PostList * get_postlist_and_term_info(MultiMatch *matcher,
    9090        std::map<std::string,
    9191                 Xapian::MSet::Internal::TermFreqAndWeight> *termfreqandwts,
    92         Xapian::termcount * total_subqs_ptr);
     92        Xapian::termcount * total_subqs_ptr,
     93        std::vector<PostList*> & pool_terms);
    9394
    9495    /** Convert a postlist into a synonym postlist.
    9596     */
  • xapian-core/matcher/multimatch.cc

    diff --git a/xapian-core/matcher/multimatch.cc b/xapian-core/matcher/multimatch.cc
    index 654908a..69a195f 100644
    a b  
    22 *
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2001,2002 Ananova Ltd
    5  * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011 Olly Betts
     5 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012 Olly Betts
    66 * Copyright 2003 Orange PCS Ltd
    77 * Copyright 2003 Sam Liddicott
    88 * Copyright 2007,2008,2009 Lemur Consulting Ltd
     
    4646#include "valuestreamdocument.h"
    4747#include "weight/weightinternal.h"
    4848
     49#include "exactphrasecheck.h"
     50
    4951#include <xapian/errorhandler.h>
    5052#include <xapian/matchspy.h>
    5153#include <xapian/version.h> // For XAPIAN_HAS_REMOTE_BACKEND
    MultiMatch::get_mset(Xapian::doccount first, Xapian::doccount maxitems,  
    355357    map<string, Xapian::MSet::Internal::TermFreqAndWeight> * termfreqandwts_ptr;
    356358    termfreqandwts_ptr = &termfreqandwts;
    357359
     360    vector<PostList*> pool_terms;
    358361    Xapian::termcount total_subqs = 0;
    359362    // Keep a count of matches which we know exist, but we won't see.  This
    360363    // occurs when a submatch is remote, and returns a lower bound on the
    MultiMatch::get_mset(Xapian::doccount first, Xapian::doccount maxitems,  
    364367    for (size_t i = 0; i != leaves.size(); ++i) {
    365368        PostList *pl;
    366369        try {
     370            if (!is_remote[i]) pool_terms.clear();
    367371            pl = leaves[i]->get_postlist_and_term_info(this,
    368372                                                       termfreqandwts_ptr,
    369                                                        &total_subqs);
     373                                                       &total_subqs,
     374                                                       pool_terms);
    370375            if (termfreqandwts_ptr && !termfreqandwts.empty())
    371376                termfreqandwts_ptr = NULL;
    372377            if (is_remote[i]) {
    MultiMatch::get_mset(Xapian::doccount first, Xapian::doccount maxitems,  
    525530    // Is the mset a valid heap?
    526531    bool is_heap = false;
    527532
     533    size_t SETTLING_POND_SIZE = 0;
     534    if (!pool_terms.empty()) {
     535        const char * sps = getenv("POND_SIZE");
     536        SETTLING_POND_SIZE = sps ? atoi(sps) : 100000;
     537    }
     538    ExactPhraseCheck phrase_check(db, pool_terms);
     539    // FIXME: a min/max heap is probably a better choice here (notably more
     540    // compact) but the STL doesn't provide one so we'd have to find an
     541    // implementation or write one.
     542    multimap<double, Xapian::Internal::MSetItem> settling_pond;
    528543    while (true) {
    529544        bool pushback;
    530545
    MultiMatch::get_mset(Xapian::doccount first, Xapian::doccount maxitems,  
    646661            new_item.wt = wt;
    647662        }
    648663
     664        if (SETTLING_POND_SIZE) {
     665            if (items.size() >= max_msize) {
     666                // Settling pond handling...
     667                multimap<double, Xapian::Internal::MSetItem>::iterator it;
     668                it = settling_pond.upper_bound(-min_weight);
     669                settling_pond.erase(it, settling_pond.end());
     670
     671                settling_pond.insert(make_pair(-new_item.wt, new_item));
     672                if (settling_pond.size() < SETTLING_POND_SIZE) {
     673                    continue;
     674                }
     675
     676                // Take the last item off the heap, which will have a reasonably
     677                // high weight in general.
     678                it = settling_pond.begin();
     679                swap(new_item, it->second);
     680                settling_pond.erase(it);
     681            }
     682            if (!phrase_check(new_item.did)) continue;
     683        }
     684
    649685        pushback = true;
    650686
    651687        // Perform collapsing on key if requested.
    new_greatest_weight:  
    808844        }
    809845    }
    810846
     847    multimap<double, Xapian::Internal::MSetItem>::iterator it;
     848    for (it = settling_pond.begin(); it != settling_pond.end(); ++it) {
     849        const Xapian::Internal::MSetItem & new_item = it->second;
     850        if (new_item.wt < min_weight) break;
     851        if (!phrase_check(new_item.did)) continue;
     852
     853        {
     854            ++docs_matched;
     855            if (items.size() >= max_msize) {
     856                items.push_back(new_item);
     857                if (!is_heap) {
     858                    is_heap = true;
     859                    make_heap(items.begin(), items.end(), mcmp);
     860                } else {
     861                    push_heap<vector<Xapian::Internal::MSetItem>::iterator,
     862                              MSetCmp>(items.begin(), items.end(), mcmp);
     863                }
     864                pop_heap<vector<Xapian::Internal::MSetItem>::iterator,
     865                         MSetCmp>(items.begin(), items.end(), mcmp);
     866                items.pop_back();
     867
     868                min_item = items.front();
     869                if (sort_by == REL || sort_by == REL_VAL) {
     870                    if (docs_matched >= check_at_least) {
     871                        if (sort_by == REL) {
     872                            // We're done if this is a forward boolean match
     873                            // with only one database (bodgetastic, FIXME
     874                            // better if we can!)
     875                            if (rare(max_possible == 0 && sort_forward)) {
     876                                // In the multi database case, MergePostList
     877                                // currently processes each database
     878                                // sequentially (which actually may well be
     879                                // more efficient) so the docids in general
     880                                // won't arrive in order.
     881                                // FIXME: is this still good here:
     882                                // if (leaves.size() == 1) break;
     883                            }
     884                        }
     885                        if (min_item.wt > min_weight) {
     886                            LOGLINE(MATCH, "Setting min_weight to " <<
     887                                    min_item.wt << " from " << min_weight);
     888                            min_weight = min_item.wt;
     889                        }
     890                    }
     891                }
     892            } else {
     893                items.push_back(new_item);
     894                is_heap = false;
     895                if (sort_by == REL && items.size() == max_msize) {
     896                    if (docs_matched >= check_at_least) {
     897                        // We're done if this is a forward boolean match
     898                        // with only one database (bodgetastic, FIXME
     899                        // better if we can!)
     900                        if (rare(max_possible == 0 && sort_forward)) {
     901                            // In the multi database case, MergePostList
     902                            // currently processes each database
     903                            // sequentially (which actually may well be
     904                            // more efficient) so the docids in general
     905                            // won't arrive in order.
     906                            // FIXME: if (leaves.size() == 1) break;
     907                        }
     908                    }
     909                }
     910            }
     911        }
     912
     913        // Keep a track of the greatest weight we've seen.
     914        if (new_item.wt > greatest_wt) {
     915            greatest_wt = new_item.wt;
     916#ifdef XAPIAN_HAS_REMOTE_BACKEND
     917            const unsigned int multiplier = db.internal.size();
     918            unsigned int db_num = (new_item.did - 1) % multiplier;
     919            if (is_remote[db_num]) {
     920                // Note that the greatest weighted document came from a remote
     921                // database, and which one.
     922                greatest_wt_subqs_db_num = db_num;
     923            } else
     924#endif
     925            {
     926                greatest_wt_subqs_matched = pl->count_matching_subqs();
     927#ifdef XAPIAN_HAS_REMOTE_BACKEND
     928                greatest_wt_subqs_db_num = UINT_MAX;
     929#endif
     930            }
     931            if (percent_cutoff) {
     932                double w = new_item.wt * percent_cutoff_factor;
     933                if (w > min_weight) {
     934                    min_weight = w;
     935                    if (!is_heap) {
     936                        is_heap = true;
     937                        make_heap<vector<Xapian::Internal::MSetItem>::iterator,
     938                                  MSetCmp>(items.begin(), items.end(), mcmp);
     939                    }
     940                    while (!items.empty() && items.front().wt < min_weight) {
     941                        pop_heap<vector<Xapian::Internal::MSetItem>::iterator,
     942                                 MSetCmp>(items.begin(), items.end(), mcmp);
     943                        Assert(items.back().wt < min_weight);
     944                        items.pop_back();
     945                    }
     946#ifdef XAPIAN_ASSERTIONS_PARANOID
     947                    vector<Xapian::Internal::MSetItem>::const_iterator i;
     948                    for (i = items.begin(); i != items.end(); ++i) {
     949                        Assert(i->wt >= min_weight);
     950                    }
     951#endif
     952                }
     953            }
     954        }
     955    }
     956
     957
    811958    // done with posting list tree
    812959    pl.reset(NULL);
    813960
  • xapian-core/matcher/multixorpostlist.h

    diff --git a/xapian-core/matcher/multixorpostlist.h b/xapian-core/matcher/multixorpostlist.h
    index ce0bf8c..5588b4a 100644
    a b class MultiXorPostList : public PostList {  
    101101
    102102    double recalc_maxweight();
    103103
    104     PositionList * read_position_list() {
     104    PositionList * read_position_list(Xapian::docid = 0) {
    105105        return NULL;
    106106    }
    107107
  • xapian-core/matcher/queryoptimiser.h

    diff --git a/xapian-core/matcher/queryoptimiser.h b/xapian-core/matcher/queryoptimiser.h
    index 7147b76..3bf2a6a 100644
    a b  
    11/** @file queryoptimiser.h
    22 * @brief Details passed around while building PostList tree from Query tree
    33 */
    4 /* Copyright (C) 2007,2008,2009,2010,2011 Olly Betts
     4/* Copyright (C) 2007,2008,2009,2010,2011,2012 Olly Betts
    55 * Copyright (C) 2008 Lemur Consulting Ltd
    66 *
    77 * This program is free software; you can redistribute it and/or
    class QueryOptimiser {  
    4949    Xapian::termcount total_subqs;
    5050
    5151  public:
     52    std::vector<PostList*> & pool_terms;
     53
     54    bool top_and;
     55
    5256    const Xapian::Database::Internal & db;
    5357
    5458    Xapian::doccount db_size;
    class QueryOptimiser {  
    5761
    5862    QueryOptimiser(const Xapian::Database::Internal & db_,
    5963                   LocalSubMatch & localsubmatch_,
    60                    MultiMatch * matcher_)
     64                   MultiMatch * matcher_,
     65                   std::vector<PostList*> & pool_terms_)
    6166        : localsubmatch(localsubmatch_), total_subqs(0),
    62           db(db_), db_size(db.get_doccount()), matcher(matcher_) { }
     67          pool_terms(pool_terms_), top_and(true), db(db_),
     68          db_size(db.get_doccount()), matcher(matcher_) { }
    6369
    6470    void inc_total_subqs() { ++total_subqs; }
    6571
  • xapian-core/matcher/remotesubmatch.cc

    diff --git a/xapian-core/matcher/remotesubmatch.cc b/xapian-core/matcher/remotesubmatch.cc
    index ff5184e..d58fff5 100644
    a b  
    11/** @file remotesubmatch.cc
    22 *  @brief SubMatch class for a remote database.
    33 */
    4 /* Copyright (C) 2006,2007,2009,2010,2011 Olly Betts
     4/* Copyright (C) 2006,2007,2009,2010,2011,2012 Olly Betts
    55 * Copyright (C) 2007,2008 Lemur Consulting Ltd
    66 *
    77 * This program is free software; you can redistribute it and/or modify
    RemoteSubMatch::start_match(Xapian::doccount first,  
    6262PostList *
    6363RemoteSubMatch::get_postlist_and_term_info(MultiMatch *,
    6464        map<string, Xapian::MSet::Internal::TermFreqAndWeight> * termfreqandwts,
    65         Xapian::termcount * total_subqs_ptr)
     65        Xapian::termcount * total_subqs_ptr,
     66        std::vector<PostList*> &)
    6667{
    67     LOGCALL(MATCH, PostList *, "RemoteSubMatch::get_postlist_and_term_info", Literal("[matcher]") | termfreqandwts | total_subqs_ptr);
     68    LOGCALL(MATCH, PostList *, "RemoteSubMatch::get_postlist_and_term_info", Literal("[matcher]") | termfreqandwts | total_subqs_ptr | Literal("[pool_terms]"));
    6869    Xapian::MSet mset;
    6970    db->get_mset(mset, matchspies);
    7071    percent_factor = mset.internal->percent_factor;
  • xapian-core/matcher/remotesubmatch.h

    diff --git a/xapian-core/matcher/remotesubmatch.h b/xapian-core/matcher/remotesubmatch.h
    index 1198d8a..6292f9f 100644
    a b  
    11/** @file remotesubmatch.h
    22 *  @brief SubMatch class for a remote database.
    33 */
    4 /* Copyright (C) 2006,2007,2009,2011 Olly Betts
     4/* Copyright (C) 2006,2007,2009,2011,2012 Olly Betts
    55 * Copyright (C) 2007,2008 Lemur Consulting Ltd
    66 *
    77 * This program is free software; you can redistribute it and/or modify
    class RemoteSubMatch : public SubMatch {  
    7272    PostList * get_postlist_and_term_info(MultiMatch *matcher,
    7373        std::map<std::string,
    7474                 Xapian::MSet::Internal::TermFreqAndWeight> *termfreqandwts,
    75         Xapian::termcount * total_subqs_ptr);
     75        Xapian::termcount * total_subqs_ptr,
     76        std::vector<PostList*> & pool_terms);
    7677
    7778    /// Get percentage factor - only valid after get_postlist_and_term_info().
    7879    double get_percent_factor() const { return percent_factor; }
  • xapian-core/matcher/selectpostlist.h

    diff --git a/xapian-core/matcher/selectpostlist.h b/xapian-core/matcher/selectpostlist.h
    index 41151f9..77a2736 100644
    a b class SelectPostList : public PostList {  
    5454        double get_weight() const { return source->get_weight(); }
    5555        Xapian::termcount get_doclength() const { return source->get_doclength(); }
    5656        double recalc_maxweight() { return source->recalc_maxweight(); }
    57         PositionList * read_position_list() { return source->read_position_list(); }
     57        PositionList * read_position_list(Xapian::docid other_did = 0) {
     58            return source->read_position_list(other_did);
     59        }
    5860        PositionList * open_position_list() const { return source->open_position_list(); }
    5961        bool at_end() const { return source->at_end(); }
    6062
  • xapian-core/matcher/valuerangepostlist.cc

    diff --git a/xapian-core/matcher/valuerangepostlist.cc b/xapian-core/matcher/valuerangepostlist.cc
    index f940ab5..4f3e625 100644
    a b ValueRangePostList::recalc_maxweight()  
    103103}
    104104
    105105PositionList *
    106 ValueRangePostList::read_position_list()
     106ValueRangePostList::read_position_list(Xapian::docid)
    107107{
    108108    Assert(db);
    109109    return NULL;
  • xapian-core/matcher/valuerangepostlist.h

    diff --git a/xapian-core/matcher/valuerangepostlist.h b/xapian-core/matcher/valuerangepostlist.h
    index 37c7027..542d2a0 100644
    a b class ValueRangePostList : public PostList {  
    7373
    7474    double recalc_maxweight();
    7575
    76     PositionList * read_position_list();
     76    PositionList * read_position_list(Xapian::docid other_did = 0);
    7777
    7878    PositionList * open_position_list() const;
    7979