Ticket #394: phrase-settling-pond.patch

File phrase-settling-pond.patch, 23.3 KB (added by Olly Betts, 15 years ago)

Prototype patch

  • matcher/Makefile.mk

     
    33        matcher/andnotpostlist.h\
    44        matcher/branchpostlist.h\
    55        matcher/collapser.h\
     6        matcher/exactphrasecheck.h\
    67        matcher/exactphrasepostlist.h\
    78        matcher/externalpostlist.h\
    89        matcher/extraweightpostlist.h\
     
    3839        matcher/andnotpostlist.cc\
    3940        matcher/branchpostlist.cc\
    4041        matcher/collapser.cc\
     42        matcher/exactphrasecheck.cc\
    4143        matcher/exactphrasepostlist.cc\
    4244        matcher/externalpostlist.cc\
    4345        matcher/localmatch.cc\
  • matcher/exactphrasecheck.cc

     
     1/** @file exactphrasecheck.cc
     2 * @brief Check if terms form a particular exact phrase.
     3 */
     4/* Copyright (C) 2006,2007,2009 Olly Betts
     5 *
     6 * This program is free software; you can redistribute it and/or modify
     7 * it under the terms of the GNU General Public License as published by
     8 * the Free Software Foundation; either version 2 of the License, or
     9 * (at your option) any later version.
     10 *
     11 * This program is distributed in the hope that it will be useful,
     12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14 * GNU General Public License for more details.
     15 *
     16 * You should have received a copy of the GNU General Public License
     17 * along with this program; if not, write to the Free Software
     18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
     19 */
     20
     21// FIXME: this could probably share code with ExactPhrasePostList.
     22
     23#include <config.h>
     24
     25#include "exactphrasecheck.h"
     26#include "positionlist.h"
     27#include "postlist.h"
     28#include "omassert.h"
     29#include "omdebug.h"
     30
     31#include <algorithm>
     32#include <vector>
     33
     34using namespace std;
     35
     36namespace {
     37
     38class TermCompare {
     39    const Xapian::Database & db;
     40    vector<string> & terms;
     41
     42  public:
     43    TermCompare(const Xapian::Database & db_,
     44                vector<string> & terms_)
     45        : db(db_), terms(terms_) { }
     46
     47    bool operator()(unsigned a, unsigned b) const {
     48        return db.get_collection_freq(terms[a]) < db.get_collection_freq(terms[b]);
     49    }
     50};
     51
     52}
     53
     54ExactPhraseCheck::ExactPhraseCheck(const Xapian::Database & db_,
     55                                   const vector<string> &terms_)
     56    : db(db_), terms(terms_)
     57{
     58    if (terms.empty()) {
     59        poslists = NULL;
     60        order = NULL;
     61        return;
     62    }
     63
     64    AssertRel(terms.size(),>,1);
     65    size_t n = terms_.size();
     66    poslists = new PositionList*[n];
     67    try {
     68        order = new unsigned[n];
     69    } catch (...) {
     70        delete [] poslists;
     71        throw;
     72    }
     73    for (size_t i = 0; i < n; ++i) {
     74        poslists[i] = NULL;
     75        order[i] = unsigned(i);
     76    }
     77
     78    // We often don't need to read all the position lists, so rather than using
     79    // the shortest position lists first, we approximate by using the terms
     80    // with the lowest collection freq first.  Overall this should give a
     81    // similar order.
     82    sort(order, order + terms.size(), TermCompare(db, terms));
     83}
     84
     85ExactPhraseCheck::~ExactPhraseCheck()
     86{
     87    delete [] poslists;
     88    delete [] order;
     89}
     90
     91bool
     92ExactPhraseCheck::start_position_list(unsigned i, Xapian::docid did)
     93{
     94    AssertRel(i,<,terms.size());
     95    unsigned index = order[i];
     96    // FIXME: nasty hacking around with internals and refcount - we should
     97    // just add a new Database::Internal method to do what we want.
     98    Xapian::PositionIterator p = db.positionlist_begin(did, terms[index]);
     99    PositionList * tmp = p.internal.get();
     100    if (!tmp)
     101        return false;
     102    ++tmp->ref_count;
     103    p.internal = poslists[i];
     104    poslists[i] = tmp;
     105    poslists[i]->index = index;
     106    return true;
     107}
     108
     109bool
     110ExactPhraseCheck::operator()(Xapian::docid did)
     111{
     112    DEBUGCALL(MATCH, bool, "ExactPhraseCheck::operator()", did);
     113
     114    AssertRel(terms.size(),>,1);
     115
     116    bool result = false;
     117    // If the first term we check only occurs too close to the start of the
     118    // document, we only need to read one term's positions.  E.g. search for
     119    // "ripe mango" when the only occurrence of 'mango' in the current document
     120    // is at position 0.
     121    if (!start_position_list(0, did))
     122        goto done;
     123    poslists[0]->skip_to(poslists[0]->index);
     124    if (poslists[0]->at_end()) goto done;
     125
     126    // If we get here, we'll need to read the positionlists for at least two
     127    // terms, so check the true positionlist length for the two terms with the
     128    // lowest collection freq and if necessary swap so the true shorter one is first.
     129    if (!start_position_list(1, did))
     130        goto done;
     131    if (poslists[0]->get_size() < poslists[1]->get_size()) {
     132        poslists[1]->skip_to(poslists[1]->index);
     133        if (poslists[1]->at_end()) goto done;
     134        swap(poslists[0], poslists[1]);
     135    }
     136
     137    {
     138        unsigned read_hwm = 1;
     139        Xapian::termpos idx0 = poslists[0]->index;
     140        do {
     141            Xapian::termpos base = poslists[0]->get_position() - idx0;
     142            unsigned i = 1;
     143            while (true) {
     144                if (i > read_hwm) {
     145                    read_hwm = i;
     146                    if (!start_position_list(i, did))
     147                        goto done;
     148                    // FIXME: consider comparing with poslist[0] and swapping
     149                    // if less common.  Should we allow for the number of positions
     150                    // we've read from poslist[0] already?
     151                }
     152                Xapian::termpos required = base + poslists[i]->index;
     153                poslists[i]->skip_to(required);
     154                if (poslists[i]->at_end()) goto done;
     155                if (poslists[i]->get_position() != required) break;
     156                if (++i == terms.size()) {
     157                    result = true;
     158                    goto done;
     159                }
     160            }
     161            poslists[0]->next();
     162        } while (!poslists[0]->at_end());
     163    }
     164done:
     165    for (size_t i = 0; i < terms.size(); ++i) {
     166        delete poslists[i];
     167        poslists[i] = NULL;
     168    }
     169    RETURN(result);
     170}
  • matcher/multimatch.cc

     
    4646
    4747#include "weightinternal.h"
    4848
     49#include "exactphrasecheck.h"
     50
    4951#include <xapian/errorhandler.h>
    5052#include <xapian/matchspy.h>
    5153#include <xapian/version.h> // For XAPIAN_HAS_REMOTE_BACKEND
     
    356358    map<string, Xapian::MSet::Internal::TermFreqAndWeight> * termfreqandwts_ptr;
    357359    termfreqandwts_ptr = &termfreqandwts;
    358360
     361    vector<string> pool_terms;
    359362    Xapian::termcount total_subqs = 0;
    360363    // Keep a count of matches which we know exist, but we won't see.  This
    361364    // occurs when a submatch is remote, and returns a lower bound on the
     
    365368    for (size_t i = 0; i != leaves.size(); ++i) {
    366369        PostList *pl;
    367370        try {
     371            if (!is_remote[i]) pool_terms.clear();
    368372            pl = leaves[i]->get_postlist_and_term_info(this,
    369373                                                       termfreqandwts_ptr,
    370                                                        &total_subqs);
     374                                                       &total_subqs,
     375                                                       pool_terms);
    371376            if (termfreqandwts_ptr && !termfreqandwts.empty())
    372377                termfreqandwts_ptr = NULL;
    373378            if (is_remote[i]) {
     
    522527    // Is the mset a valid heap?
    523528    bool is_heap = false;
    524529
     530    size_t SETTLING_POND_SIZE = 0;
     531    if (!pool_terms.empty()) {
     532        const char * sps = getenv("POND_SIZE");
     533        SETTLING_POND_SIZE = sps ? atoi(sps) : 100000;
     534    }
     535    ExactPhraseCheck phrase_check(db, pool_terms);
     536    // FIXME: a min/max heap is probably a better choice here (notably more
     537    // compact) but the STL doesn't provide one so we'd have to find an
     538    // implementation or write one.
     539    multimap<Xapian::weight, Xapian::Internal::MSetItem> settling_pond;
    525540    while (true) {
    526541        bool pushback;
    527542
     
    649664            new_item.wt = wt;
    650665        }
    651666
     667        if (SETTLING_POND_SIZE) {
     668            if (items.size() >= max_msize) {
     669                // Settling pond handling...
     670                multimap<Xapian::weight, Xapian::Internal::MSetItem>::iterator it;
     671                it = settling_pond.upper_bound(-min_weight);
     672                settling_pond.erase(it, settling_pond.end());
     673
     674                settling_pond.insert(make_pair(-new_item.wt, new_item));
     675                if (settling_pond.size() < SETTLING_POND_SIZE) {
     676                    continue;
     677                }
     678
     679                // Take the first item out of the pond (keys are negated weights,
     680                // so this is the highest-weighted item currently held).
     681                it = settling_pond.begin();
     682                swap(new_item, it->second);
     683                settling_pond.erase(it);
     684            }
     685            if (!phrase_check(new_item.did)) continue;
     686        }
     687
    652688        pushback = true;
    653689
    654690        // Perform collapsing on key if requested.
     
    811847        }
    812848    }
    813849
     850    multimap<Xapian::weight, Xapian::Internal::MSetItem>::iterator it;
     851    for (it = settling_pond.begin(); it != settling_pond.end(); ++it) {
     852        const Xapian::Internal::MSetItem & new_item = it->second;
     853        if (new_item.wt < min_weight) break;
     854        if (!phrase_check(new_item.did)) continue;
     855
     856        {
     857            ++docs_matched;
     858            if (items.size() >= max_msize) {
     859                items.push_back(new_item);
     860                if (!is_heap) {
     861                    is_heap = true;
     862                    make_heap(items.begin(), items.end(), mcmp);
     863                } else {
     864                    push_heap<vector<Xapian::Internal::MSetItem>::iterator,
     865                              MSetCmp>(items.begin(), items.end(), mcmp);
     866                }
     867                pop_heap<vector<Xapian::Internal::MSetItem>::iterator,
     868                         MSetCmp>(items.begin(), items.end(), mcmp);
     869                items.pop_back();
     870
     871                min_item = items.front();
     872                if (sort_by == REL || sort_by == REL_VAL) {
     873                    if (docs_matched >= check_at_least) {
     874                        if (sort_by == REL) {
     875                            // We're done if this is a forward boolean match
     876                            // with only one database (bodgetastic, FIXME
     877                            // better if we can!)
     878                            if (rare(max_possible == 0 && sort_forward)) {
     879                                // In the multi database case, MergePostList
     880                                // currently processes each database
     881                                // sequentially (which actually may well be
     882                                // more efficient) so the docids in general
     883                                // won't arrive in order.
     884                                // FIXME: is this still good here:
     885                                // if (leaves.size() == 1) break;
     886                            }
     887                        }
     888                        if (min_item.wt > min_weight) {
     889                            LOGLINE(MATCH, "Setting min_weight to " <<
     890                                    min_item.wt << " from " << min_weight);
     891                            min_weight = min_item.wt;
     892                        }
     893                    }
     894                }
     895            } else {
     896                items.push_back(new_item);
     897                is_heap = false;
     898                if (sort_by == REL && items.size() == max_msize) {
     899                    if (docs_matched >= check_at_least) {
     900                        // We're done if this is a forward boolean match
     901                        // with only one database (bodgetastic, FIXME
     902                        // better if we can!)
     903                        if (rare(max_possible == 0 && sort_forward)) {
     904                            // In the multi database case, MergePostList
     905                            // currently processes each database
     906                            // sequentially (which actually may well be
     907                            // more efficient) so the docids in general
     908                            // won't arrive in order.
     909                            // FIXME: if (leaves.size() == 1) break;
     910                        }
     911                    }
     912                }
     913            }
     914        }
     915
     916        // Keep a track of the greatest weight we've seen.
     917        if (new_item.wt > greatest_wt) {
     918            greatest_wt = new_item.wt;
     919#ifdef XAPIAN_HAS_REMOTE_BACKEND
     920            const unsigned int multiplier = db.internal.size();
     921            unsigned int db_num = (new_item.did - 1) % multiplier;
     922            if (is_remote[db_num]) {
     923                // Note that the greatest weighted document came from a remote
     924                // database, and which one.
     925                greatest_wt_subqs_db_num = db_num;
     926            } else
     927#endif
     928            {
     929                greatest_wt_subqs_matched = pl->count_matching_subqs();
     930#ifdef XAPIAN_HAS_REMOTE_BACKEND
     931                greatest_wt_subqs_db_num = UINT_MAX;
     932#endif
     933            }
     934            if (percent_cutoff) {
     935                Xapian::weight w = new_item.wt * percent_cutoff_factor;
     936                if (w > min_weight) {
     937                    min_weight = w;
     938                    if (!is_heap) {
     939                        is_heap = true;
     940                        make_heap<vector<Xapian::Internal::MSetItem>::iterator,
     941                                  MSetCmp>(items.begin(), items.end(), mcmp);
     942                    }
     943                    while (!items.empty() && items.front().wt < min_weight) {
     944                        pop_heap<vector<Xapian::Internal::MSetItem>::iterator,
     945                                 MSetCmp>(items.begin(), items.end(), mcmp);
     946                        Assert(items.back().wt < min_weight);
     947                        items.pop_back();
     948                    }
     949#ifdef XAPIAN_ASSERTIONS_PARANOID
     950                    vector<Xapian::Internal::MSetItem>::const_iterator i;
     951                    for (i = items.begin(); i != items.end(); ++i) {
     952                        Assert(i->wt >= min_weight);
     953                    }
     954#endif
     955                }
     956            }
     957        }
     958    }
     959
     960
    814961    // done with posting list tree
    815962    delete pl;
    816963
  • matcher/localmatch.cc

     
    8989PostList *
    9090LocalSubMatch::get_postlist_and_term_info(MultiMatch * matcher,
    9191        map<string, Xapian::MSet::Internal::TermFreqAndWeight> * termfreqandwts,
    92         Xapian::termcount * total_subqs_ptr)
     92        Xapian::termcount * total_subqs_ptr,
     93        std::vector<std::string> & pool_terms)
    9394{
    9495    DEBUGCALL(MATCH, PostList *, "LocalSubMatch::get_postlist_and_term_info",
    9596              matcher << ", [termfreqandwts], [total_subqs_ptr]");
     
    9899    // Build the postlist tree for the query.  This calls
    99100    // LocalSubMatch::postlist_from_op_leaf_query() for each term in the query,
    100101    // which builds term_info as a side effect.
    101     QueryOptimiser opt(*db, *this, matcher);
     102    QueryOptimiser opt(*db, *this, matcher, pool_terms);
    102103    PostList * pl = opt.optimise_query(&orig_query);
    103104    *total_subqs_ptr = opt.get_total_subqueries();
    104105
  • matcher/exactphrasecheck.h

     
     1/** @file exactphrasecheck.h
     2 * @brief Check if terms form a particular exact phrase.
     3 */
     4/* Copyright (C) 2006 Olly Betts
     5 * Copyright (C) 2009 Lemur Consulting Ltd
     6 *
     7 * This program is free software; you can redistribute it and/or modify
     8 * it under the terms of the GNU General Public License as published by
     9 * the Free Software Foundation; either version 2 of the License, or
     10 * (at your option) any later version.
     11 *
     12 * This program is distributed in the hope that it will be useful,
     13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     15 * GNU General Public License for more details.
     16 *
     17 * You should have received a copy of the GNU General Public License
     18 * along with this program; if not, write to the Free Software
     19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
     20 */
     21
     22#ifndef XAPIAN_INCLUDED_EXACTPHRASEPOSTLIST_H
     23#define XAPIAN_INCLUDED_EXACTPHRASEPOSTLIST_H
     24
     25#include "xapian/database.h"
     26
     27#include <string>
     28#include <vector>
     29
     30typedef Xapian::PositionIterator::Internal PositionList;
     31
     32/** Check for an exact phrase using positional information.
     33 *
     34 *  Tests if the terms occur somewhere in the document in the order given
     35 *  and at adjacent term positions.
     36 */
     37class ExactPhraseCheck {
     38    Xapian::Database db;
     39
     40    std::vector<std::string> terms;
     41
     42    PositionList ** poslists;
     43
     44    unsigned * order;
     45
     46    /// Start reading from the i-th position list.
     47    bool start_position_list(unsigned i, Xapian::docid did);
     48
     49  public:
     50    ExactPhraseCheck(const Xapian::Database & db_,
     51                     const std::vector<std::string> &terms_);
     52
     53    ~ExactPhraseCheck();
     54
     55    /// Test if the specified document contains the terms as an exact phrase.
     56    bool operator()(Xapian::docid did);
     57};
     58
     59#endif
  • matcher/localmatch.h

     
    8181    /// Get PostList and term info.
    8282    PostList * get_postlist_and_term_info(MultiMatch *matcher,
    8383        std::map<string, Xapian::MSet::Internal::TermFreqAndWeight> *termfreqandwts,
    84         Xapian::termcount * total_subqs_ptr);
     84        Xapian::termcount * total_subqs_ptr,
     85        std::vector<std::string> & pool_terms);
    8586
    8687    /** Convert a postlist into a synonym postlist.
    8788     */
  • matcher/remotesubmatch.cc

     
    6464PostList *
    6565RemoteSubMatch::get_postlist_and_term_info(MultiMatch *,
    6666        map<string, Xapian::MSet::Internal::TermFreqAndWeight> * termfreqandwts,
    67         Xapian::termcount * total_subqs_ptr)
     67        Xapian::termcount * total_subqs_ptr,
     68        std::vector<std::string> &)
    6869{
    6970    DEBUGCALL(MATCH, PostList *, "RemoteSubMatch::get_postlist_and_term_info",
    7071              "[matcher], " << (void*)termfreqandwts << ", " << (void*)total_subqs_ptr);
  • matcher/queryoptimiser.cc

     
    2929#include "emptypostlist.h"
    3030#include "exactphrasepostlist.h"
    3131#include "externalpostlist.h"
     32#include "leafpostlist.h"
    3233#include "multiandpostlist.h"
    3334#include "multimatch.h"
    3435#include "omassert.h"
     
    4950using namespace std;
    5051
    5152PostList *
    52 QueryOptimiser::do_subquery(const Xapian::Query::Internal * query, double factor)
     53QueryOptimiser::do_subquery(const Xapian::Query::Internal * query, double factor,
     54                            bool top_and)
    5355{
    5456    DEBUGCALL(MATCH, PostList *, "QueryOptimiser::do_subquery",
    5557              query << ", " << factor);
     
    7981        case Xapian::Query::OP_FILTER:
    8082        case Xapian::Query::OP_NEAR:
    8183        case Xapian::Query::OP_PHRASE:
    82             RETURN(do_and_like(query, factor));
     84            RETURN(do_and_like(query, factor, top_and));
    8385
    8486        case Xapian::Query::OP_OR:
    8587        case Xapian::Query::OP_XOR:
     
    99101
    100102        case Xapian::Query::OP_AND_NOT: {
    101103            AssertEq(query->subqs.size(), 2);
    102             PostList * l = do_subquery(query->subqs[0], factor);
    103             PostList * r = do_subquery(query->subqs[1], 0.0);
     104            PostList * l = do_subquery(query->subqs[0], factor, top_and);
     105            PostList * r = do_subquery(query->subqs[1], 0.0, false);
    104106            RETURN(new AndNotPostList(l, r, matcher, db_size));
    105107        }
    106108
    107109        case Xapian::Query::OP_AND_MAYBE: {
    108110            AssertEq(query->subqs.size(), 2);
    109             PostList * l = do_subquery(query->subqs[0], factor);
    110             PostList * r = do_subquery(query->subqs[1], factor);
     111            PostList * l = do_subquery(query->subqs[0], factor, top_and);
     112            PostList * r = do_subquery(query->subqs[1], factor, false);
    111113            RETURN(new AndMaybePostList(l, r, matcher, db_size));
    112114        }
    113115
     
    140142            AssertEq(query->subqs.size(), 1);
    141143            double sub_factor = factor;
    142144            if (sub_factor != 0.0) sub_factor *= query->get_dbl_parameter();
    143             RETURN(do_subquery(query->subqs[0], sub_factor));
     145            RETURN(do_subquery(query->subqs[0], sub_factor, top_and));
    144146        }
    145147
    146148        default:
     
    163165};
    164166
    165167PostList *
    166 QueryOptimiser::do_and_like(const Xapian::Query::Internal *query, double factor)
     168QueryOptimiser::do_and_like(const Xapian::Query::Internal *query, double factor,
     169                            bool top_and)
    167170{
    168171    DEBUGCALL(MATCH, PostList *, "QueryOptimiser::do_and_like",
    169172              query << ", " << factor);
     
    195198            pl = new NearPostList(pl, window, terms);
    196199        } else if (window == filter.end - filter.begin) {
    197200            AssertEq(filter.op, Xapian::Query::OP_PHRASE);
    198             pl = new ExactPhrasePostList(pl, terms);
     201            if (top_and) {
     202                vector<PostList *>::const_iterator j;
     203                for (j = terms.begin(); j != terms.end(); ++j) {
     204                    // FIXME: avoid dynamic_cast<> here.
     205                    LeafPostList * lpl = dynamic_cast<LeafPostList*>(*j);
     206                    if (!lpl || lpl->term.empty()) goto cannot_pool;
     207                    pool_terms.push_back(lpl->term);
     208                }
     209                top_and = false;
     210            } else {
     211cannot_pool:
     212                pl = new ExactPhrasePostList(pl, terms);
     213            }
    199214        } else {
    200215            AssertEq(filter.op, Xapian::Query::OP_PHRASE);
    201216            pl = new PhrasePostList(pl, window, terms);
     
    244259        if (is_and_like(subq->op)) {
    245260            do_and_like(subq, factor, and_plists, pos_filters);
    246261        } else {
    247             PostList * pl = do_subquery(subq, factor);
     262            PostList * pl = do_subquery(subq, factor, false);
    248263            and_plists.push_back(pl);
    249264        }
    250265    }
     
    255270        size_t begin = end - queries.size();
    256271        Xapian::termcount window = query->parameter;
    257272
     273        if (window == queries.size()) {
     274        }
    258275        pos_filters.push_back(PosFilter(op, begin, end, window));
    259276    }
    260277}
     
    335352
    336353    Xapian::Query::Internal::subquery_list::const_iterator q;
    337354    for (q = queries.begin(); q != queries.end(); ++q) {
    338         postlists.push_back(do_subquery(*q, factor));
     355        postlists.push_back(do_subquery(*q, factor, false));
    339356    }
    340357
    341358    if (op == Xapian::Query::OP_ELITE_SET) {
  • matcher/queryoptimiser.h

     
    4444
    4545    MultiMatch * matcher;
    4646
     47    std::vector<std::string> & pool_terms;
     48
    4749    /** How many leaf subqueries there are.
    4850     *
    4951     *  Used for scaling percentages when the highest weighted document doesn't
     
    5961     *  @return         A PostList subtree.
    6062     */
    6163    PostList * do_subquery(const Xapian::Query::Internal * query,
    62                            double factor);
     64                           double factor, bool top_and);
    6365
    6466    /** Optimise an AND-like Xapian::Query::Internal subtree into a PostList
    6567     *  subtree.
     
    6971     *
    7072     *  @return         A PostList subtree.
    7173     */
    72     PostList * do_and_like(const Xapian::Query::Internal *query, double factor);
     74    PostList * do_and_like(const Xapian::Query::Internal *query, double factor,
     75                           bool top_and);
    7376
    7477    /** Optimise an AND-like Xapian::Query::Internal subtree into a PostList
    7578     *  subtree.
     
    107110  public:
    108111    QueryOptimiser(const Xapian::Database::Internal & db_,
    109112                   LocalSubMatch & localsubmatch_,
    110                    MultiMatch * matcher_)
     113                   MultiMatch * matcher_,
     114                   std::vector<std::string> & pool_terms_)
    111115        : db(db_), db_size(db.get_doccount()), localsubmatch(localsubmatch_),
    112           matcher(matcher_), total_subqs(0) { }
     116          matcher(matcher_), pool_terms(pool_terms_), total_subqs(0) { }
    113117
    114118    PostList * optimise_query(Xapian::Query::Internal * query) {
    115         return do_subquery(query, 1.0);
     119        return do_subquery(query, 1.0, true);
    116120    }
    117121
    118122    Xapian::termcount get_total_subqueries() const { return total_subqs; }
  • matcher/remotesubmatch.h

     
    7272    PostList * get_postlist_and_term_info(MultiMatch *matcher,
    7373        std::map<std::string,
    7474                 Xapian::MSet::Internal::TermFreqAndWeight> *termfreqandwts,
    75         Xapian::termcount * total_subqs_ptr);
     75        Xapian::termcount * total_subqs_ptr,
     76        std::vector<std::string> & pool_terms);
    7677
    7778    /// Get percentage factor - only valid after get_postlist_and_term_info().
    7879    double get_percent_factor() const { return percent_factor; }
  • common/leafpostlist.h

     
    4747
    4848    bool need_doclength;
    4949
     50  public: // FIXME: avoid having to make term public.
    5051    /// The term name for this postlist ("" for an alldocs postlist).
    5152    std::string term;
    5253
     54  protected:
    5355    /// Only constructable as a base class for derived classes.
    5456    LeafPostList(const std::string & term_)
    5557        : weight(0), need_doclength(false), term(term_) { }
  • common/submatch.h

     
    7676    virtual PostList * get_postlist_and_term_info(MultiMatch *matcher,
    7777        std::map<std::string,
    7878                 Xapian::MSet::Internal::TermFreqAndWeight> *termfreqandwts,
    79         Xapian::termcount * total_subqs_ptr)
     79        Xapian::termcount * total_subqs_ptr,
     80        std::vector<std::string> & pool_terms)
    8081        = 0;
    8182};
    8283