Ticket #394: phrase-settling-pond-update.patch

File phrase-settling-pond-update.patch, 23.5 KB (added by Olly Betts, 13 years ago)

Patch updated to SVN trunk r16092

  • matcher/Makefile.mk

     
    44        matcher/branchpostlist.h\
    55        matcher/collapser.h\
    66        matcher/const_database_wrapper.h\
     7        matcher/exactphrasecheck.h\
    78        matcher/exactphrasepostlist.h\
    8         matcher/externalpostlist.h\
    9         matcher/extraweightpostlist.h\
    10         matcher/localsubmatch.h\
    11         matcher/mergepostlist.h\
    12         matcher/msetcmp.h\
    139        matcher/msetpostlist.h\
    1410        matcher/multiandpostlist.h\
    1511        matcher/multixorpostlist.h\
     
    4137        matcher/branchpostlist.cc\
    4238        matcher/collapser.cc\
    4339        matcher/const_database_wrapper.cc\
     40        matcher/exactphrasecheck.cc\
    4441        matcher/exactphrasepostlist.cc\
    4542        matcher/externalpostlist.cc\
    4643        matcher/localsubmatch.cc\
  • matcher/exactphrasecheck.cc

     
     1/** @file exactphrasecheck.cc
     2 * @brief Check if terms form a particular exact phrase.
     3 */
     4/* Copyright (C) 2006,2007,2009 Olly Betts
     5 *
     6 * This program is free software; you can redistribute it and/or modify
     7 * it under the terms of the GNU General Public License as published by
     8 * the Free Software Foundation; either version 2 of the License, or
     9 * (at your option) any later version.
     10 *
     11 * This program is distributed in the hope that it will be useful,
     12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14 * GNU General Public License for more details.
     15 *
     16 * You should have received a copy of the GNU General Public License
     17 * along with this program; if not, write to the Free Software
     18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
     19 */
     20
     21// FIXME: this could probably share code with ExactPhrasePostList.
     22
     23#include <config.h>
     24
     25#include "exactphrasecheck.h"
     26
     27#include "debuglog.h"
     28#include "omassert.h"
     29#include "positionlist.h"
     30
     31#include <algorithm>
     32#include <vector>
     33
     /// Comparison functor ordering term indices by ascending collection
     /// frequency of the corresponding term in the database.
     ///
     /// Both the database and the term vector are held by reference, so they
     /// must outlive the functor.
     34class TermCompare {
     35    const Xapian::Database & db;
     36    vector<string> & terms;
     37
     38  public:
     39    TermCompare(const Xapian::Database & db_,
     40                vector<string> & terms_)
     41        : db(db_), terms(terms_) { }
     42
           /// Return true if terms[a] has a strictly lower collection frequency
           /// than terms[b].  Each call performs two get_collection_freq()
           /// lookups on the database.
     43    bool operator()(unsigned a, unsigned b) const {
     44        return db.get_collection_freq(terms[a]) < db.get_collection_freq(terms[b]);
     45    }
     46};
     47
     /** Construct a checker for the phrase formed by @a terms_ in @a db_.
      *
      *  With an empty term list no arrays are allocated and both poslists and
      *  order are NULL.  Otherwise one position list slot and one order entry
      *  are allocated per term, and order is sorted so that the terms with the
      *  lowest collection frequency come first.
      */
     48ExactPhraseCheck::ExactPhraseCheck(const Xapian::Database & db_,
     49                                   const vector<string> &terms_)
     50    : db(db_), terms(terms_)
     51{
     52    if (terms.empty()) {
     53        poslists = NULL;
     54        order = NULL;
     55        return;
     56    }
     57
     58    AssertRel(terms.size(),>,1);
     59    size_t n = terms_.size();
     60    poslists = new PositionList*[n];
           // If the second allocation fails, release the first before rethrowing
           // since the destructor won't run for a partially constructed object.
     61    try {
     62        order = new unsigned[n];
     63    } catch (...) {
     64        delete [] poslists;
     65        throw;
     66    }
           // No position lists are open yet; order starts as the identity mapping.
     67    for (size_t i = 0; i < n; ++i) {
     68        poslists[i] = NULL;
     69        order[i] = unsigned(i);
     70    }
     71
     72    // We often don't need to read all the position lists, so rather than using
     73    // the shortest position lists first, we approximate by using the terms
     74    // with the lowest collection freq first.  Overall this should give a
     75    // similar order.
           // NOTE(review): if sort() or the comparator throws here, poslists and
           // order are not released - confirm whether that can happen.
     76    sort(order, order + terms.size(), TermCompare(db, terms));
     77}
     78
     /// Release the slot and order arrays.
     ///
     /// Only the arrays themselves are freed here; operator() deletes the
     /// individual position lists and resets the slots to NULL before it
     /// returns.
     79ExactPhraseCheck::~ExactPhraseCheck()
     80{
     81    delete [] poslists;
     82    delete [] order;
     83}
     84
     /** Open the position list for slot @a i in document @a did.
      *
      *  Slot @a i corresponds to term number order[i] of the phrase; that term
      *  number is recorded in the position list's index member so the expected
      *  offset within the phrase can be recovered later.
      *
      *  @return false if the position iterator has no internal object (the
      *          slot is left unchanged in that case), true otherwise.
      */
     85bool
     86ExactPhraseCheck::start_position_list(unsigned i, Xapian::docid did)
     87{
     88    AssertRel(i,<,terms.size());
     89    unsigned index = order[i];
     90    // FIXME: nasty hacking around with internals and ref counts - we should
     91    // just add a new Database::Internal method to do what we want.
     92    Xapian::PositionIterator p = db.positionlist_begin(did, terms[index]);
     93    PositionList * tmp = p.internal;
     94    if (!tmp)
     95        return false;
            // Take an extra reference on the internal object, then hand the
            // slot's previous contents to the iterator in exchange.
            // NOTE(review): presumably this keeps tmp alive once p goes out of
            // scope - confirm against PositionIterator's destructor.
     96    ++tmp->_refs;
     97    p.internal = poslists[i];
     98    poslists[i] = tmp;
     99    poslists[i]->index = index;
     100    return true;
     101}
     102
     /** Test whether document @a did contains the terms as an exact phrase.
      *
      *  Returns true without consulting any positions when there are fewer
      *  than two terms.  Otherwise position lists are opened lazily, in the
      *  order computed by the constructor, and every position list opened is
      *  deleted again (and its slot reset to NULL) before returning.
      */
     103bool
     104ExactPhraseCheck::operator()(Xapian::docid did)
     105{
     106    LOGCALL(MATCH, bool, "ExactPhraseCheck::operator()", did);
     107
     108    if (terms.size() <= 1) RETURN(true);
     109
     110    // From here on there are at least two terms, so positions must be checked.
     111
     112    AssertRel(terms.size(),>,1);
     113
     114    bool result = false;
     115    // If the first term we check only occurs too close to the start of the
     116    // document, we only need to read one term's positions.  E.g. search for
     117    // "ripe mango" when the only occurrence of 'mango' in the current document
     118    // is at position 0.
     119    if (!start_position_list(0, did))
     120        goto done;
     121    poslists[0]->skip_to(poslists[0]->index);
     122    if (poslists[0]->at_end()) goto done;
     123
     124    // If we get here, we'll need to read the positionlists for at least two
     125    // terms, so check the true positionlist length for the two terms with the
     126    // lowest collection frequency and if necessary swap them so the true
            // shorter one is first.
            // NOTE(review): the condition below swaps when poslists[0] is already
            // the smaller of the two, which leaves the longer list in slot 0 -
            // this looks inverted relative to the comment above; confirm.
     127    if (!start_position_list(1, did))
     128        goto done;
     129    if (poslists[0]->get_size() < poslists[1]->get_size()) {
     130        poslists[1]->skip_to(poslists[1]->index);
     131        if (poslists[1]->at_end()) goto done;
     132        swap(poslists[0], poslists[1]);
     133    }
     134
     135    {
                // Highest slot number opened so far; later slots are opened on
                // demand the first time the inner loop reaches them.
     136        unsigned read_hwm = 1;
     137        Xapian::termpos idx0 = poslists[0]->index;
     138        do {
                    // Position at which the phrase would have to start for the
                    // current occurrence in slot 0 to be part of it.
     139            Xapian::termpos base = poslists[0]->get_position() - idx0;
     140            unsigned i = 1;
     141            while (true) {
     142                if (i > read_hwm) {
     143                    read_hwm = i;
     144                    if (!start_position_list(i, did))
     145                        goto done;
     146                    // FIXME: consider comparing with poslist[0] and swapping
     147                    // if less common.  Should we allow for the number of positions
     148                    // we've read from poslist[0] already?
     149                }
                        // The term in slot i must occur at exactly this position.
     150                Xapian::termpos required = base + poslists[i]->index;
     151                poslists[i]->skip_to(required);
     152                if (poslists[i]->at_end()) goto done;
     153                if (poslists[i]->get_position() != required) break;
     154                if (++i == terms.size()) {
     155                    result = true;
     156                    goto done;
     157                }
     158            }
     159            poslists[0]->next();
     160        } while (!poslists[0]->at_end());
     161    }
     162done:
            // Release every slot (unopened slots are NULL) so the next call starts
            // from a clean state.
     163    for (size_t i = 0; i < terms.size(); ++i) {
     164        delete poslists[i];
     165        poslists[i] = NULL;
     166    }
     167    RETURN(result);
     168}
  • matcher/multimatch.cc

     
    4949#include "valuestreamdocument.h"
    5050#include "weightinternal.h"
    5151
     52#include "exactphrasecheck.h"
     53
    5254#include <xapian/errorhandler.h>
    5355#include <xapian/matchspy.h>
    5456#include <xapian/version.h> // For XAPIAN_HAS_REMOTE_BACKEND
     
    359361    map<string, Xapian::MSet::Internal::TermFreqAndWeight> * termfreqandwts_ptr;
    360362    termfreqandwts_ptr = &termfreqandwts;
    361363
     364    vector<string> pool_terms;
    362365    Xapian::termcount total_subqs = 0;
    363366    // Keep a count of matches which we know exist, but we won't see.  This
    364367    // occurs when a submatch is remote, and returns a lower bound on the
     
    368371    for (size_t i = 0; i != leaves.size(); ++i) {
    369372        PostList *pl;
    370373        try {
     374            if (!is_remote[i]) pool_terms.clear();
    371375            pl = leaves[i]->get_postlist_and_term_info(this,
    372376                                                       termfreqandwts_ptr,
    373                                                        &total_subqs);
     377                                                       &total_subqs,
     378                                                       pool_terms);
    374379            if (termfreqandwts_ptr && !termfreqandwts.empty())
    375380                termfreqandwts_ptr = NULL;
    376381            if (is_remote[i]) {
     
    529534    // Is the mset a valid heap?
    530535    bool is_heap = false;
    531536
     537    size_t SETTLING_POND_SIZE = 0;
     538    if (!pool_terms.empty()) {
     539        const char * sps = getenv("POND_SIZE");
     540        SETTLING_POND_SIZE = sps ? atoi(sps) : 100000;
     541    }
     542    ExactPhraseCheck phrase_check(db, pool_terms);
     543    // FIXME: a min/max heap is probably a better choice here (notably more
     544    // compact) but the STL doesn't provide one so we'd have to find an
     545    // implementation or write one.
     546    multimap<Xapian::weight, Xapian::Internal::MSetItem> settling_pond;
    532547    while (true) {
    533548        bool pushback;
    534549
     
    650665            new_item.wt = wt;
    651666        }
    652667
     668        if (SETTLING_POND_SIZE) {
     669            if (items.size() >= max_msize) {
     670                // Settling pond handling...
     671                multimap<Xapian::weight, Xapian::Internal::MSetItem>::iterator it;
     672                it = settling_pond.upper_bound(-min_weight);
     673                settling_pond.erase(it, settling_pond.end());
     674
     675                settling_pond.insert(make_pair(-new_item.wt, new_item));
     676                if (settling_pond.size() < SETTLING_POND_SIZE) {
     677                    continue;
     678                }
     679
     680                // Take the first item out of the pond: keys are negated weights, so
     681                // begin() is the highest-weighted pending item.
     682                it = settling_pond.begin();
     683                swap(new_item, it->second);
     684                settling_pond.erase(it);
     685            }
     686            if (!phrase_check(new_item.did)) continue;
     687        }
     688
    653689        pushback = true;
    654690
    655691        // Perform collapsing on key if requested.
     
    812848        }
    813849    }
    814850
     851    multimap<Xapian::weight, Xapian::Internal::MSetItem>::iterator it;
     852    for (it = settling_pond.begin(); it != settling_pond.end(); ++it) {
     853        const Xapian::Internal::MSetItem & new_item = it->second;
     854        if (new_item.wt < min_weight) break;
     855        if (!phrase_check(new_item.did)) continue;
     856
     857        {
     858            ++docs_matched;
     859            if (items.size() >= max_msize) {
     860                items.push_back(new_item);
     861                if (!is_heap) {
     862                    is_heap = true;
     863                    make_heap(items.begin(), items.end(), mcmp);
     864                } else {
     865                    push_heap<vector<Xapian::Internal::MSetItem>::iterator,
     866                              MSetCmp>(items.begin(), items.end(), mcmp);
     867                }
     868                pop_heap<vector<Xapian::Internal::MSetItem>::iterator,
     869                         MSetCmp>(items.begin(), items.end(), mcmp);
     870                items.pop_back();
     871
     872                min_item = items.front();
     873                if (sort_by == REL || sort_by == REL_VAL) {
     874                    if (docs_matched >= check_at_least) {
     875                        if (sort_by == REL) {
     876                            // We're done if this is a forward boolean match
     877                            // with only one database (bodgetastic, FIXME
     878                            // better if we can!)
     879                            if (rare(max_possible == 0 && sort_forward)) {
     880                                // In the multi database case, MergePostList
     881                                // currently processes each database
     882                                // sequentially (which actually may well be
     883                                // more efficient) so the docids in general
     884                                // won't arrive in order.
     885                                // FIXME: is this still good here:
     886                                // if (leaves.size() == 1) break;
     887                            }
     888                        }
     889                        if (min_item.wt > min_weight) {
     890                            LOGLINE(MATCH, "Setting min_weight to " <<
     891                                    min_item.wt << " from " << min_weight);
     892                            min_weight = min_item.wt;
     893                        }
     894                    }
     895                }
     896            } else {
     897                items.push_back(new_item);
     898                is_heap = false;
     899                if (sort_by == REL && items.size() == max_msize) {
     900                    if (docs_matched >= check_at_least) {
     901                        // We're done if this is a forward boolean match
     902                        // with only one database (bodgetastic, FIXME
     903                        // better if we can!)
     904                        if (rare(max_possible == 0 && sort_forward)) {
     905                            // In the multi database case, MergePostList
     906                            // currently processes each database
     907                            // sequentially (which actually may well be
     908                            // more efficient) so the docids in general
     909                            // won't arrive in order.
     910                            // FIXME: if (leaves.size() == 1) break;
     911                        }
     912                    }
     913                }
     914            }
     915        }
     916
     917        // Keep a track of the greatest weight we've seen.
     918        if (new_item.wt > greatest_wt) {
     919            greatest_wt = new_item.wt;
     920#ifdef XAPIAN_HAS_REMOTE_BACKEND
     921            const unsigned int multiplier = db.internal.size();
     922            unsigned int db_num = (new_item.did - 1) % multiplier;
     923            if (is_remote[db_num]) {
     924                // Note that the greatest weighted document came from a remote
     925                // database, and which one.
     926                greatest_wt_subqs_db_num = db_num;
     927            } else
     928#endif
     929            {
     930                greatest_wt_subqs_matched = pl->count_matching_subqs();
     931#ifdef XAPIAN_HAS_REMOTE_BACKEND
     932                greatest_wt_subqs_db_num = UINT_MAX;
     933#endif
     934            }
     935            if (percent_cutoff) {
     936                Xapian::weight w = new_item.wt * percent_cutoff_factor;
     937                if (w > min_weight) {
     938                    min_weight = w;
     939                    if (!is_heap) {
     940                        is_heap = true;
     941                        make_heap<vector<Xapian::Internal::MSetItem>::iterator,
     942                                  MSetCmp>(items.begin(), items.end(), mcmp);
     943                    }
     944                    while (!items.empty() && items.front().wt < min_weight) {
     945                        pop_heap<vector<Xapian::Internal::MSetItem>::iterator,
     946                                 MSetCmp>(items.begin(), items.end(), mcmp);
     947                        Assert(items.back().wt < min_weight);
     948                        items.pop_back();
     949                    }
     950#ifdef XAPIAN_ASSERTIONS_PARANOID
     951                    vector<Xapian::Internal::MSetItem>::const_iterator i;
     952                    for (i = items.begin(); i != items.end(); ++i) {
     953                        Assert(i->wt >= min_weight);
     954                    }
     955#endif
     956                }
     957            }
     958        }
     959    }
     960
     961
    815962    // done with posting list tree
    816963    pl.reset(NULL);
    817964
  • matcher/exactphrasecheck.h

     
     1/** @file exactphrasecheck.h
     2 * @brief Check if terms form a particular exact phrase.
     3 */
     4/* Copyright (C) 2006 Olly Betts
     5 * Copyright (C) 2009 Lemur Consulting Ltd
     6 *
     7 * This program is free software; you can redistribute it and/or modify
     8 * it under the terms of the GNU General Public License as published by
     9 * the Free Software Foundation; either version 2 of the License, or
     10 * (at your option) any later version.
     11 *
     12 * This program is distributed in the hope that it will be useful,
     13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     15 * GNU General Public License for more details.
     16 *
     17 * You should have received a copy of the GNU General Public License
     18 * along with this program; if not, write to the Free Software
     19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
     20 */
     21
     22#ifndef XAPIAN_INCLUDED_EXACTPHRASEPOSTLIST_H
     23#define XAPIAN_INCLUDED_EXACTPHRASEPOSTLIST_H
     24
     25#include "xapian/database.h"
     26
     27#include <string>
     28#include <vector>
     29
     30typedef Xapian::PositionIterator::Internal PositionList;
     31
     32/** Check for an exact phrase using positional information.
     33 *
     34 *  Tests if the terms occur somewhere in the document in the order given
     35 *  and at adjacent term positions.
      *
      *  NOTE(review): the class owns the raw arrays below and frees them in its
      *  destructor, but declares no copy constructor or assignment operator -
      *  confirm that instances are never copied.
     36 */
     37class ExactPhraseCheck {
           /// Database the position lists are read from (held by value).
     38    Xapian::Database db;
     39
           /// The terms of the phrase, in phrase order (held by value).
     40    std::vector<std::string> terms;
     41
           /// One slot per term for the position list currently open in that
           /// slot; NULL when the slot is not open, or when terms is empty.
     42    PositionList ** poslists;
     43
           /// Maps slot number to term number; sorted by the constructor so
           /// terms with lower collection frequency are read first.
     44    unsigned * order;
     45
     46    /// Start reading from the i-th position list.
     47    bool start_position_list(unsigned i, Xapian::docid did);
     48
     49  public:
           /// Create a checker for the phrase made up of terms_ in database db_.
     50    ExactPhraseCheck(const Xapian::Database & db_,
     51                     const std::vector<std::string> &terms_);
     52
     53    ~ExactPhraseCheck();
     54
     55    /// Test if the specified document contains the terms as an exact phrase.
           /// Returns true without checking when there are fewer than two terms.
     56    bool operator()(Xapian::docid did);
     57};
     59#endif
  • matcher/localsubmatch.cc

     
    6969PostList *
    7070LocalSubMatch::get_postlist_and_term_info(MultiMatch * matcher,
    7171        map<string, Xapian::MSet::Internal::TermFreqAndWeight> * termfreqandwts,
    72         Xapian::termcount * total_subqs_ptr)
     72        Xapian::termcount * total_subqs_ptr,
     73        std::vector<std::string> & pool_terms)
    7374{
    7475    LOGCALL(MATCH, PostList *, "LocalSubMatch::get_postlist_and_term_info", matcher | termfreqandwts | total_subqs_ptr);
    7576    (void)matcher;
     
    7879    // Build the postlist tree for the query.  This calls
    7980    // LocalSubMatch::postlist_from_op_leaf_query() for each term in the query,
    8081    // which builds term_info as a side effect.
    81     QueryOptimiser opt(*db, *this, matcher);
     82    QueryOptimiser opt(*db, *this, matcher, pool_terms);
    8283    PostList * pl = opt.optimise_query(query);
    8384    *total_subqs_ptr = opt.get_total_subqueries();
    8485
  • matcher/remotesubmatch.cc

     
    6262PostList *
    6363RemoteSubMatch::get_postlist_and_term_info(MultiMatch *,
    6464        map<string, Xapian::MSet::Internal::TermFreqAndWeight> * termfreqandwts,
    65         Xapian::termcount * total_subqs_ptr)
     65        Xapian::termcount * total_subqs_ptr,
     66        std::vector<std::string> &)
    6667{
    6768    LOGCALL(MATCH, PostList *, "RemoteSubMatch::get_postlist_and_term_info", Literal("[matcher]") | termfreqandwts | total_subqs_ptr);
    6869    Xapian::MSet mset;
  • matcher/localsubmatch.h

     
    8989    PostList * get_postlist_and_term_info(MultiMatch *matcher,
    9090        std::map<std::string,
    9191                 Xapian::MSet::Internal::TermFreqAndWeight> *termfreqandwts,
    92         Xapian::termcount * total_subqs_ptr);
     92        Xapian::termcount * total_subqs_ptr,
     93        std::vector<std::string> & pool_terms);
    9394
    9495    /** Convert a postlist into a synonym postlist.
    9596     */
  • matcher/queryoptimiser.cc

     
    3030#include "emptypostlist.h"
    3131#include "exactphrasepostlist.h"
    3232#include "externalpostlist.h"
     33#include "leafpostlist.h"
    3334#include "multiandpostlist.h"
    3435#include "multimatch.h"
    3536#include "multixorpostlist.h"
     
    4950using namespace std;
    5051
    5152PostList *
    52 QueryOptimiser::do_subquery(const Xapian::Query::Internal * query, double factor)
     53QueryOptimiser::do_subquery(const Xapian::Query::Internal * query, double factor,
     54                            bool top_and)
    5355{
    5456    LOGCALL(MATCH, PostList *, "QueryOptimiser::do_subquery", query | factor);
    5557
     
    7880        case Xapian::Query::OP_FILTER:
    7981        case Xapian::Query::OP_NEAR:
    8082        case Xapian::Query::OP_PHRASE:
    81             RETURN(do_and_like(query, factor));
     83            RETURN(do_and_like(query, factor, top_and));
    8284
    8385        case Xapian::Query::OP_OR:
    8486        case Xapian::Query::OP_XOR:
     
    98100
    99101        case Xapian::Query::OP_AND_NOT: {
    100102            AssertEq(query->subqs.size(), 2);
    101             PostList * l = do_subquery(query->subqs[0], factor);
    102             PostList * r = do_subquery(query->subqs[1], 0.0);
     103            PostList * l = do_subquery(query->subqs[0], factor, top_and);
     104            PostList * r = do_subquery(query->subqs[1], 0.0, false);
    103105            RETURN(new AndNotPostList(l, r, matcher, db_size));
    104106        }
    105107
    106108        case Xapian::Query::OP_AND_MAYBE: {
    107109            AssertEq(query->subqs.size(), 2);
    108             PostList * l = do_subquery(query->subqs[0], factor);
    109             PostList * r = do_subquery(query->subqs[1], factor);
     110            PostList * l = do_subquery(query->subqs[0], factor, top_and);
     111            PostList * r = do_subquery(query->subqs[1], factor, false);
    110112            RETURN(new AndMaybePostList(l, r, matcher, db_size));
    111113        }
    112114
     
    153155            AssertEq(query->subqs.size(), 1);
    154156            double sub_factor = factor;
    155157            if (sub_factor != 0.0) sub_factor *= query->get_dbl_parameter();
    156             RETURN(do_subquery(query->subqs[0], sub_factor));
     158            RETURN(do_subquery(query->subqs[0], sub_factor, top_and));
    157159        }
    158160
    159161        default:
     
    176178};
    177179
    178180PostList *
    179 QueryOptimiser::do_and_like(const Xapian::Query::Internal *query, double factor)
     181QueryOptimiser::do_and_like(const Xapian::Query::Internal *query, double factor,
     182                            bool top_and)
    180183{
    181184    LOGCALL(MATCH, PostList *, "QueryOptimiser::do_and_like", query | factor);
    182185
     
    206209        } else if (window == filter.end - filter.begin) {
    207210            AssertEq(filter.op, Xapian::Query::OP_PHRASE);
    208211            pl = new ExactPhrasePostList(pl, terms_begin, terms_end);
     212            if (top_and) {
     213                vector<PostList *>::const_iterator j;
     214                for (j = terms_begin; j != terms_end; ++j) {
     215                    // FIXME: avoid dynamic_cast<> here.
     216                    LeafPostList * lpl = dynamic_cast<LeafPostList*>(*j);
     217                    if (!lpl || lpl->term.empty()) goto cannot_pool;
     218                    pool_terms.push_back(lpl->term);
     219                }
     220                top_and = false;
     221            } else {
     222cannot_pool:
     223                pl = new ExactPhrasePostList(pl, terms_begin, terms_end);
     224            }
    209225        } else {
    210226            AssertEq(filter.op, Xapian::Query::OP_PHRASE);
    211227            pl = new PhrasePostList(pl, window, terms_begin, terms_end);
     
    253269        if (is_and_like(subq->op)) {
    254270            do_and_like(subq, factor, and_plists, pos_filters);
    255271        } else {
    256             PostList * pl = do_subquery(subq, factor);
     272            PostList * pl = do_subquery(subq, factor, false);
    257273            and_plists.push_back(pl);
    258274        }
    259275    }
     
    264280        size_t begin = end - queries.size();
    265281        Xapian::termcount window = query->parameter;
    266282
     283        if (window == queries.size()) {
     284        }
    267285        pos_filters.push_back(PosFilter(op, begin, end, window));
    268286    }
    269287}
     
    347365
    348366    Xapian::Query::Internal::subquery_list::const_iterator q;
    349367    for (q = queries.begin(); q != queries.end(); ++q) {
    350         postlists.push_back(do_subquery(*q, factor));
     368        postlists.push_back(do_subquery(*q, factor, false));
    351369    }
    352370
    353371    if (op == Xapian::Query::OP_XOR) {
  • matcher/queryoptimiser.h

     
    4444
    4545    MultiMatch * matcher;
    4646
     47    std::vector<std::string> & pool_terms;
     48
    4749    /** How many leaf subqueries there are.
    4850     *
    4951     *  Used for scaling percentages when the highest weighted document doesn't
     
    5961     *  @return         A PostList subtree.
    6062     */
    6163    PostList * do_subquery(const Xapian::Query::Internal * query,
    62                            double factor);
     64                           double factor, bool top_and);
    6365
    6466    /** Optimise an AND-like Xapian::Query::Internal subtree into a PostList
    6567     *  subtree.
     
    6971     *
    7072     *  @return         A PostList subtree.
    7173     */
    72     PostList * do_and_like(const Xapian::Query::Internal *query, double factor);
     74    PostList * do_and_like(const Xapian::Query::Internal *query, double factor,
     75                           bool top_and);
    7376
    7477    /** Optimise an AND-like Xapian::Query::Internal subtree into a PostList
    7578     *  subtree.
     
    107110  public:
    108111    QueryOptimiser(const Xapian::Database::Internal & db_,
    109112                   LocalSubMatch & localsubmatch_,
    110                    MultiMatch * matcher_)
     113                   MultiMatch * matcher_,
     114                   std::vector<std::string> & pool_terms_)
    111115        : db(db_), db_size(db.get_doccount()), localsubmatch(localsubmatch_),
    112           matcher(matcher_), total_subqs(0) { }
     116          matcher(matcher_), pool_terms(pool_terms_), total_subqs(0) { }
    113117
    114118    PostList * optimise_query(const Xapian::Query::Internal * query) {
    115         return do_subquery(query, 1.0);
     119        return do_subquery(query, 1.0, true);
    116120    }
    117121
    118122    Xapian::termcount get_total_subqueries() const { return total_subqs; }
  • matcher/remotesubmatch.h

     
    7272    PostList * get_postlist_and_term_info(MultiMatch *matcher,
    7373        std::map<std::string,
    7474                 Xapian::MSet::Internal::TermFreqAndWeight> *termfreqandwts,
    75         Xapian::termcount * total_subqs_ptr);
     75        Xapian::termcount * total_subqs_ptr,
     76        std::vector<std::string> & pool_terms);
    7677
    7778    /// Get percentage factor - only valid after get_postlist_and_term_info().
    7879    double get_percent_factor() const { return percent_factor; }
  • common/leafpostlist.h

     
    4747
    4848    bool need_doclength;
    4949
     50  public: // FIXME: avoid having to make term public.
    5051    /// The term name for this postlist (empty for an alldocs postlist).
    5152    std::string term;
    5253
     54  protected:
    5355    /// Only constructable as a base class for derived classes.
    5456    LeafPostList(const std::string & term_)
    5557        : weight(0), need_doclength(false), term(term_) { }
  • common/submatch.h

     
    7676    virtual PostList * get_postlist_and_term_info(MultiMatch *matcher,
    7777        std::map<std::string,
    7878                 Xapian::MSet::Internal::TermFreqAndWeight> *termfreqandwts,
    79         Xapian::termcount * total_subqs_ptr)
     79        Xapian::termcount * total_subqs_ptr,
     80        std::vector<std::string> & pool_terms)
    8081        = 0;
    8182};
    8283