Ticket #608: ticket-608-max-wildcard.patch

File ticket-608-max-wildcard.patch, 3.2 KB (added by Olly Betts, 12 years ago)

updated patch against trunk

  • xapian-core/queryparser/queryparser.lemony

    diff --git a/xapian-core/queryparser/queryparser.lemony b/xapian-core/queryparser/queryparser.lemony
    index ec4538a..68252b0 100644
    a b Term::as_wildcarded_query(State * state_) const  
    481481    return q;
    482482}
    483483
     484struct TermF {
     485    string term;
     486    Xapian::termcount freq;
     487
     488    TermF(const string & t, Xapian::termcount f) : term(t), freq(f) { }
     489};
     490
     491struct CompareFreq {
     492    bool operator()(const TermF & a, const TermF & b) {
     493        return a.freq > b.freq;
     494    }
     495};
     496
     497// Iterator shim to allow building a query from vector<TermF>.
     498class TermFIterator {
     499    vector<TermF>::const_iterator i;
     500
     501    Xapian::termpos pos;
     502
     503  public:
     504    TermFIterator(vector<TermF>::const_iterator i_, Xapian::termpos pos_ = 0)
     505        : i(i_), pos(pos_) { }
     506
     507    TermFIterator & operator++() {
     508        ++i;
     509        return *this;
     510    }
     511
     512    const Xapian::Query operator*() const {
     513        return Xapian::Query(i->term, 1, pos);
     514    }
     515
     516    bool operator==(const TermFIterator & o) {
     517        return i == o.i;
     518    }
     519
     520    bool operator!=(const TermFIterator & o) {
     521        return !(*this == o);
     522    }
     523
     524    typedef std::input_iterator_tag iterator_category;
     525    typedef Xapian::Query value_type;
     526    typedef Xapian::termcount_diff difference_type;
     527    typedef Xapian::Query * pointer;
     528    typedef Xapian::Query & reference;
     529};
     530
    484531Query *
    485532Term::as_partial_query(State * state_) const
    486533{
    487534    const Database & db = state_->get_database();
    488     vector<Query> subqs_partial; // A synonym of all the partial terms.
    489535    vector<Query> subqs_full; // A synonym of all the full terms.
    490536
     537    Xapian::termcount max_k = state_->get_max_wildcard_expansion();
     538    if (max_k == 0) max_k = 100;
     539    Xapian::termcount min_freq = static_cast<Xapian::termcount>(-1);
     540
     541    vector<TermF> terms;
     542    Xapian::termcount k = 0;
     543
    491544    const list<string> & prefixes = field_info->prefixes;
    492545    list<string>::const_iterator piter;
    493546    for (piter = prefixes.begin(); piter != prefixes.end(); ++piter) {
    Term::as_partial_query(State * state_) const  
    495548        root += name;
    496549        TermIterator t = db.allterms_begin(root);
    497550        while (t != db.allterms_end(root)) {
    498             subqs_partial.push_back(Query(*t, 1, pos));
     551            Xapian::termcount freq = t.get_termfreq();
     552            if (k < max_k) {
     553                terms.push_back(TermF(*t, freq));
     554            } else {
     555                if (k == max_k) {
     556                    make_heap(terms.begin(), terms.end(), CompareFreq());
     557                    min_freq = terms.front().freq;
     558                }
     559
     560                if (min_freq < freq) {
     561                    pop_heap(terms.begin(),terms.end(), CompareFreq());
     562                    terms.pop_back();
     563                    terms.push_back(TermF(*t, freq));
     564                    push_heap(terms.begin(), terms.end(), CompareFreq());
     565                    min_freq = terms.front().freq;
     566                }
     567            }
     568
     569            ++k;
    499570            ++t;
    500571        }
    501572        // Add the term, as it would normally be handled, as an alternative.
    502573        subqs_full.push_back(Query(make_term(*piter), 1, pos));
    503574    }
     575
    504576    Query * q = new Query(Query::OP_OR,
    505577                          Query(Query::OP_SYNONYM,
    506                                 subqs_partial.begin(), subqs_partial.end()),
     578                                TermFIterator(terms.begin(), pos),
     579                                TermFIterator(terms.end())),
    507580                          Query(Query::OP_SYNONYM,
    508581                                subqs_full.begin(), subqs_full.end()));
    509582    delete this;