Ticket #608: x.patch

File x.patch, 2.1 KB (added by Greg, 12 years ago)

A bare-bones implementation that sorts terms by their frequency

  • queryparser.lemony

    diff -ubB v1/queryparser.lemony v2/queryparser.lemony
    v1 v2  
    412412    return q;
    413413}
    414414
     415class TermF
     416{
     417    public:
     418        string term;
     419        Xapian::termcount freq;
     420
     421        TermF(string t, Xapian::termcount f) {
     422            term = t;
     423            freq = f;
     424        }
     425};
     426
     427struct CompareFreq {
     428    bool operator()(const TermF a, const TermF b) {
     429    return a.freq > b.freq;
     430    }
     431};
     432
    415433Query *
    416434Term::as_partial_query(State * state_) const
    417435{
     
    419437    vector<Query> subqs_partial; // A synonym of all the partial terms.
    420438    vector<Query> subqs_full; // A synonym of all the full terms.
    421439
     440    unsigned int maxK = state_->get_max_wildcard_expansion();
     441    if(maxK == 0) maxK = 100;
     442    unsigned int K = 0;
     443    Xapian::termcount min_freq = 1000000;
     444    Xapian::termcount freq = 0;
     445
    422446    const list<string> & prefixes = prefix_info->prefixes;
    423447    list<string>::const_iterator piter;
     448
    424449    for (piter = prefixes.begin(); piter != prefixes.end(); ++piter) {
    425450        string root = *piter;
    426451        root += name;
     
    425450        string root = *piter;
    426451        root += name;
    427452        TermIterator t = db.allterms_begin(root);
     453        vector<TermF> terms;
     454
    428455        while (t != db.allterms_end(root)) {
    429             subqs_partial.push_back(Query(*t, 1, pos));
     456            freq = t.get_termfreq();
     457            if(K < maxK) {
     458                if(min_freq > freq)
     459                    min_freq = freq;
     460
     461                terms.push_back(TermF(*t, freq));
     462            }
     463            else {
     464                if(K == maxK) {
     465                    make_heap( terms.begin(), terms.end(), CompareFreq() );
     466                }
     467
     468                if(min_freq < freq) {
     469                    pop_heap( terms.begin(),terms.end(), CompareFreq() ); terms.pop_back();
     470                    terms.push_back( TermF(*t, freq) ); push_heap( terms.begin(), terms.end(), CompareFreq() );
     471                    min_freq = terms.front().freq;
     472                }
     473            }
     474
     475            ++K;
    430476            ++t;
    431477        }
     478
     479        for(unsigned int j = 0; j < terms.size(); j++)
     480        {
     481            subqs_partial.push_back(Query(terms[j].term, 1, pos));
     482        }
     483
    432484        // Add the term, as it would normally be handled, as an alternative.
    433485        subqs_full.push_back(Query(make_term(*piter), 1, pos));
    434486    }