Ticket #279: remotepercentscale.patch

File remotepercentscale.patch, 4.8 KB (added by Richard Boulton, 16 years ago)

Patch special-casing the percentage calculation for remote databases

  • matcher/multimatch.cc

     
    779779        vector<Xapian::Internal::MSetItem>::const_iterator best;
    780780        best = min_element(items.begin(), items.end(), mcmp);
    781781
    782         if (termfreqandwts.size() > 1) {
    783             Xapian::termcount matching_terms = 0;
    784             map<string,
    785                 Xapian::MSet::Internal::TermFreqAndWeight>::const_iterator i;
     782        unsigned int multiplier = db.internal.size();
     783        Assert(multiplier != 0);
     784        Xapian::doccount n = (best->did - 1) % multiplier; // which actual database
     785        // If the top result is from a remote database, then we can
     786        // just use the percentage scaling calculated for that
     787        // database.
     788        if (is_remote[n]) {
     789            RemoteSubMatch * rem_match;
     790            rem_match = static_cast<RemoteSubMatch*>(leaves[n].get());
     791            percent_scale = rem_match->get_percent_factor();
     792        } else {
     793            if (termfreqandwts.size() > 1) {
     794                Xapian::termcount matching_terms = 0;
     795                map<string,
     796                    Xapian::MSet::Internal::TermFreqAndWeight>::const_iterator i;
    786797
    787             Xapian::TermIterator docterms = db.termlist_begin(best->did);
    788             Xapian::TermIterator docterms_end = db.termlist_end(best->did);
    789             while (docterms != docterms_end) {
    790                 i = termfreqandwts.find(*docterms);
     798                Xapian::TermIterator docterms = db.termlist_begin(best->did);
     799                Xapian::TermIterator docterms_end = db.termlist_end(best->did);
     800                while (docterms != docterms_end) {
     801                    i = termfreqandwts.find(*docterms);
     802                    if (i != termfreqandwts.end()) {
     803                        percent_scale += i->second.termweight;
     804                        ++matching_terms;
     805                        if (matching_terms == termfreqandwts.size()) break;
     806                    }
     807                    ++docterms;
     808                }
     809                // Special case for MatchAll queries
     810                i = termfreqandwts.find("");
    791811                if (i != termfreqandwts.end()) {
    792812                    percent_scale += i->second.termweight;
    793813                    ++matching_terms;
    794                     if (matching_terms == termfreqandwts.size()) break;
    795814                }
    796                 ++docterms;
    797             }
    798             // Special case for MatchAll queries
    799             i = termfreqandwts.find("");
    800             if (i != termfreqandwts.end()) {
    801                 percent_scale += i->second.termweight;
    802                 ++matching_terms;
    803             }
    804             if (matching_terms < termfreqandwts.size()) {
    805                 // OK, work out weight corresponding to 100%
    806                 double denom = 0;
    807                 for (i = termfreqandwts.begin(); i != termfreqandwts.end(); ++i)
    808                     denom += i->second.termweight;
     815                if (matching_terms < termfreqandwts.size()) {
     816                    // OK, work out weight corresponding to 100%
     817                    double denom = 0;
     818                    for (i = termfreqandwts.begin(); i != termfreqandwts.end(); ++i)
     819                        denom += i->second.termweight;
    809820
    810                 DEBUGLINE(MATCH, "denom = " << denom << " percent_scale = " << percent_scale);
    811                 Assert(percent_scale <= denom);
    812                 denom *= greatest_wt;
    813                 Assert(denom > 0);
    814                 percent_scale /= denom;
     821                    DEBUGLINE(MATCH, "denom = " << denom << " percent_scale = " << percent_scale);
     822                    Assert(percent_scale <= denom);
     823                    denom *= greatest_wt;
     824                    Assert(denom > 0);
     825                    percent_scale /= denom;
     826                } else {
     827                    // If all the terms match, the 2 sums of weights cancel
     828                    percent_scale = 1.0 / greatest_wt;
     829                }
    815830            } else {
    816                 // If all the terms match, the 2 sums of weights cancel
     831                // If there's only a single term in the query, the top document
     832                // must score 100%.
    817833                percent_scale = 1.0 / greatest_wt;
    818834            }
    819         } else {
    820             // If there's only a single term in the query, the top document
    821             // must score 100%.
    822             percent_scale = 1.0 / greatest_wt;
    823835        }
    824836        Assert(percent_scale > 0);
    825837        if (percent_cutoff) {
  • matcher/remotesubmatch.cc

     
    6363              "[matcher], " << (void*)termfreqandwts);
    6464    Xapian::MSet mset;
    6565    db->get_mset(mset);
     66    percent_factor = mset.internal->percent_factor;
    6667    if (termfreqandwts) *termfreqandwts = mset.internal->termfreqandwts;
    6768    return new MSetPostList(mset, decreasing_relevance);
    6869}
  • matcher/remotesubmatch.h

     
    4444     */
    4545    bool decreasing_relevance;
    4646
     47    /// The factor to use to convert weights to percentages.
     48    double percent_factor;
     49
    4750  public:
    4851    /// Constructor.
    4952    RemoteSubMatch(RemoteDatabase *db_, bool decreasing_relevance_);
     
    6164    PostList * get_postlist_and_term_info(MultiMatch *matcher,
    6265        map<string, Xapian::MSet::Internal::TermFreqAndWeight> *termfreqandwts);
    6366
     67    /// Get percentage factor - only valid after get_postlist_and_term_info().
     68    double get_percent_factor() const { return percent_factor; }
     69
    6470    /// Short-cut for single remote match.
    6571    void get_mset(Xapian::MSet & mset) { db->get_mset(mset); }
    6672};