Ticket #279: remotepercentscale.patch

File remotepercentscale.patch, 4.8 kB (added by richard, 6 months ago)

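Patch that special-cases the percentage scaling calculation for remote databases: when the top-ranked result comes from a remote database, the percent factor already calculated by that database is reused instead of being recomputed locally.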

  • matcher/multimatch.cc

     
    779779        vector<Xapian::Internal::MSetItem>::const_iterator best; 
    780780        best = min_element(items.begin(), items.end(), mcmp); 
    781781 
    782         if (termfreqandwts.size() > 1) { 
    783             Xapian::termcount matching_terms = 0; 
    784             map<string, 
    785                 Xapian::MSet::Internal::TermFreqAndWeight>::const_iterator i; 
     782        unsigned int multiplier = db.internal.size(); 
     783        Assert(multiplier != 0); 
     784        Xapian::doccount n = (best->did - 1) % multiplier; // which actual database 
     785        // If the top result is from a remote database, then we can 
     786        // just use the percentage scaling calculated for that 
     787        // database. 
     788        if (is_remote[n]) { 
     789            RemoteSubMatch * rem_match; 
     790            rem_match = static_cast<RemoteSubMatch*>(leaves[n].get()); 
     791            percent_scale = rem_match->get_percent_factor(); 
     792        } else { 
     793            if (termfreqandwts.size() > 1) { 
     794                Xapian::termcount matching_terms = 0; 
     795                map<string, 
     796                    Xapian::MSet::Internal::TermFreqAndWeight>::const_iterator i; 
    786797 
    787             Xapian::TermIterator docterms = db.termlist_begin(best->did); 
    788             Xapian::TermIterator docterms_end = db.termlist_end(best->did); 
    789             while (docterms != docterms_end) { 
    790                 i = termfreqandwts.find(*docterms); 
     798                Xapian::TermIterator docterms = db.termlist_begin(best->did); 
     799                Xapian::TermIterator docterms_end = db.termlist_end(best->did); 
     800                while (docterms != docterms_end) { 
     801                    i = termfreqandwts.find(*docterms); 
     802                    if (i != termfreqandwts.end()) { 
     803                        percent_scale += i->second.termweight; 
     804                        ++matching_terms; 
     805                        if (matching_terms == termfreqandwts.size()) break; 
     806                    } 
     807                    ++docterms; 
     808                } 
     809                // Special case for MatchAll queries 
     810                i = termfreqandwts.find(""); 
    791811                if (i != termfreqandwts.end()) { 
    792812                    percent_scale += i->second.termweight; 
    793813                    ++matching_terms; 
    794                     if (matching_terms == termfreqandwts.size()) break; 
    795814                } 
    796                 ++docterms; 
    797             } 
    798             // Special case for MatchAll queries 
    799             i = termfreqandwts.find(""); 
    800             if (i != termfreqandwts.end()) { 
    801                 percent_scale += i->second.termweight; 
    802                 ++matching_terms; 
    803             } 
    804             if (matching_terms < termfreqandwts.size()) { 
    805                 // OK, work out weight corresponding to 100% 
    806                 double denom = 0; 
    807                 for (i = termfreqandwts.begin(); i != termfreqandwts.end(); ++i) 
    808                     denom += i->second.termweight; 
     815                if (matching_terms < termfreqandwts.size()) { 
     816                    // OK, work out weight corresponding to 100% 
     817                    double denom = 0; 
     818                    for (i = termfreqandwts.begin(); i != termfreqandwts.end(); ++i) 
     819                        denom += i->second.termweight; 
    809820 
    810                 DEBUGLINE(MATCH, "denom = " << denom << " percent_scale = " << percent_scale); 
    811                 Assert(percent_scale <= denom); 
    812                 denom *= greatest_wt; 
    813                 Assert(denom > 0); 
    814                 percent_scale /= denom; 
     821                    DEBUGLINE(MATCH, "denom = " << denom << " percent_scale = " << percent_scale); 
     822                    Assert(percent_scale <= denom); 
     823                    denom *= greatest_wt; 
     824                    Assert(denom > 0); 
     825                    percent_scale /= denom; 
     826                } else { 
     827                    // If all the terms match, the 2 sums of weights cancel 
     828                    percent_scale = 1.0 / greatest_wt; 
     829                } 
    815830            } else { 
    816                 // If all the terms match, the 2 sums of weights cancel 
     831                // If there's only a single term in the query, the top document 
     832                // must score 100%. 
    817833                percent_scale = 1.0 / greatest_wt; 
    818834            } 
    819         } else { 
    820             // If there's only a single term in the query, the top document 
    821             // must score 100%. 
    822             percent_scale = 1.0 / greatest_wt; 
    823835        } 
    824836        Assert(percent_scale > 0); 
    825837        if (percent_cutoff) { 
  • matcher/remotesubmatch.cc

     
    6363              "[matcher], " << (void*)termfreqandwts); 
    6464    Xapian::MSet mset; 
    6565    db->get_mset(mset); 
     66    percent_factor = mset.internal->percent_factor; 
    6667    if (termfreqandwts) *termfreqandwts = mset.internal->termfreqandwts; 
    6768    return new MSetPostList(mset, decreasing_relevance); 
    6869} 
  • matcher/remotesubmatch.h

     
    4444     */ 
    4545    bool decreasing_relevance; 
    4646 
     47    /// The factor to use to convert weights to percentages. 
     48    double percent_factor; 
     49 
    4750  public: 
    4851    /// Constructor. 
    4952    RemoteSubMatch(RemoteDatabase *db_, bool decreasing_relevance_); 
     
    6164    PostList * get_postlist_and_term_info(MultiMatch *matcher, 
    6265        map<string, Xapian::MSet::Internal::TermFreqAndWeight> *termfreqandwts); 
    6366 
     67    /// Get percentage factor - only valid after get_postlist_and_term_info(). 
     68    double get_percent_factor() const { return percent_factor; } 
     69 
    6470    /// Short-cut for single remote match. 
    6571    void get_mset(Xapian::MSet & mset) { db->get_mset(mset); } 
    6672};