Ticket #164: weak_skip_to.patch

File weak_skip_to.patch, 12.4 KB (added by Richard Boulton, 17 years ago)

Rough implementation of a possible fix.

  • matcher/branchpostlist.h

     
    5656        /** Utility method, to call recalc_maxweight() and do the pruning
    5757         *  if a next() or skip_to() returns non-NULL result.
                 *
                 *  @param kid  Sub-postlist pointer; when \a ret is non-NULL the
                 *              postlist it points to is deleted and \a kid is set
                 *              to \a ret.
                 *  @param ret  Result returned by a next() or skip_to() call.
                 *  @return true if \a ret was non-NULL (so \a kid was replaced and
                 *          the matcher was told to recalculate maximum weights),
                 *          false otherwise.  The removed revision of this method
                 *          returned void.
    5858         */
    59         void handle_prune(PostList *&kid, PostList *ret) {
     59        bool handle_prune(PostList *&kid, PostList *ret) {
    6060            if (ret) {
    6161                delete kid;
    6262                kid = ret;
    6363
    6464                // now tell matcher that maximum weights need recalculation.
    6565                matcher->recalc_maxweight();
     66                return true;
    6667            }
     68            return false;
    6769        }
    6870
    6971    public:
     
    123125    return true;
    124126}
    125127
     128inline bool
     129weak_skip_to_handling_prune(PostList * & pl, Xapian::docid did, Xapian::weight w_min,
     130                            MultiMatch *matcher, bool & found)
     131{
            // Calls weak_skip_to(did, w_min, found) on pl.  If that returns a
            // non-NULL postlist, pl is deleted and replaced by it, matcher (when
            // non-NULL) is told to recalculate maximum weights, and true is
            // returned.  Otherwise pl is left in place and false is returned.
            // found is passed straight through to weak_skip_to(), so on return
            // it holds whatever that call stored in it.
     132    PostList *p = pl->weak_skip_to(did, w_min, found);
     133    if (!p) return false;
     134    delete pl;
     135    pl = p;
     136    // now tell matcher that maximum weights need recalculation.
     137    if (matcher) matcher->recalc_maxweight();
     138    return true;
     139}
     140
    126141#endif /* OM_HGUARD_BRANCHPOSTLIST_H */
  • matcher/andpostlist.h

     
    3737    protected:
    3838        Xapian::docid head;
    3939        Xapian::weight lmax, rmax;
     40
     41        bool l_weak;
     42        bool r_weak;
     43
    4044    private:
    4145        Xapian::doccount dbsize;
    4246
    43         void process_next_or_skip_to(Xapian::weight w_min, PostList *ret);
     47        void process_next_or_skip_to(Xapian::weight w_min, Xapian::docid did);
     48        void process_half_weak_next_or_skip_to(Xapian::weight w_min, Xapian::docid did);
     49
     50        /// If r is weak, but l isn't, swap r and l.
     51        /// Return true if a swap happened.
     52        bool swap_to_make_l_weak();
    4453    public:
    4554        Xapian::doccount get_termfreq_max() const;
    4655        Xapian::doccount get_termfreq_min() const;
  • matcher/valuerangepostlist.cc

     
    100100ValueRangePostList::next(Xapian::weight)
    101101{
    102102    Assert(db);
    103     Xapian::docid lastdocid = db->get_lastdocid();
     103    AssertParanoid(lastdocid == db->get_lastdocid());
    104104    while (current < lastdocid) {
    105105        try {
    106106            if (++current == 0) break;
     
    125125    return next(w_min);
    126126}
    127127
     128PostList *
     129ValueRangePostList::weak_skip_to(Xapian::docid did, Xapian::weight,
     130                                 bool & found)
     131{
            // Cheap variant of skip_to(): only document did itself is examined.
            // found is set to true when did <= current, when did is beyond
            // lastdocid, or when document did has a value in slot valno lying in
            // [begin, end]; otherwise found is set to false.  The weight argument
            // is unused and the return value is always NULL.
     132    Assert(db);
     133    AssertParanoid(lastdocid == db->get_lastdocid());
            // NOTE(review): if an earlier call returned with found == false,
            // current was left on a docid that did not match; a later call with
            // did <= current then reports found = true without re-checking it.
            // The current_valid member declared in the header is not updated
            // anywhere in this function - confirm whether it was meant to guard
            // this case.
     134    if (did <= current) {
     135        found = true;
     136        return NULL;
     137    }
     138    current = did;
     139    if (current > lastdocid) {
                // Past the highest docid: db is cleared here (presumably this is
                // what at_end() tests - its body is not visible in this hunk).
     140        db = NULL;
     141        found = true;
     142        return NULL;
     143    }
     144
     145    try {
     146        AutoPtr<Xapian::Document::Internal> doc(db->open_document(current, true));
     147        string v = doc->get_value(valno);
     148        if (v >= begin && v <= end) {
     149            found = true;
     150            return NULL;
     151        }
     152    } catch (const Xapian::DocNotFoundError &) {
     153        // Document doesn't exist.
     154    }
     155    // Haven't determined the position of the next match, other than knowing that
     156    // it's not at document "did".
     157    found = false;
     158    return NULL;
     159}
     160
    128161bool
    129162ValueRangePostList::at_end() const
    130163{
  • matcher/valuerangepostlist.h

     
    3333
    3434    Xapian::docid current;
    3535
     36    /// Flag, true iff \a current points to a document which satisfies the restriction.
     37    bool current_valid;
     38
     39    /// Highest docid used in the database.
     40    Xapian::docid lastdocid;
     41
    3642    /// Disallow copying.
    3743    ValueRangePostList(const ValueRangePostList &);
    3844
     
    4349    ValueRangePostList(const Xapian::Database::Internal *db_,
    4450                       Xapian::valueno valno_,
    4551                       const std::string &begin_, const std::string &end_)
    46         : db(db_), valno(valno_), begin(begin_), end(end_), current(0) { }
            // The added revision also caches the database's last docid at
            // construction time; next() and weak_skip_to() only re-check it
            // against the database under AssertParanoid.
            // NOTE(review): lastdocid is initialised from the member db rather
            // than the parameter db_, so this relies on db being declared before
            // lastdocid in the class (db's declaration is not visible in this
            // hunk) and on db_ being non-NULL - confirm.
     52        : db(db_), valno(valno_), begin(begin_), end(end_), current(0),
     53          current_valid(false), lastdocid(db->get_lastdocid())
     54    { }
    4755
    4856    Xapian::doccount get_termfreq_min() const;
    4957
     
    6977
    7078    PostList * skip_to(Xapian::docid, Xapian::weight w_min);
    7179
     80    PostList * weak_skip_to(Xapian::docid did, Xapian::weight w_min, bool & found);
     81
     82    bool prefer_weak_skip_to() { return true; }  // ask callers to use weak_skip_to() on this postlist
     83
    7284    bool at_end() const;
    7385
    7486    string get_description() const;
  • matcher/andpostlist.cc

     
    2727#include "omdebug.h"
    2828
    2929inline void
    30 AndPostList::process_next_or_skip_to(Xapian::weight w_min, PostList *ret)
     30AndPostList::process_next_or_skip_to(Xapian::weight w_min, Xapian::docid did)
    3131{
    3232    DEBUGCALL(MATCH, void, "AndPostList::process_next_or_skip_to",
    33               w_min << ", " << ret);
     33              w_min << ", " << did);
     34
    3435    head = 0;
    35     handle_prune(r, ret);
     36    if (did == 0) {
     37        if (handle_prune(r, r->next(w_min - lmax)))
     38            swap_to_make_l_weak();
     39    } else {
     40        if  (handle_prune(r, r->skip_to(did, w_min - lmax)))
     41            swap_to_make_l_weak();
     42    }
     43
    3644    DEBUGLINE(MATCH, "r at_end = " << r->at_end());
    3745    if (r->at_end()) return;
    3846
     
    4149    Xapian::docid rhead = r->get_docid();
    4250    DEBUGLINE(MATCH, "rhead " << rhead);
    4351    DEBUGLINE(MATCH, "w_min " << w_min << " rmax " << rmax);
    44     skip_to_handling_prune(l, rhead, w_min - rmax, matcher);
     52    if (skip_to_handling_prune(l, rhead, w_min - rmax, matcher))
     53        swap_to_make_l_weak();
    4554    DEBUGLINE(MATCH, "l at_end = " << l->at_end());
    4655    if (l->at_end()) return;
    4756
     
    5261        if (lhead < rhead) {
    5362            // FIXME: CSE these w_min values?
    5463            // But note that lmax and rmax may change on recalc_maxweight...
    55             skip_to_handling_prune(l, rhead, w_min - rmax, matcher);
     64            if (skip_to_handling_prune(l, rhead, w_min - rmax, matcher))
     65                swap_to_make_l_weak();
    5666            DEBUGLINE(MATCH, "l at_end = " << l->at_end());
    5767            if (l->at_end()) {
    5868                head = 0;
     
    6171            lhead = l->get_docid();
    6272            DEBUGLINE(MATCH, "lhead " << lhead);
    6373        } else {
    64             skip_to_handling_prune(r, lhead, w_min - lmax, matcher);
     74            if (skip_to_handling_prune(r, lhead, w_min - lmax, matcher))
     75                swap_to_make_l_weak();
    6576            DEBUGLINE(MATCH, "r at_end = " << r->at_end());
    6677            if (r->at_end()) {
    6778                head = 0;
     
    7687    return;
    7788}
    7889
     90inline void
     91AndPostList::process_half_weak_next_or_skip_to(Xapian::weight w_min, Xapian::docid did)
     92{
           // Variant of process_next_or_skip_to() in which l is advanced with
           // weak_skip_to() (via weak_skip_to_handling_prune) instead of
           // skip_to().  next() and skip_to() dispatch here when l_weak is true
           // and r_weak is false.
           //
           // did == 0 means "advance r with next()"; any other value means
           // "skip r to did".  On success head is set to the docid both lists
           // agree on; if either list reaches its end the function returns early
           // with head left at 0.
     93    DEBUGCALL(MATCH, void, "AndPostList::process_half_weak_next_or_skip_to",
     94              w_min << ", " << did);
     95
           // True whenever the most recent advance replaced a sub-postlist, in
           // which case the weak/non-weak roles are re-evaluated in the loop.
     96    bool pruned = false;
     97
     98    head = 0;
     99    if (did == 0) {
     100        pruned = handle_prune(r, r->next(w_min - lmax));
     101    } else {
     102        pruned = handle_prune(r, r->skip_to(did, w_min - lmax));
     103    }
     104
     105    DEBUGLINE(MATCH, "r at_end = " << r->at_end());
     106    if (r->at_end()) return;
     107
     108    // r has just been advanced by next or skip_to so must be > head
     109    // (and head is the current position of l)
     110    Xapian::docid rhead = r->get_docid();
     111    DEBUGLINE(MATCH, "rhead " << rhead);
     112    DEBUGLINE(MATCH, "w_min " << w_min << " rmax " << rmax);
            // NOTE(review): lhead starts out equal to rhead with found == false,
            // so the first loop iteration takes the "lhead == rhead, not found"
            // branch and calls next() on r before l has ever been tested at
            // rhead.  That appears to skip the first candidate docid - confirm
            // whether lhead should instead start below rhead (e.g. 0) so that l
            // is weak-skipped to rhead first.
     113    bool found = false;
     114    Xapian::docid lhead = rhead;
     115
     116    while (true) {
                // A prune may have changed which side prefers weak_skip_to(); if
                // l and r were exchanged, exchange the cached docids to match.
     117        if (pruned && swap_to_make_l_weak()) {
     118            DEBUGLINE(MATCH, "swapped l and r - swapping lhead and rhead");
     119            Xapian::docid tmp = lhead;
     120            lhead = rhead;
     121            rhead = tmp;
     122        }
     123
     124        if (lhead == rhead) {
                    // Same docid on both sides: it is a match only if the last
                    // weak_skip_to() on l reported found.
     125            if (found) break;
     126            // Not found, therefore l needs to be advanced, therefore so does
     127            // r, but it's probably more efficient to move r first.
     128            pruned = next_handling_prune(r, w_min - lmax, matcher);
     129            DEBUGLINE(MATCH, "r at_end = " << r->at_end());
     130            if (r->at_end()) return;
     131            rhead = r->get_docid();
     132            DEBUGLINE(MATCH, "rhead " << rhead);
     133        } else if (lhead < rhead) {
                    // l is behind: try (weakly) to bring it up to r's position.
     134            pruned = weak_skip_to_handling_prune(l, rhead, w_min - rmax, matcher, found);
     135            DEBUGLINE(MATCH, "l at_end = " << l->at_end());
     136            if (l->at_end()) return;
     137            lhead = l->get_docid();
     138            DEBUGLINE(MATCH, "l found = " << found << ", lhead = " << lhead);
     139        } else if (lhead > rhead) {
                    // r is behind: bring it up to l's position with a full skip_to().
     140            pruned = skip_to_handling_prune(r, lhead, w_min - lmax, matcher);
     141            DEBUGLINE(MATCH, "r at_end = " << r->at_end());
     142            if (r->at_end()) return;
     143            rhead = r->get_docid();
     144            DEBUGLINE(MATCH, "rhead " << rhead);
     145        }
     146    }
     147
            // Loop exits only via the break above: lhead == rhead and found is true.
     148    head = lhead;
     149    return;
     150}
     151
    79152AndPostList::AndPostList(PostList *left_, PostList *right_,
    80153                         MultiMatch *matcher_,
    81154                         Xapian::doccount dbsize_,
     
    90163        lmax = l->get_maxweight();
    91164        rmax = r->get_maxweight();
    92165    }
     166
     167    swap_to_make_l_weak();
    93168}
    94169
     170bool
     171AndPostList::swap_to_make_l_weak()
     172{
            // Refreshes l_weak and r_weak from the sub-postlists'
            // prefer_weak_skip_to().  If exactly one side is weak and that side
            // is r, l is exchanged with r (and lmax with rmax) so that the weak
            // side is always l.  Returns true only when an exchange happened;
            // a caller holding cached docids for l and r has to exchange those
            // itself (process_half_weak_next_or_skip_to does so).
     173    DEBUGCALL(MATCH, bool, "AndPostList::swap_to_make_l_weak", "");
     174    l_weak = l->prefer_weak_skip_to();
     175    r_weak = r->prefer_weak_skip_to();
     176    DEBUGLINE(MATCH, "l_weak=" << l_weak << ", r_weak=" << r_weak);
     177    if (l_weak != r_weak) {
     178        if (r_weak) {
     179            // Swap, so that l is weak if only one subtree is weak.
     180            PostList * tmp_pl = r; r = l; l = tmp_pl;
     181            Xapian::weight tmp_wt = rmax; rmax = lmax; lmax = tmp_wt;
     182            l_weak = true;
     183            r_weak = false;
     184            AssertEq(l_weak, l->prefer_weak_skip_to());
     185            AssertEq(r_weak, r->prefer_weak_skip_to());
     186            RETURN(true);
     187        }
     188    }
            // Either both sides agree, or l is already the weak one: nothing moved.
     189    RETURN(false);
     190}
     191
    95192PostList *
    96193AndPostList::next(Xapian::weight w_min)
    97194{
    98195    DEBUGCALL(MATCH, PostList *, "AndPostList::next", w_min);
    99     process_next_or_skip_to(w_min, r->next(w_min - lmax));
            // In the added revision the helpers advance r themselves: passing
            // did == 0 tells either helper to call next() on r rather than
            // skip_to().  The half-weak helper is chosen only when l prefers
            // weak_skip_to() and r does not.
     196    if (l_weak && !r_weak) {
     197        process_half_weak_next_or_skip_to(w_min, 0);
     198    } else {
     199        process_next_or_skip_to(w_min, 0);
     200    }
    100201    RETURN(NULL);
    101202}
    102203
     
    104205AndPostList::skip_to(Xapian::docid did, Xapian::weight w_min)
    105206{
    106207    DEBUGCALL(MATCH, PostList *, "AndPostList::skip_to", did << ", " << w_min);
    107     if (did > head)
    108         process_next_or_skip_to(w_min, r->skip_to(did, w_min - lmax));
     208    if (did > head) {
     209        if (l_weak && !r_weak) {
     210            process_half_weak_next_or_skip_to(w_min, did);
     211        } else {
     212            process_next_or_skip_to(w_min, did);
     213        }
     214    }
    109215    RETURN(NULL);
    110216}
    111217
  • common/postlist.h

     
    180180         */
    181181        virtual Internal *skip_to(Xapian::docid, Xapian::weight w_min) = 0;
    182182
     183        /** Attempt to find the first entry in the postlist at or after a
     184         *  given location.
     185         *
     186         *  This attempts to find the first entry in the postlist at or after a
     187         *  given location, but if it can't quickly determine which document
     188         *  this is, it will give up and report failure, rather than spend a
     189         *  great deal of time looking for the document.
     190         *
     191         *  If there is an entry in the postlist at exactly the given location,
     192         *  this call must find it.
     193         *
     194         *  If the first entry is found, the \a found parameter is set to true,
     195         *  and the postlist is left in exactly the same state as it would be
     196         *  if \a skip_to() had been called.
     197         *
     198         *  If the first entry is not found, the \a found parameter is set to
     199         *  false, and the postlist is left pointing to no document, but the
     200         *  next time that next() is called, the postlist will be moved to the
     201         *  first entry after that specified in this call.  (Or, if the next
     202         *  call is to skip_to(), the skip_to() will behave as if the postlist
     203         *  currently pointed to a document in the range defined by that
     204         *  specified in this call and the location of the actual next entry in
     205         *  the list.)  If \a get_docid() is called immediately after this
     206         *  method has returned having set \a found to false, a value for the
     207         *  lowest possible document ID which might be the next matching entry
     208         *  should be returned.  (This can always be calculated cheaply,
     209         *  because \a did is always a valid return value here.)
     210         *
                 *  The default implementation sets \a found to true and forwards to
                 *  skip_to(), so a subclass only needs to override this if it can
                 *  do something cheaper.
                 *
                 *  @param did   The location (document ID) to look at or after.
                 *  @param w_min Passed on unchanged to skip_to() by the default
                 *               implementation.
     211         *  @param found This is a flag, which will be set to true if the next
     212         *  entry is found, or if there is determined to be no next entry, and
     213         *  to false otherwise.
                 *  @return The default implementation returns whatever skip_to()
                 *          returns.
     214         */
     215        virtual Internal * weak_skip_to(Xapian::docid did, Xapian::weight w_min,
     216                                        bool & found) {
     217            found = true;
     218            return skip_to(did, w_min);
     219        }
     220
     221        /** Whether callers should try weak_skip_to() rather than skip_to() on
     222         *  this postlist when possible.
                 *
                 *  The default implementation returns false; a subclass opts in by
                 *  overriding this to return true.
     223         */
     224        virtual bool prefer_weak_skip_to() { return false; }
     225
    183226        /// Returns true if we're off the end of the list
    184227        virtual bool at_end() const = 0;
    185228