Ticket #164: weak-skip_to-updated.patch

File weak-skip_to-updated.patch, 12.6 KB (added by Olly Betts, 17 years ago)

Updated patch

  • matcher/branchpostlist.h

     
    5757        /** Utility method, to call recalc_maxweight() and do the pruning
    5858         *  if a next() or skip_to() returns non-NULL result.
    5959         */
    60         void handle_prune(PostList *&kid, PostList *ret) {
     60        bool handle_prune(PostList *&kid, PostList *ret) {
    6161            if (ret) {
    6262                delete kid;
    6363                kid = ret;
    6464
    6565                // now tell matcher that maximum weights need recalculation.
    6666                matcher->recalc_maxweight();
     67                return true;
    6768            }
     69            return false;
    6870        }
    6971
    7072    public:
     
    112114    return true;
    113115}
    114116
     117inline bool
     118weak_skip_to_handling_prune(PostList * & pl, Xapian::docid did, Xapian::weight w_min,
     119                            MultiMatch *matcher, bool & found)
     120{
     121    PostList *p = pl->weak_skip_to(did, w_min, found);
     122    if (!p) return false;
     123    delete pl;
     124    pl = p;
     125    // now tell matcher that maximum weights need recalculation.
     126    if (matcher) matcher->recalc_maxweight();
     127    return true;
     128}
     129
    115130#endif /* OM_HGUARD_BRANCHPOSTLIST_H */
  • matcher/andpostlist.h

     
    3737    protected:
    3838        Xapian::docid head;
    3939        Xapian::weight lmax, rmax;
     40
     41        bool l_weak;
     42        bool r_weak;
     43
    4044    private:
    4145        Xapian::doccount dbsize;
    4246
    43         void process_next_or_skip_to(Xapian::weight w_min, PostList *ret);
     47        void process_next_or_skip_to(Xapian::weight w_min, Xapian::docid did);
     48        void process_half_weak_next_or_skip_to(Xapian::weight w_min, Xapian::docid did);
     49
     50        /// If r is weak, but l isn't, swap r and l.
     51        /// Return true if a swap happened.
     52        bool swap_to_make_l_weak();
    4453    public:
    4554        Xapian::doccount get_termfreq_max() const;
    4655        Xapian::doccount get_termfreq_min() const;
  • matcher/valuerangepostlist.cc

     
    101101ValueRangePostList::next(Xapian::weight)
    102102{
    103103    Assert(db);
    104     Xapian::docid lastdocid = db->get_lastdocid();
     104    AssertParanoid(lastdocid == db->get_lastdocid());
    105105    while (current < lastdocid) {
    106106        try {
    107107            if (++current == 0) break;
     
    126126    return ValueRangePostList::next(w_min);
    127127}
    128128
     129PostList *
     130ValueRangePostList::weak_skip_to(Xapian::docid did, Xapian::weight,
     131                                 bool & found)
     132{
     133    Assert(db);
     134    AssertParanoid(lastdocid == db->get_lastdocid());
     135    if (did <= current) {
     136        found = true;
     137        return NULL;
     138    }
     139    current = did;
     140    if (current > lastdocid) {
     141        db = NULL;
     142        found = true;
     143        return NULL;
     144    }
     145
     146    try {
     147        AutoPtr<Xapian::Document::Internal> doc(db->open_document(current, true));
     148        string v = doc->get_value(valno);
     149        if (v >= begin && v <= end) {
     150            found = true;
     151            return NULL;
     152        }
     153    } catch (const Xapian::DocNotFoundError &) {
     154        // Document doesn't exist.
     155    }
     156    // Haven't determined the position of the next match, other than knowing that
     157    // it's not at document "did".
     158    found = false;
     159    return NULL;
     160}
     161
    129162bool
    130163ValueRangePostList::at_end() const
    131164{
  • matcher/valuerangepostlist.h

     
    3333
    3434    Xapian::docid current;
    3535
     36    /// Flag, true iff \a current points to a document which satisfies the restriction.
     37    bool current_valid;
     38
     39    /// Highest docid used in the database.
     40    Xapian::docid lastdocid;
     41
    3642    /// Disallow copying.
    3743    ValueRangePostList(const ValueRangePostList &);
    3844
     
    4349    ValueRangePostList(const Xapian::Database::Internal *db_,
    4450                       Xapian::valueno valno_,
    4551                       const std::string &begin_, const std::string &end_)
    46         : db(db_), valno(valno_), begin(begin_), end(end_), current(0) { }
     52        : db(db_), valno(valno_), begin(begin_), end(end_), current(0),
     53          current_valid(false), lastdocid(db->get_lastdocid())
     54    { }
    4755
    4856    Xapian::doccount get_termfreq_min() const;
    4957
     
    6977
    7078    PostList * skip_to(Xapian::docid, Xapian::weight w_min);
    7179
     80    PostList * weak_skip_to(Xapian::docid did, Xapian::weight w_min, bool & found);
     81
     82    bool prefer_weak_skip_to() { return true; }
     83
    7284    bool at_end() const;
    7385
    7486    string get_description() const;
  • matcher/andpostlist.cc

     
    2828#include "omdebug.h"
    2929
    3030inline void
    31 AndPostList::process_next_or_skip_to(Xapian::weight w_min, PostList *ret)
     31AndPostList::process_next_or_skip_to(Xapian::weight w_min, Xapian::docid did)
    3232{
    3333    DEBUGCALL(MATCH, void, "AndPostList::process_next_or_skip_to",
    34               w_min << ", " << ret);
     34              w_min << ", " << did);
     35
    3536    head = 0;
    36     handle_prune(r, ret);
     37    if (did == 0) {
     38        if (handle_prune(r, r->next(w_min - lmax)))
     39            swap_to_make_l_weak();
     40    } else {
     41        if (handle_prune(r, r->skip_to(did, w_min - lmax)))
     42            swap_to_make_l_weak();
     43    }
     44
    3745    DEBUGLINE(MATCH, "r at_end = " << r->at_end());
    3846    if (r->at_end()) return;
    3947
     
    4250    Xapian::docid rhead = r->get_docid();
    4351    DEBUGLINE(MATCH, "rhead " << rhead);
    4452    DEBUGLINE(MATCH, "w_min " << w_min << " rmax " << rmax);
    45     skip_to_handling_prune(l, rhead, w_min - rmax, matcher);
     53    if (skip_to_handling_prune(l, rhead, w_min - rmax, matcher))
     54        swap_to_make_l_weak();
    4655    DEBUGLINE(MATCH, "l at_end = " << l->at_end());
    4756    if (l->at_end()) return;
    4857
     
    5362        if (lhead < rhead) {
    5463            // FIXME: CSE these w_min values?
    5564            // But note that lmax and rmax may change on recalc_maxweight...
    56             skip_to_handling_prune(l, rhead, w_min - rmax, matcher);
     65            if (skip_to_handling_prune(l, rhead, w_min - rmax, matcher))
     66                swap_to_make_l_weak();
    5767            DEBUGLINE(MATCH, "l at_end = " << l->at_end());
    5868            if (l->at_end()) {
    5969                head = 0;
     
    6272            lhead = l->get_docid();
    6373            DEBUGLINE(MATCH, "lhead " << lhead);
    6474        } else {
    65             skip_to_handling_prune(r, lhead, w_min - lmax, matcher);
     75            if (skip_to_handling_prune(r, lhead, w_min - lmax, matcher))
     76                swap_to_make_l_weak();
    6677            DEBUGLINE(MATCH, "r at_end = " << r->at_end());
    6778            if (r->at_end()) {
    6879                head = 0;
     
    7788    return;
    7889}
    7990
     91inline void
     92AndPostList::process_half_weak_next_or_skip_to(Xapian::weight w_min, Xapian::docid did)
     93{
     94    DEBUGCALL(MATCH, void, "AndPostList::process_half_weak_next_or_skip_to",
     95              w_min << ", " << did);
     96
     97    bool pruned = false;
     98
     99    head = 0;
     100    if (did == 0) {
     101        pruned = handle_prune(r, r->next(w_min - lmax));
     102    } else {
     103        pruned = handle_prune(r, r->skip_to(did, w_min - lmax));
     104    }
     105
     106    DEBUGLINE(MATCH, "r at_end = " << r->at_end());
     107    if (r->at_end()) return;
     108
     109    // r has just been advanced by next or skip_to so must be > head
     110    // (and head is the current position of l)
     111    Xapian::docid rhead = r->get_docid();
     112    DEBUGLINE(MATCH, "rhead " << rhead);
     113    DEBUGLINE(MATCH, "w_min " << w_min << " rmax " << rmax);
     114    bool found = false;
     115    Xapian::docid lhead = rhead;
     116
     117    while (true) {
     118        if (pruned && swap_to_make_l_weak()) {
     119            DEBUGLINE(MATCH, "swapped l and r - swapping lhead and rhead");
     120            Xapian::docid tmp = lhead;
     121            lhead = rhead;
     122            rhead = tmp;
     123        }
     124
     125        if (lhead == rhead) {
     126            if (found) break;
     127            // Not found, therefore l needs to be advanced, therefore so does
     128            // r, but it's probably more efficient to move r first.
     129            pruned = next_handling_prune(r, w_min - lmax, matcher);
     130            DEBUGLINE(MATCH, "r at_end = " << r->at_end());
     131            if (r->at_end()) return;
     132            rhead = r->get_docid();
     133            DEBUGLINE(MATCH, "rhead " << rhead);
     134        } else if (lhead < rhead) {
     135            pruned = weak_skip_to_handling_prune(l, rhead, w_min - rmax, matcher, found);
     136            DEBUGLINE(MATCH, "l at_end = " << l->at_end());
     137            if (l->at_end()) return;
     138            lhead = l->get_docid();
     139            DEBUGLINE(MATCH, "l found = " << found << ", lhead = " << lhead);
     140        } else if (lhead > rhead) {
     141            pruned = skip_to_handling_prune(r, lhead, w_min - lmax, matcher);
     142            DEBUGLINE(MATCH, "r at_end = " << r->at_end());
     143            if (r->at_end()) return;
     144            rhead = r->get_docid();
     145            DEBUGLINE(MATCH, "rhead " << rhead);
     146        }
     147    }
     148
     149    head = lhead;
     150    return;
     151}
     152
    80153AndPostList::AndPostList(PostList *left_, PostList *right_,
    81154                         MultiMatch *matcher_,
    82155                         Xapian::doccount dbsize_,
     
    91164        lmax = l->get_maxweight();
    92165        rmax = r->get_maxweight();
    93166    }
     167
     168    swap_to_make_l_weak();
     169}
     170
     171bool
     172AndPostList::swap_to_make_l_weak()
     173{
     174    DEBUGCALL(MATCH, bool, "AndPostList::swap_to_make_l_weak", "");
     175    l_weak = l->prefer_weak_skip_to();
     176    r_weak = r->prefer_weak_skip_to();
     177    DEBUGLINE(MATCH, "l_weak=" << l_weak << ", r_weak=" << r_weak);
     178    if (l_weak != r_weak) {
     179        if (r_weak) {
     180            // Swap, so that l is weak if only one subtree is weak.
     181            PostList * tmp_pl = r; r = l; l = tmp_pl;
     182            Xapian::weight tmp_wt = rmax; rmax = lmax; lmax = tmp_wt;
     183            l_weak = true;
     184            r_weak = false;
     185            AssertEq(l_weak, l->prefer_weak_skip_to());
     186            AssertEq(r_weak, r->prefer_weak_skip_to());
     187            RETURN(true);
     188        }
     189    }
     190    RETURN(false);
    94191}
    95192
    96193PostList *
    97194AndPostList::next(Xapian::weight w_min)
    98195{
    99196    DEBUGCALL(MATCH, PostList *, "AndPostList::next", w_min);
    100     process_next_or_skip_to(w_min, r->next(w_min - lmax));
     197    if (l_weak && !r_weak) {
     198        process_half_weak_next_or_skip_to(w_min, 0);
     199    } else {
     200        process_next_or_skip_to(w_min, 0);
     201    }
    101202    RETURN(NULL);
    102203}
    103204
     
    105206AndPostList::skip_to(Xapian::docid did, Xapian::weight w_min)
    106207{
    107208    DEBUGCALL(MATCH, PostList *, "AndPostList::skip_to", did << ", " << w_min);
    108     if (did > head)
    109         process_next_or_skip_to(w_min, r->skip_to(did, w_min - lmax));
     209    if (did > head) {
     210        if (l_weak && !r_weak) {
     211            process_half_weak_next_or_skip_to(w_min, did);
     212        } else {
     213            process_next_or_skip_to(w_min, did);
     214        }
     215    }
    110216    RETURN(NULL);
    111217}
    112218
  • common/postlist.h

     
    153153     */
    154154    Internal *skip_to(Xapian::docid did) { return skip_to(did, 0.0); }
    155155
     156    /** Attempt to find the first entry in the postlist at or after a
     157     *  given location.
     158     *
     159     *  This attempts to find the first entry in the postlist at or after a
     160     *  given location, but if it can't quickly determine which document
     161     *  this is, it will give up and report failure rather than spend a
     162     *  great deal of time looking for the document.
     163     *
     164     *  If there is an entry in the postlist at exactly the given location,
     165     *  this call must find it.
     166     *
     167     *  If the first entry is found, the \a found parameter is set to true,
     168     *  and the postlist is left in exactly the same state as it would be
     169     *  if \a skip_to() had been called.
     170     *
     171     *  If the first entry is not found, the \a found parameter is set to
     172     *  false, and the postlist is left pointing to no document, but the
     173     *  next time that next() is called, the postlist will be moved to the
     174     *  first entry after that specified in this call.  (Or, if the next
     175     *  call is to skip_to(), the skip_to() will behave as if the postlist
     176     *  were currently positioned somewhere between the location specified
     177     *  in this call and the location of the actual next entry in the
     178     *  list.)  If \a get_docid() is called immediately after this
     179     *  method has returned having set \a found to false, a value for the
     180     *  lowest possible document ID which might be the next matching entry
     181     *  should be returned.  (This can always be calculated cheaply,
     182     *  because \a did is always a valid return value here.)
     183     *
     184     *  @param found This is a flag, which will be set to true if the next
     185     *  entry is found, or if there is determined to be no next entry, and
     186     *  to false otherwise.
     187     */
     188    virtual Internal * weak_skip_to(Xapian::docid did, Xapian::weight w_min,
     189                                    bool & found) {
     190        found = true;
     191        return skip_to(did, w_min);
     192    }
     193
     194    /** Return true if callers should, where possible, use weak_skip_to()
     195     *  rather than skip_to() when advancing this postlist.
     196     */
     197    virtual bool prefer_weak_skip_to() { return false; }
     198
    156199    /// Return a string description of this object.
    157200    virtual std::string get_description() const = 0;
    158201};