Ticket #508: xapian-orpositionlist.patch

File xapian-orpositionlist.patch, 8.5 KB (added by Olly Betts, 8 years ago)

Patch to add OrPositionList

  • xapian-core/matcher/Makefile.mk

    diff --git a/xapian-core/matcher/Makefile.mk b/xapian-core/matcher/Makefile.mk
    index 2e1a4b9d..6a0aa722 100644
    a b noinst_HEADERS +=\  
    1616        matcher/multimatch.h\
    1717        matcher/multixorpostlist.h\
    1818        matcher/nearpostlist.h\
     19        matcher/orpositionlist.h\
    1920        matcher/orpostlist.h\
    2021        matcher/phrasepostlist.h\
    2122        matcher/queryoptimiser.h\
    lib_src +=\  
    5455        matcher/multimatch.cc\
    5556        matcher/multixorpostlist.cc\
    5657        matcher/nearpostlist.cc\
     58        matcher/orpositionlist.cc\
    5759        matcher/orpostlist.cc\
    5860        matcher/phrasepostlist.cc\
    5961        matcher/selectpostlist.cc\
  • new file xapian-core/matcher/orpositionlist.cc

    diff --git a/xapian-core/matcher/orpositionlist.cc b/xapian-core/matcher/orpositionlist.cc
    new file mode 100644
    index 00000000..acbb787a
    - +  
     1/** @file orpositionlist.cc
     2 * @brief Merge two PositionList objects using an OR operation.
     3 */
     4/* Copyright (C) 2007,2010,2016 Olly Betts
     5 *
     6 * This program is free software; you can redistribute it and/or
     7 * modify it under the terms of the GNU General Public License as
     8 * published by the Free Software Foundation; either version 2 of the
     9 * License, or (at your option) any later version.
     10 *
     11 * This program is distributed in the hope that it will be useful,
     12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14 * GNU General Public License for more details.
     15 *
     16 * You should have received a copy of the GNU General Public License
     17 * along with this program; if not, write to the Free Software
     18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
     19 */
     20
     21#include <config.h>
     22
     23#include "orpositionlist.h"
     24
     25#include "debuglog.h"
     26#include "omassert.h"
     27
     28using namespace std;
     29
     30Xapian::termcount
     31OrPositionList::get_size() const
     32{
     33    LOGCALL(EXPAND, Xapian::termcount, "OrPositionList::get_size", NO_ARGS);
     34    // This is actually the upper bound, but generally there's only one term
     35    // at each position, so it'll usually be correct too.
     36    RETURN(left->get_size() + right->get_size());
     37}
     38
     39Xapian::termpos
     40OrPositionList::get_position() const
     41{
     42    LOGCALL(EXPAND, Xapian::termpos, "OrPositionList::get_position", NO_ARGS);
     43    if (left_current < right_current) RETURN(left_current);
     44    RETURN(right_current);
     45}
     46
     47void
     48OrPositionList::next()
     49{
     50    LOGCALL_VOID(EXPAND, "OrPositionList::next", NO_ARGS);
     51    // If we've not started yet, both left_current and right_current will be 0,
     52    // which gets handled by calling next() on both, which is what we want to
     53    // do to get started.
     54    if (left_current <= right_current) {
     55        bool equal = (left_current == right_current);
     56        if (equal && left_current == Xapian::termpos(-1)) {
     57            // Both either at_end() or on the largest value and will be
     58            // at_end() if we call next().
     59            left = right = NULL;
     60            return;
     61        }
     62
     63        left->next();
     64        if (left->at_end()) {
     65            left_current = Xapian::termpos(-1);
     66            left = NULL;
     67        } else {
     68            left_current = left->get_position();
     69        }
     70        if (!equal) return;
     71    }
     72
     73    right->next();
     74    if (right->at_end()) {
     75        right_current = Xapian::termpos(-1);
     76        right = NULL;
     77    } else {
     78        right_current = right->get_position();
     79    }
     80}
     81
     82void
     83OrPositionList::skip_to(Xapian::termpos termpos)
     84{
     85    LOGCALL_VOID(EXPAND, "OrPositionList::skip_to", termpos);
     86    // If we've not started yet, both left_current and right_current will be 0,
     87    // which gets handled by calling next() on both, which is what we want to
     88    // do to get started.
     89    if (left_current <= right_current) {
     90        bool equal = (left_current == right_current);
     91        if (equal && left_current == Xapian::termpos(-1)) {
     92            // Both either at_end() or on the largest value, so skip_to() is a
     93            // no-op.
     94            return;
     95        }
     96
     97        left->skip_to(termpos);
     98        if (left->at_end()) {
     99            left_current = Xapian::termpos(-1);
     100            left = NULL;
     101        } else {
     102            left_current = left->get_position();
     103        }
     104        if (!equal) return;
     105    }
     106
     107    right->skip_to(termpos);
     108    if (right->at_end()) {
     109        right_current = Xapian::termpos(-1);
     110        right = NULL;
     111    } else {
     112        right_current = right->get_position();
     113    }
     114}
     115
     116bool
     117OrPositionList::at_end() const
     118{
     119    LOGCALL(EXPAND, bool, "OrPositionList::at_end", NO_ARGS);
     120    RETURN(left == NULL && right == NULL);
     121}
  • new file xapian-core/matcher/orpositionlist.h

    diff --git a/xapian-core/matcher/orpositionlist.h b/xapian-core/matcher/orpositionlist.h
    new file mode 100644
    index 00000000..6b271673
    - +  
     1/** @file orpositionlist.h
     2 * @brief Merge two PositionList objects using an OR operation.
     3 */
     4/* Copyright (C) 2007,2010 Olly Betts
     5 *
     6 * This program is free software; you can redistribute it and/or
     7 * modify it under the terms of the GNU General Public License as
     8 * published by the Free Software Foundation; either version 2 of the
     9 * License, or (at your option) any later version.
     10 *
     11 * This program is distributed in the hope that it will be useful,
     12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14 * GNU General Public License for more details.
     15 *
     16 * You should have received a copy of the GNU General Public License
     17 * along with this program; if not, write to the Free Software
     18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
     19 */
     20
     21#ifndef XAPIAN_INCLUDED_ORPOSITIONLIST_H
     22#define XAPIAN_INCLUDED_ORPOSITIONLIST_H
     23
     24#include "backends/positionlist.h"
     25
     26class OrPositionList : public PositionList {
     27  protected:
     28    /// The two PositionList objects we're merging.
     29    PositionList *left, *right;
     30
     31    /// The current position for left and right respectively.
     32    Xapian::termpos left_current, right_current;
     33
     34  public:
     35    OrPositionList() { }
     36
     37    void init(PositionList * left_, PositionList * right_) {
     38        left = left_;
     39        right = right_;
     40        left_current = 0;
     41        right_current = 0;
     42    }
     43
     44    Xapian::termcount get_size() const;
     45
     46    Xapian::termpos get_position() const;
     47
     48    void next();
     49
     50    void skip_to(Xapian::termpos termpos);
     51
     52    bool at_end() const;
     53};
     54
     55#endif // XAPIAN_INCLUDED_ORPOSITIONLIST_H
  • xapian-core/matcher/orpostlist.cc

    diff --git a/xapian-core/matcher/orpostlist.cc b/xapian-core/matcher/orpostlist.cc
    index 88c65fe3..2613b09f 100644
    a b  
    22 *
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2001,2002 Ananova Ltd
    5  * Copyright 2003,2004,2007,2008,2009,2010,2011,2012 Olly Betts
     5 * Copyright 2003,2004,2007,2008,2009,2010,2011,2012,2016 Olly Betts
    66 * Copyright 2009 Lemur Consulting Ltd
    77 * Copyright 2010 Richard Boulton
    88 *
     
    3030#include "andmaybepostlist.h"
    3131#include "omassert.h"
    3232
     33#include "orpositionlist.h"
     34
    3335#include <algorithm>
    3436
    3537OrPostList::OrPostList(PostList *left_,
    OrPostList::recalc_maxweight()  
    370372    RETURN(OrPostList::get_maxweight());
    371373}
    372374
     375PositionList *
     376OrPostList::read_position_list()
     377{
     378    if (lhead < rhead) return l->read_position_list();
     379    if (lhead > rhead) return r->read_position_list();
     380    position_list.init(l->read_position_list(), r->read_position_list());
     381    return &position_list;
     382}
     383
    373384bool
    374385OrPostList::at_end() const
    375386{
  • xapian-core/matcher/orpostlist.h

    diff --git a/xapian-core/matcher/orpostlist.h b/xapian-core/matcher/orpostlist.h
    index bde56c9a..86c542fd 100644
    a b  
    33 */
    44/* Copyright 1999,2000,2001 BrightStation PLC
    55 * Copyright 2002 Ananova Ltd
    6  * Copyright 2003,2004,2009,2010,2011 Olly Betts
     6 * Copyright 2003,2004,2009,2010,2011,2016 Olly Betts
    77 * Copyright 2009 Lemur Consulting Ltd
    88 * Copyright 2010 Richard Boulton
    99 *
     
    2727#define OM_HGUARD_ORPOSTLIST_H
    2828
    2929#include "branchpostlist.h"
     30#include "orpositionlist.h"
    3031
    3132/** A postlist comprising two postlists ORed together.
    3233 *
    class OrPostList : public BranchPostList {  
    4142        bool lvalid, rvalid;
    4243        double lmax, rmax, minmax;
    4344        Xapian::doccount dbsize;
     45        // FIXME: Do we want an OrWithPosPostList so we don't have the overhead
     46        // of an OrPositionList object on every OrPostList?
     47        OrPositionList position_list;
    4448    public:
    4549        Xapian::doccount get_termfreq_max() const;
    4650        Xapian::doccount get_termfreq_min() const;
    class OrPostList : public BranchPostList {  
    5458
    5559        double recalc_maxweight();
    5660
     61        PositionList * read_position_list();
     62
    5763        PostList *next(double w_min);
    5864        PostList *skip_to(Xapian::docid did, double w_min);
    5965        PostList *check(Xapian::docid did, double w_min, bool &valid);