Ticket #47: alldocpostlists.2.patch

File alldocpostlists.2.patch, 50.2 KB (added by Richard Boulton, 18 years ago)

Complete implementation of all document postlists

  • docs/quartzdesign.html

     
    243243It is quite possible that the termlists and
    244244position lists would benefit from being split into chunks in this way.
    245245
     246<h2>All document lists</h2>
     247
     248It is possible to use the Xapian API to obtain a list of all documents in the
     249database.  This is done by creating a special postinglist.  This functionality
     250was added after the file structure in use by Quartz was frozen, and it is
     251unfortunately impossible to implement efficiently for Quartz.
     252
     253The problem is that it is not possible to read the list of documents in sorted
     254order direct from disk - instead, the list is read into memory to be sorted.
     255For databases which do not have sparse document IDs, this should not use much
     256memory since the list is kept in memory in a range-compressed form (but does
     257require an iteration over the entirety of one of the tables of the Quartz
     258database - no skipping can be done in this case).  This is unlikely to be fixed,
     259since we don't believe it can be without changing Quartz's structure.  In any
     260case, it is not a priority since Quartz is due to be replaced by Flint as the
     261default backend soon.
     262
    246263<h2>Btree implementation</h2>
    247264
    248265The tables are currently all implemented as B-trees (actually a form of
  • tests/apitest.cc

     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002 Ananova Ltd
    55 * Copyright 2003,2004,2006 Olly Betts
     6 * Copyright 2006 Richard Boulton
    67 *
    78 * This program is free software; you can redistribute it and/or
    89 * modify it under the terms of the GNU General Public License as
     
    4546BackendManager backendmanager;
    4647
    4748Xapian::Database
     49get_database()
     50{
     51    vector<string> dbnames;
     52    return backendmanager.get_database(dbnames);
     53}
     54
     55Xapian::Database
    4856get_database(const string &dbname)
    4957{
    5058    return backendmanager.get_database(dbname);
     
    101109    RUNTESTS("inmemory", anydb);
    102110    RUNTESTS("inmemory", specchar);
    103111    RUNTESTS("inmemory", writabledb);
     112    RUNTESTS("inmemory", writablelocaldb);
    104113    RUNTESTS("inmemory", localdb);
    105114    RUNTESTS("inmemory", positionaldb);
    106115    RUNTESTS("inmemory", localpositionaldb);
     
    114123    RUNTESTS("flint", anydb);
    115124    RUNTESTS("flint", specchar);
    116125    RUNTESTS("flint", writabledb);
     126    RUNTESTS("flint", writablelocaldb);
    117127    RUNTESTS("flint", localdb);
    118128    RUNTESTS("flint", positionaldb);
    119129    RUNTESTS("flint", localpositionaldb);
     
    129139    RUNTESTS("quartz", anydb);
    130140    RUNTESTS("quartz", specchar);
    131141    RUNTESTS("quartz", writabledb);
     142    RUNTESTS("quartz", writablelocaldb);
    132143    RUNTESTS("quartz", localdb);
    133144    RUNTESTS("quartz", positionaldb);
    134145    RUNTESTS("quartz", localpositionaldb);
  • tests/apitest.h

     
    33 * ----START-LICENCE----
    44 * Copyright 1999,2000,2001 BrightStation PLC
    55 * Copyright 2003,2004 Olly Betts
     6 * Copyright 2006 Richard Boulton
    67 *
    78 * This program is free software; you can redistribute it and/or
    89 * modify it under the terms of the GNU General Public License as
     
    2627
    2728#include <xapian.h>
    2829
     30Xapian::Database get_database();
    2931Xapian::Database get_database(const std::string &dbname);
    3032Xapian::Database get_database(const std::string &dbname,
    3133                              const std::string &dbname2);
  • tests/api_db.cc

     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002 Ananova Ltd
    55 * Copyright 2002,2003,2004,2005,2006 Olly Betts
     6 * Copyright 2006 Richard Boulton
    67 *
    78 * This program is free software; you can redistribute it and/or
    89 * modify it under the terms of the GNU General Public License as
     
    895896{
    896897    Xapian::Database db(get_database("apitest_simpledata"));
    897898
    898     TEST_EXCEPTION(Xapian::InvalidArgumentError, db.postlist_begin(""));
    899 
    900899    TEST_EQUAL(db.postlist_begin("rosebud"), db.postlist_end("rosebud"));
    901900
    902901    string s = "let_us_see_if_we_can_break_it_with_a_really_really_long_term.";
     
    10111010    return true;
    10121011}
    10131012
     1013// tests all document postlists
     1014static bool test_allpostlist1()
     1015{
     1016    Xapian::Database db(get_database("apitest_manydocs"));
     1017    Xapian::PostingIterator i = db.postlist_begin("");
     1018    unsigned int j = 1;
     1019    while (i != db.postlist_end("")) {
     1020        TEST_EQUAL(*i, j);
     1021        i++;
     1022        j++;
     1023    }
     1024    TEST_EQUAL(j, 513);
     1025
     1026    i = db.postlist_begin("");
     1027    j = 1;
     1028    while (i != db.postlist_end("")) {
     1029        TEST_EQUAL(*i, j);
     1030        i++;
     1031        j++;
     1032        if (j == 50) {
     1033            j += 10;
     1034            i.skip_to(j);
     1035        }
     1036    }
     1037    TEST_EQUAL(j, 513);
     1038
     1039    return true;
     1040}
     1041
     1042static void test_emptyterm1_helper(Xapian::Database & db)
     1043{
     1044    // Don't bother with postlist_begin() because allpostlist tests cover that.
     1045    TEST_EXCEPTION(Xapian::InvalidArgumentError, db.positionlist_begin(1, ""));
     1046    TEST_EQUAL(db.get_doccount(), db.get_termfreq(""));
     1047    TEST_EQUAL(db.get_doccount() != 0, db.term_exists(""));
     1048    TEST_EQUAL(db.get_doccount(), db.get_collection_freq(""));
     1049}
     1050
     1051// tests results of passing an empty term to various methods
     1052static bool test_emptyterm1()
     1053{
     1054    Xapian::Database db(get_database("apitest_manydocs"));
     1055    TEST_EQUAL(db.get_doccount(), 512);
     1056    test_emptyterm1_helper(db);
     1057
     1058    db = get_database("apitest_onedoc");
     1059    TEST_EQUAL(db.get_doccount(), 1);
     1060    test_emptyterm1_helper(db);
     1061
     1062    db = get_database();
     1063    TEST_EQUAL(db.get_doccount(), 0);
     1064    test_emptyterm1_helper(db);
     1065
     1066    return true;
     1067}
     1068
    10141069// tests collection frequency
    10151070static bool test_collfreq1()
    10161071{
     
    13791434    {"postlist4",          test_postlist4},
    13801435    {"postlist5",          test_postlist5},
    13811436    {"postlist6",          test_postlist6},
     1437    {"allpostlist1",       test_allpostlist1},
     1438    {"emptyterm1",         test_emptyterm1},
    13821439    {"termstats",          test_termstats},
    13831440    {"sortvalue1",         test_sortvalue1},
    13841441    // consistency1 will run on the remote backend, but it's particularly slow
  • tests/api_wrdb.cc

     
    44 * Copyright 2001 Hein Ragas
    55 * Copyright 2002 Ananova Ltd
    66 * Copyright 2002,2003,2004,2005,2006 Olly Betts
     7 * Copyright 2006 Richard Boulton
    78 *
    89 * This program is free software; you can redistribute it and/or
    910 * modify it under the terms of the GNU General Public License as
     
    909910    return true;
    910911}
    911912
     913// tests all document postlists
     914static bool test_allpostlist2()
     915{
     916    Xapian::WritableDatabase db(get_writable_database("apitest_manydocs"));
     917    Xapian::PostingIterator i = db.postlist_begin("");
     918    unsigned int j = 1;
     919    while (i != db.postlist_end("")) {
     920        TEST_EQUAL(*i, j);
     921        i++;
     922        j++;
     923    }
     924    TEST_EQUAL(j, 513);
     925
     926    db.delete_document(1);
     927    db.delete_document(50);
     928    db.delete_document(512);
     929
     930    i = db.postlist_begin("");
     931    j = 2;
     932    while (i != db.postlist_end("")) {
     933        TEST_EQUAL(*i, j);
     934        i++;
     935        j++;
     936        if (j == 50) j++;
     937    }
     938    TEST_EQUAL(j, 512);
     939
     940    i = db.postlist_begin("");
     941    j = 2;
     942    while (i != db.postlist_end("")) {
     943        TEST_EQUAL(*i, j);
     944        i++;
     945        j++;
     946        if (j == 40) {
     947            j += 10;
     948            i.skip_to(j);
     949            j++;
     950        }
     951    }
     952    TEST_EQUAL(j, 512);
     953
     954    return true;
     955}
     956
     957static void test_emptyterm2_helper(Xapian::WritableDatabase & db)
     958{
     959    // Don't bother with postlist_begin() because allpostlist tests cover that.
     960    TEST_EXCEPTION(Xapian::InvalidArgumentError, db.positionlist_begin(1, ""));
     961    TEST_EQUAL(db.get_doccount(), db.get_termfreq(""));
     962    TEST_EQUAL(db.get_doccount() != 0, db.term_exists(""));
     963    TEST_EQUAL(db.get_doccount(), db.get_collection_freq(""));
     964}
     965
     966// tests results of passing an empty term to various methods
     967// equivalent of emptyterm1 for a writable database
     968static bool test_emptyterm2()
     969{
     970    Xapian::WritableDatabase db(get_writable_database("apitest_manydocs"));
     971    TEST_EQUAL(db.get_doccount(), 512);
     972    test_emptyterm2_helper(db);
     973    db.delete_document(1);
     974    TEST_EQUAL(db.get_doccount(), 511);
     975    test_emptyterm2_helper(db);
     976    db.delete_document(50);
     977    TEST_EQUAL(db.get_doccount(), 510);
     978    test_emptyterm2_helper(db);
     979    db.delete_document(512);
     980    TEST_EQUAL(db.get_doccount(), 509);
     981    test_emptyterm2_helper(db);
     982
     983    db = get_writable_database("apitest_onedoc");
     984    TEST_EQUAL(db.get_doccount(), 1);
     985    test_emptyterm2_helper(db);
     986    db.delete_document(1);
     987    TEST_EQUAL(db.get_doccount(), 0);
     988    test_emptyterm2_helper(db);
     989
     990    db = get_writable_database("");
     991    TEST_EQUAL(db.get_doccount(), 0);
     992    test_emptyterm2_helper(db);
     993
     994    return true;
     995}
     996
    912997// Check that PHRASE/NEAR becomes AND if there's no positional info in the
    913998// database.
    914999static bool test_phraseorneartoand1()
     
    10351120    {"replacedoc3",        test_replacedoc3},
    10361121    {"replacedoc4",        test_replacedoc4},
    10371122    {"uniqueterm1",        test_uniqueterm1},
     1123    {"emptyterm2",         test_emptyterm2},
    10381124    {"phraseorneartoand1", test_phraseorneartoand1},
    10391125    {"longpositionlist1",  test_longpositionlist1},
    10401126    {0, 0}
    10411127};
     1128
     1129/// The tests which use a writable, but local, backend
     1130test_desc writablelocaldb_tests[] = {
     1131    {"allpostlist2",       test_allpostlist2},
     1132    {0, 0}
     1133};
  • tests/api_wrdb.h

     
    33 * ----START-LICENCE----
    44 * Copyright 1999,2000,2001 BrightStation PLC
    55 * Copyright 2004 Olly Betts
     6 * Copyright 2006 Richard Boulton
    67 *
    78 * This program is free software; you can redistribute it and/or
    89 * modify it under the terms of the GNU General Public License as
     
    2728#include "testsuite.h"
    2829
    2930extern test_desc writabledb_tests[];
     31extern test_desc writablelocaldb_tests[];
    3032
    3133#endif /* XAPIAN_HGUARD_API_WRDB_H */
  • include/xapian/database.h

     
    44/* Copyright 1999,2000,2001 BrightStation PLC
    55 * Copyright 2002 Ananova Ltd
    66 * Copyright 2002,2003,2004,2005,2006 Olly Betts
     7 * Copyright 2006 Richard Boulton
    78 *
    89 * This program is free software; you can redistribute it and/or
    910 * modify it under the terms of the GNU General Public License as
     
    108109
    109110        /** An iterator pointing to the start of the postlist
    110111         *  for a given term.
     112         *
     113         *  If the term name is the empty string, the iterator returned
     114         *  will list all the documents in the database.  Such an iterator
     115         *  will always return a WDF value of 1, since there is no obvious
     116         *  meaning for this quantity in this case.
    111117         */
    112118        PostingIterator postlist_begin(const std::string &tname) const;
    113119
  • ChangeLog

     
     1Wed Oct 11 19:05:38 BST 2006  Richard Boulton <richard@lemurconsulting.com>
     2
     3        * common/database.h,api/omdatabase.cc,
     4          backends/inmemory/inmemory_database.cc,
     5          backends/inmemory/inmemory_database.h,
     6          backends/quartz/Makefile.am,backends/quartz/quartz_database.cc,
     7          backends/quartz/quartz_alldocspostlist.h,
     8          backends/quartz/quartz_alldocspostlist.cc,
     9          backends/flint/Makefile.am,backends/flint/flint_database.cc,
     10          backends/flint/flint_alldocspostlist.cc,
     11          backends/flint/flint_alldocspostlist.h:
     12          Implement posting lists which return a list of all documents in
     13          the database.  Such a posting list is obtained by calling
     14          Xapian::Database::postlist_begin() with an empty term (ie, "").
     15          Also, all Xapian::Database methods which take a termname now
     16          accept an empty term, and return appropriate values (ie,
     17          get_termfreq("") and get_collection_freq("") return the number of
     18          documents in the database, and term_exists("") returns true
     19          unless the database is empty).
     20        * docs/quartzdesign.html: Document the inefficiency of all-document
     21          postlists for Quartz.
     22        * tests/apitest.cc,tests/apitest.h,tests/api_db.cc,
     23          tests/api_wrdb.cc,tests/api_wrdb.h: Add tests for all-document
     24          postlists, and for passing an empty term to all the applicable
     25          database methods.  This defines the new tests allpostlist[12], and
     26          emptyterm[12].  Includes defining a new test category for databases
     27          which are local and writable, and adding a function to get an
     28          empty database (for testing the empty term methods with such a
     29          database).
     30
    131Tue Oct 10 17:24:00 BST 2006  Olly Betts <olly@survex.com>
    232
    333        * NEWS: Bump release date.
  • common/database.h

     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002 Ananova Ltd
    55 * Copyright 2002,2003,2004,2005,2006 Olly Betts
     6 * Copyright 2006 Richard Boulton
    67 *
    78 * This program is free software; you can redistribute it and/or
    89 * modify it under the terms of the GNU General Public License as
     
    188189         *                use.
    189190         */
    190191        LeafPostList * open_post_list(const string & tname) const {
    191             if (!term_exists(tname)) {
     192            if (!tname.empty() && !term_exists(tname)) {
    192193                DEBUGLINE(MATCH, tname + " is not in database.");
    193194                // Term doesn't exist in this database.  However, we create
    194195                // a (empty) postlist for it to help make distributed searching
  • api/omdatabase.cc

     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2001,2002 Ananova Ltd
    55 * Copyright 2002,2003,2004,2005,2006 Olly Betts
     6 * Copyright 2006 Richard Boulton
    67 *
    78 * This program is free software; you can redistribute it and/or
    89 * modify it under the terms of the GNU General Public License as
     
    101102Database::postlist_begin(const string &tname) const
    102103{
    103104    DEBUGAPICALL(PostingIterator, "Database::postlist_begin", tname);
    104     if (tname.empty())
    105         throw InvalidArgumentError("Zero length terms are invalid");
    106105
    107106    // Don't bother checking that the term exists first.  If it does, we
    108107    // just end up doing more work, and if it doesn't, we save very little
     
    248247Database::get_termfreq(const string & tname) const
    249248{
    250249    DEBUGAPICALL(Xapian::doccount, "Database::get_termfreq", tname);
    251     if (tname.empty())
    252         throw InvalidArgumentError("Zero length terms are invalid");
     250    if (tname.empty()) {
     251        return get_doccount();
     252    }
    253253    Xapian::doccount tf = 0;
    254254    vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
    255255    for (i = internal.begin(); i != internal.end(); i++) {
     
    262262Database::get_collection_freq(const string & tname) const
    263263{
    264264    DEBUGAPICALL(Xapian::termcount, "Database::get_collection_freq", tname);
    265     if (tname.empty())
    266         throw InvalidArgumentError("Zero length terms are invalid");
     265    if (tname.empty()) {
     266        return get_doccount();
     267    }
    267268
    268269    Xapian::termcount cf = 0;
    269270    vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
     
    303304bool
    304305Database::term_exists(const string & tname) const
    305306{
    306     if (tname.empty())
    307         throw InvalidArgumentError("Zero length terms are invalid");
     307    if (tname.empty()) {
     308        return get_doccount() != 0;
     309    }
    308310    vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
    309311    for (i = internal.begin(); i != internal.end(); ++i) {
    310312        if ((*i)->term_exists(tname)) return true;
  • backends/inmemory/inmemory_database.cc

     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002 Ananova Ltd
    55 * Copyright 2002,2003,2004,2005,2006 Olly Betts
     6 * Copyright 2006 Richard Boulton
    67 *
    78 * This program is free software; you can redistribute it and/or
    89 * modify it under the terms of the GNU General Public License as
     
    260261    return Xapian::PositionIterator(db->open_position_list(did, (*pos).tname));
    261262}
    262263
     264/////////////////////////////
     265// InMemoryAllDocsPostList //
     266/////////////////////////////
     267
     268InMemoryAllDocsPostList::InMemoryAllDocsPostList(Xapian::Internal::RefCntPtr<const InMemoryDatabase> db_)
     269        : did(0), db(db_)
     270{
     271}
     272
     273Xapian::doccount
     274InMemoryAllDocsPostList::get_termfreq() const
     275{
     276    return db->totdocs;
     277}
     278   
     279Xapian::docid
     280InMemoryAllDocsPostList::get_docid() const
     281{
     282    Assert(did > 0);
     283    Assert(did <= db->termlists.size());
     284    Assert(db->termlists[did - 1].is_valid);
     285    return did;
     286}
     287
     288Xapian::doclength
     289InMemoryAllDocsPostList::get_doclength() const
     290{
     291    return db->get_doclength(did);
     292}
     293
     294Xapian::termcount
     295InMemoryAllDocsPostList::get_wdf() const
     296{
     297    return 1;
     298}
     299
     300PositionList *
     301InMemoryAllDocsPostList::read_position_list()
     302{
     303    throw Xapian::UnimplementedError("Can't open position list for all docs iterator");
     304}
     305
     306PositionList *
     307InMemoryAllDocsPostList::open_position_list() const
     308{
     309    throw Xapian::UnimplementedError("Can't open position list for all docs iterator");
     310}
     311
     312PostList *
     313InMemoryAllDocsPostList::next(Xapian::weight /*w_min*/)
     314{
     315    Assert(!at_end());
     316    do {
     317       ++did;
     318    } while (did <= db->termlists.size() && !db->termlists[did - 1].is_valid);
     319    return NULL;
     320}
     321
     322PostList *
     323InMemoryAllDocsPostList::skip_to(Xapian::docid did_, Xapian::weight /*w_min*/)
     324{
     325    Assert(!at_end());
     326    if (did <= did_) {
     327        did = did_;
     328        while (did <= db->termlists.size() && !db->termlists[did - 1].is_valid) {
     329            ++did;
     330        }
     331    }
     332    return NULL;
     333}
     334
     335bool
     336InMemoryAllDocsPostList::at_end() const
     337{
     338    return (did > db->termlists.size());
     339}
     340
     341string
     342InMemoryAllDocsPostList::get_description() const
     343{
     344    return "InMemoryAllDocsPostList" + om_tostring(did);
     345}
     346
    263347///////////////////////////
    264348// Actual database class //
    265349///////////////////////////
     
    279363LeafPostList *
    280364InMemoryDatabase::do_open_post_list(const string & tname) const
    281365{
    282     Assert(tname.size() != 0);
     366    if (tname.empty()) {
     367        if (termlists.empty())
     368            return new EmptyPostList();
     369        return new InMemoryAllDocsPostList(Xapian::Internal::RefCntPtr<const InMemoryDatabase>(this));
     370    }
    283371    map<string, InMemoryTerm>::const_iterator i = postlists.find(tname);
    284372    if (i == postlists.end() || i->second.term_freq == 0)
    285373        return new EmptyPostList();
  • backends/inmemory/inmemory_database.h

     
    33 * Copyright 1999,2000,2001 BrightStation PLC
    44 * Copyright 2002 Ananova Ltd
    55 * Copyright 2002,2003,2004,2005,2006 Olly Betts
     6 * Copyright 2006 Richard Boulton
    67 *
    78 * This program is free software; you can redistribute it and/or
    89 * modify it under the terms of the GNU General Public License as
     
    159160        string get_description() const;
    160161};
    161162
     163/** A PostList over all docs in an inmemory database.
     164 */
     165class InMemoryAllDocsPostList : public LeafPostList {
     166    friend class InMemoryDatabase;
     167    private:
     168        Xapian::docid did;
     169
     170        Xapian::Internal::RefCntPtr<const InMemoryDatabase> db;
     171
     172        InMemoryAllDocsPostList(Xapian::Internal::RefCntPtr<const InMemoryDatabase> db);
     173    public:
     174        Xapian::doccount get_termfreq() const;
     175
     176        Xapian::docid       get_docid() const;     // Gets current docid
     177        Xapian::doclength   get_doclength() const; // Length of current document
     178        Xapian::termcount   get_wdf() const;              // Within Document Frequency
     179        PositionList * read_position_list();
     180        PositionList * open_position_list() const;
     181
     182        PostList *next(Xapian::weight w_min); // Moves to next docid
     183
     184        PostList *skip_to(Xapian::docid did, Xapian::weight w_min); // Moves to next docid >= specified docid
     185
     186        // True if we're off the end of the list
     187        bool at_end() const;
     188
     189        string get_description() const;
     190};
     191
    162192// Term List
    163193class InMemoryTermList : public LeafTermList {
    164194    friend class InMemoryDatabase;
     
    193223 *  This is a prototype database, mainly used for debugging and testing.
    194224 */
    195225class InMemoryDatabase : public Xapian::Database::Internal {
     226    friend class InMemoryAllDocsPostList;
    196227    private:
    197228        map<string, InMemoryTerm> postlists;
    198229        vector<InMemoryDoc> termlists;
  • backends/quartz/quartz_alldocspostlist.h

     
     1/* quartz_alldocspostlist.h: All document postlists in quartz databases
     2 *
     3 * ----START-LICENCE----
     4 * Copyright 1999,2000,2001 BrightStation PLC
     5 * Copyright 2002 Ananova Ltd
     6 * Copyright 2002,2003,2004,2005 Olly Betts
     7 * Copyright 2006 Richard Boulton
     8 *
     9 * This program is free software; you can redistribute it and/or
     10 * modify it under the terms of the GNU General Public License as
     11 * published by the Free Software Foundation; either version 2 of the
     12 * License, or (at your option) any later version.
     13 *
     14 * This program is distributed in the hope that it will be useful,
     15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     17 * GNU General Public License for more details.
     18 *
     19 * You should have received a copy of the GNU General Public License
     20 * along with this program; if not, write to the Free Software
     21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
     22 * USA
     23 * -----END-LICENCE-----
     24 */
     25
     26#ifndef OM_HGUARD_QUARTZ_ALLDOCSPOSTLIST_H
     27#define OM_HGUARD_QUARTZ_ALLDOCSPOSTLIST_H
     28
     29#include <map>
     30#include <string>
     31
     32#include "leafpostlist.h"
     33#include <xapian/database.h>
     34#include <xapian/postingiterator.h>
     35#include "database.h"
     36#include "omassert.h"
     37#include "quartz_types.h"
     38#include "btree.h"
     39
     40using namespace std;
     41
     42class Bcursor;
     43
     44class QuartzDocIdList;
     45
     46class QuartzDocIdListIterator {
     47    private:
     48        const map<Xapian::docid, Xapian::docid> * ranges;
     49        map<Xapian::docid, Xapian::docid>::const_iterator currrange;
     50        Xapian::docid currdocid;
     51
     52        friend class QuartzDocIdList;
     53
     54        QuartzDocIdListIterator(const map<Xapian::docid, Xapian::docid> * ranges_);
     55        QuartzDocIdListIterator(const map<Xapian::docid, Xapian::docid> * ranges_, int);
     56
     57    public:
     58        Xapian::docid operator*() {
     59            return currdocid;
     60        }
     61
     62        friend bool operator==(const QuartzDocIdListIterator &a,
     63                               const QuartzDocIdListIterator &b);
     64
     65        QuartzDocIdListIterator();
     66        ~QuartzDocIdListIterator() {}
     67        QuartzDocIdListIterator(const QuartzDocIdListIterator & other);
     68        void operator=(const QuartzDocIdListIterator & other);
     69
     70        QuartzDocIdListIterator & operator++();
     71
     72        Xapian::DocIDWrapper operator++(int) {
     73            Xapian::docid tmp = **this;
     74            operator++();
     75            return Xapian::DocIDWrapper(tmp);
     76        }
     77
     78        Xapian::docid operator *() const { return currdocid; }
     79
     80        /// Allow use as an STL iterator
     81        //@{
     82        typedef std::input_iterator_tag iterator_category;
     83        typedef Xapian::docid value_type;
     84        typedef Xapian::doccount_diff difference_type;
     85        typedef Xapian::docid * pointer;
     86        typedef Xapian::docid & reference;
     87        //@}
     88};
     89
     90inline bool operator==(const QuartzDocIdListIterator &a,
     91                       const QuartzDocIdListIterator &b)
     92{
     93    if (a.ranges != b.ranges)
     94        return false;
     95    return a.currdocid == b.currdocid;
     96}
     97
     98inline bool operator!=(const QuartzDocIdListIterator &a,
     99                       const QuartzDocIdListIterator &b)
     100{
     101    return !(a==b);
     102}
     103
     104class QuartzDocIdList {
     105    private:
     106        /** Map from start of a range to end of a range.
     107         */
     108        map<Xapian::docid, Xapian::docid> ranges;
     109
     110    public:
     111        QuartzDocIdList() {}
     112        void addDocId(Xapian::docid did);
     113
     114        QuartzDocIdListIterator begin() const {
     115            return QuartzDocIdListIterator(&ranges);
     116        }
     117
     118        QuartzDocIdListIterator end() const {
     119            return QuartzDocIdListIterator(&ranges, 1);
     120        }
     121};
     122
     123/** A postlist listing all the documents in a quartz database.
     124 */
     125class QuartzAllDocsPostList : public LeafPostList {
     126    private:
     127        /// Pointer to database.
     128        Xapian::Internal::RefCntPtr<const Xapian::Database::Internal> this_db;
     129
     130        /// List of docids.
     131        QuartzDocIdList docids;
     132
     133        /// Iterator through docids.
     134        QuartzDocIdListIterator dociditer;
     135
     136        /// Number of documents in the database.
     137        Xapian::doccount doccount;
     138
     139        /// Whether we've started yet.
     140        bool have_started;
     141
     142        /// Copying is not allowed.
     143        QuartzAllDocsPostList(const QuartzAllDocsPostList &);
     144
     145        /// Assignment is not allowed.
     146        void operator=(const QuartzAllDocsPostList &);
     147
     148    public:
     149        /// Constructor.
     150        QuartzAllDocsPostList(Xapian::Internal::RefCntPtr<const Xapian::Database::Internal> this_db_,
     151                              const Btree * table,
     152                              Xapian::doccount doccount_);
     153
     154        /// Destructor.
     155        ~QuartzAllDocsPostList();
     156
     157        /** Returns length of the all documents postlist.
     158         *
     159         *  This is also the number of documents in the database.
     160         */
     161        Xapian::doccount get_termfreq() const { return doccount; }
     162
     163        /** Returns the number of occurrences of the term in the database.
     164         *
     165         *  We pretend that each document has one "empty" term, so this is
     166         *  also the number of documents in the database.
     167         */
     168        Xapian::termcount get_collection_freq() const { return doccount; }
     169
     170        /// Returns the current docid.
     171        Xapian::docid get_docid() const {
     172            Assert(have_started);
     173            return *dociditer;
     174        }
     175
     176        /// Returns the length of current document.
     177        Xapian::doclength get_doclength() const {
     178            Assert(have_started);
     179            return this_db->get_doclength(*dociditer);
     180        }
     181
     182        /** Returns the Within Document Frequency of the term in the current
     183         *  document.
     184         */
     185        Xapian::termcount get_wdf() const {
     186            Assert(have_started);
     187            return static_cast<Xapian::termcount>(1);
     188        }
     189
     190        /** Get the list of positions of the term in the current document.
     191         */
     192        PositionList *read_position_list() {
     193            throw Xapian::InvalidOperationError("Can't read position list from all docs postlist.");
     194        }
     195
     196        /** Get the list of positions of the term in the current document.
     197         */
     198        PositionList * open_position_list() const {
     199            throw Xapian::InvalidOperationError("Can't read position list from all docs postlist.");
     200        }
     201
     202        /// Move to the next document.
     203        PostList * next(Xapian::weight w_min);
     204
     205        /// Skip to next document with docid >= docid.
     206        PostList * skip_to(Xapian::docid desired_did, Xapian::weight w_min);
     207
     208        /// Return true if and only if we're off the end of the list.
                ///
                /// Always false while have_started is still false, even if the
                /// list of docids is empty.
     209        bool at_end() const { return (have_started && dociditer == docids.end()); }
     210
     211        /// Get a description of the postlist.
                ///
                /// The description is ":" followed by the document count.
     212        std::string get_description() const;
     213};
     214
     215#endif /* OM_HGUARD_QUARTZ_ALLDOCSPOSTLIST_H */
  • backends/quartz/quartz_database.cc

     
    44 * Copyright 2001 Hein Ragas
    55 * Copyright 2002 Ananova Ltd
    66 * Copyright 2002,2003,2004,2005,2006 Olly Betts
     7 * Copyright 2006 Richard Boulton
    78 *
    89 * This program is free software; you can redistribute it and/or
    910 * modify it under the terms of the GNU General Public License as
     
    3334#include <xapian/valueiterator.h>
    3435
    3536#include "quartz_postlist.h"
     37#include "quartz_alldocspostlist.h"
    3638#include "quartz_termlist.h"
    3739#include "quartz_positionlist.h"
    3840#include "quartz_utils.h"
     
    598600QuartzDatabase::do_open_post_list(const string& tname) const
    599601{
    600602    DEBUGCALL(DB, LeafPostList *, "QuartzDatabase::do_open_post_list", tname);
    601     Assert(!tname.empty());
     603    Xapian::Internal::RefCntPtr<const QuartzDatabase> ptrtothis(this);
    602604
    603     Xapian::Internal::RefCntPtr<const QuartzDatabase> ptrtothis(this);
    604     return(new QuartzPostList(ptrtothis,
     605    if (tname.empty()) {
     606        RETURN(new QuartzAllDocsPostList(ptrtothis,
     607                                         &termlist_table,
     608                                         get_doccount()));
     609    }
     610
     611    RETURN(new QuartzPostList(ptrtothis,
    605612                              &postlist_table,
    606613                              &positionlist_table,
    607614                              tname));
     
    11111118QuartzWritableDatabase::do_open_post_list(const string& tname) const
    11121119{
    11131120    DEBUGCALL(DB, LeafPostList *, "QuartzWritableDatabase::do_open_post_list", tname);
    1114     Assert(!tname.empty());
     1121    Xapian::Internal::RefCntPtr<const QuartzWritableDatabase> ptrtothis(this);
    11151122
     1123    if (tname.empty()) {
     1124        RETURN(new QuartzAllDocsPostList(ptrtothis,
     1125                                         &database_ro.termlist_table,
     1126                                         get_doccount()));
     1127    }
     1128
    11161129    // Need to flush iff we've got buffered changes to this term's postlist.
    11171130    map<string, map<docid, pair<char, termcount> > >::const_iterator j;
    11181131    j = mod_plists.find(tname);
     
    11221135        do_flush_const();
    11231136    }
    11241137
    1125     Xapian::Internal::RefCntPtr<const QuartzWritableDatabase> ptrtothis(this);
    1126     return(new QuartzPostList(ptrtothis,
     1138    RETURN(new QuartzPostList(ptrtothis,
    11271139                              &database_ro.postlist_table,
    11281140                              &database_ro.positionlist_table,
    11291141                              tname));
  • backends/quartz/Makefile.am

     
    1212                       quartz_utils.h \
    1313                       quartz_log.h \
    1414                       quartz_document.h \
     15                       quartz_alldocspostlist.h \
    1516                       quartz_alltermslist.h \
    1617                       quartz_metafile.h \
    1718                       btree.h \
     
    2728                       quartz_values.cc \
    2829                       quartz_log.cc \
    2930                       quartz_document.cc \
     31                       quartz_alldocspostlist.cc \
    3032                       quartz_alltermslist.cc \
    3133                       quartz_metafile.cc \
    3234                       btree.cc \
  • backends/quartz/quartz_alldocspostlist.cc

     
     1/* quartz_alldocspostlist.cc: All-document postlists in quartz databases
     2 *
     3 * ----START-LICENCE----
     4 * Copyright 1999,2000,2001 BrightStation PLC
     5 * Copyright 2002,2003,2004,2005 Olly Betts
     6 * Copyright 2006 Richard Boulton
     7 *
     8 * This program is free software; you can redistribute it and/or
     9 * modify it under the terms of the GNU General Public License as
     10 * published by the Free Software Foundation; either version 2 of the
     11 * License, or (at your option) any later version.
     12 *
     13 * This program is distributed in the hope that it will be useful,
     14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     16 * GNU General Public License for more details.
     17 *
     18 * You should have received a copy of the GNU General Public License
     19 * along with this program; if not, write to the Free Software
     20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
     21 * USA
     22 * -----END-LICENCE-----
     23 */
     24
     25#include <config.h>
     26#include "omdebug.h"
     27#include "quartz_alldocspostlist.h"
     28#include "quartz_utils.h"
     29#include "bcursor.h"
     30#include "database.h"
     31#include <map>
     32
     /** Construct an iterator which is not attached to any range map.
      *
      *  ranges is NULL and currdocid is 0.  NOTE(review): operator++ calls
      *  ranges->end() unconditionally, so such an iterator presumably has to be
      *  assigned from a real one before it is advanced - confirm against callers.
      */
     33QuartzDocIdListIterator::QuartzDocIdListIterator()
     34        : ranges(NULL),
     35          currrange(),
     36          currdocid(0)
     37{
     38    DEBUGCALL(DB, void,
     39              "QuartzDocIdListIterator::QuartzDocIdListIterator", "");
     40}
     41
     /** Construct an iterator positioned at the end of ranges_.
      *
      *  The unnamed int parameter is not used; it only distinguishes this
      *  overload from the constructor which starts at the beginning of the map.
      *  currdocid is 0, the same value operator++ leaves behind at the end.
      */
     42QuartzDocIdListIterator::QuartzDocIdListIterator(const map<Xapian::docid, Xapian::docid> * ranges_, int)
     43        : ranges(ranges_),
     44          currrange(ranges_->end()),
     45          currdocid(0)
     46{
     47}
     48
     /** Construct an iterator positioned on the first docid held in ranges_.
      *
      *  Each map entry is a docid range: the key is the first docid and the
      *  mapped value the last one (both inclusive, as operator++ steps up to and
      *  including the mapped value).  If the map is empty the iterator is
      *  already at the end and currdocid stays 0.
      */
     49QuartzDocIdListIterator::QuartzDocIdListIterator(const map<Xapian::docid, Xapian::docid> * ranges_)
     50        : ranges(ranges_),
     51          currrange(ranges_->begin()),
     52          currdocid(0)
     53{
     54    DEBUGCALL(DB, void,
     55              "QuartzDocIdListIterator::QuartzDocIdListIterator", "ranges");
     56    if (currrange != ranges_->end()) {
     57        currdocid = currrange->first;
     58    }
     59
            // Debug output only: dump every range in the map.
     60    map<Xapian::docid, Xapian::docid>::const_iterator i;
     61    for (i = ranges->begin(); i != ranges->end(); i++) {
     62        DEBUGLINE(DB, "Docid range begin=" << i->first << ", end=" << i->second);
     63    }
     64}
     65
     66QuartzDocIdListIterator::QuartzDocIdListIterator(const QuartzDocIdListIterator & other)
     67        : ranges(other.ranges),
     68          currrange(other.currrange),
     69          currdocid(other.currdocid)
     70{
     71    DEBUGCALL(DB, void,
     72              "QuartzDocIdListIterator::~QuartzDocIdListIterator", "other");
     73}
     74
     /** Assignment: takes the range map pointer and the current position (range
      *  and docid) from other.
      *
      *  Returns void, so assignments cannot be chained.
      */
     75void
     76QuartzDocIdListIterator::operator=(const QuartzDocIdListIterator & other)
     77{
     78    DEBUGCALL(DB, void,
     79              "QuartzDocIdListIterator::operator=", "other");
     80    ranges = other.ranges;
     81    currrange = other.currrange;
     82    currdocid = other.currdocid;
     83}
     84
     /** Advance to the next docid (pre-increment).
      *
      *  Steps through the current range until its last docid has been reached,
      *  then jumps to the first docid of the following range.  When the last
      *  range is exhausted currdocid is reset to 0.  Incrementing an iterator
      *  which is already at the end leaves it unchanged.
      *
      *  @return A reference to this iterator.
      */
     85QuartzDocIdListIterator &
     86QuartzDocIdListIterator::operator++()
     87{
     88    DEBUGCALL(DB, void,
     89              "QuartzDocIdListIterator::operator++", "");
     90    DEBUGLINE(DB, string("Moved from ") <<
     91              (currrange == ranges->end() ? string("end.") : string("docid = ") +
     92               om_tostring(currdocid)));
     93
     94    if (currrange != ranges->end()) {
     95        Assert(currrange->first <= currdocid);
     96        if (currdocid < currrange->second) {
                   // Still inside the current range.
     97            currdocid++;
     98        } else {
                   // Current range is used up: move on to the next one, if any.
     99            currrange++;
     100            if (currrange == ranges->end()) {
     101                currdocid = 0;
     102            } else {
                        // Ranges are expected to be in ascending order.
     103                Assert(currrange->first > currdocid);
     104                currdocid = currrange->first;
     105            }
     106        }
     107    }
     108
     109    DEBUGLINE(DB, string("Moved to ") <<
     110              (currrange == ranges->end() ? string("end.") : string("docid = ") +
     111               om_tostring(currdocid)));
     112
     113    return *this;
     114}
     115
     116
     /** Add a docid to the range-compressed list.
      *
      *  ranges maps the first docid of each range to its last docid (inclusive).
      *  Docids may be added in any order; adding one which is already covered is
      *  a no-op.  A docid directly after an existing range extends it (and joins
      *  it to the following range if they now touch); a docid directly before
      *  the first range extends that range downwards.
      *
      *  NOTE(review): a docid which is directly before a range other than the
      *  first, and not directly after the preceding range, is stored as its own
      *  one-element range rather than being merged forwards.  Iteration order is
      *  unaffected; only the compression is less tight than it could be.
      *
      *  @param did  The docid to add.
      */
     117void
     118QuartzDocIdList::addDocId(Xapian::docid did) {
     119    DEBUGCALL(DB, void, "QuartzDocIdList::addDocId", did);
     120
             // First docid ever added: start the first range.
     121    if(ranges.size() == 0) {
     122        ranges.insert(pair<Xapian::docid, Xapian::docid>(did, did));
     123        return;
     124    }
     125
             // Below every existing range: either grow the first range downwards
             // (which means re-keying it) or start a new one-element range.
     126    if (did < ranges.begin()->first) {
     127        Xapian::docid newend;
     128        if (did == ranges.begin()->first - 1) {
     129            newend = ranges.begin()->second;
     130            ranges.erase(ranges.begin());
     131        } else {
     132            newend = did;
     133        }
     134        ranges[did] = newend;
     135        return;
     136    }
     137
             // Find the range with the largest start which is <= did.  This is
             // safe to step back to because did >= the first range's start here.
     138    map<Xapian::docid, Xapian::docid>::iterator i;
     139    i = ranges.lower_bound(did);
     140    if (i == ranges.end()) {
     141        i--;
     142        Assert(did > i->first);
     143    } else if (did < i->first) {
     144        i--;
     145        Assert(did > i->first);
     146    }
     147    Assert(did >= i->first);
     148
     149    if (did <= i->second) {
     150        // Do nothing - already in range
     151        return;
     152    }
     153
     154    if (did == i->second + 1) {
     155        // Extend range
     156        i->second = did;
     157        map<Xapian::docid, Xapian::docid>::iterator j;
     158        j = i;
     159        j++;
     160        if (j != ranges.end()) {
     161            Assert(j->first > i->second);
     162            if (j->first == i->second + 1) {
     163                // Merge ranges
     164                i->second = j->second;
     165                ranges.erase(j);
     166            }
     167        }
     168    } else {
                 // Not adjacent to the preceding range: start a new range.
     169        ranges[did] = did;
     170    }
     171}
     172
     173
     174QuartzAllDocsPostList::QuartzAllDocsPostList(Xapian::Internal::RefCntPtr<const Xapian::Database::Internal> this_db_,
     175                                             const Btree * table,
     176                                             Xapian::doccount doccount_)
     177        : this_db(this_db_),
     178          docids(),
     179          dociditer(),
     180          doccount(doccount_),
     181          have_started(false)
     182{
     183    DEBUGCALL(DB, void, "QuartzAllDocsPostList::QuartzAllDocsPostList",
     184              this_db_.get() << ", " << table << ", " << doccount_);
     185
     186    // Move to initial NULL entry.
     187    Bcursor * cursor = table->cursor_get();
     188    cursor->find_entry("");
     189    if (!cursor->after_end())
     190        cursor->next();
     191    while (!cursor->after_end()) {
     192        string key = cursor->current_key;
     193        const char * keystr = key.c_str();
     194        Xapian::docid did;
     195        if (!unpack_uint_last(&keystr, keystr + key.length(), &did)) {
     196            throw Xapian::RangeError("Document number in value table is too large");
     197        }
     198        docids.addDocId(did);
     199        cursor->next();
     200    }
     201}
     202
     /// Destructor.  The body only logs the call.
     203QuartzAllDocsPostList::~QuartzAllDocsPostList()
     204{
     205    DEBUGCALL(DB, void, "QuartzAllDocsPostList::~QuartzAllDocsPostList", "");
     206}
     207
     /** Move to the next document.
      *
      *  The first call positions the list on the first docid and sets
      *  have_started; later calls advance the docid iterator by one.
      *
      *  @param w_min  Ignored.
      *  @return Always NULL.
      */
     208PostList *
     209QuartzAllDocsPostList::next(Xapian::weight w_min)
     210{
     211    DEBUGCALL(DB, PostList *, "QuartzAllDocsPostList::next", w_min);
     212    (void)w_min;
     213
     214    if (have_started) {
     215        ++dociditer;
     216    } else {
     217        dociditer = docids.begin();
     218        have_started = true;
     219    }
     220
     221    DEBUGLINE(DB, string("Moved to ") <<
     222              (dociditer == docids.end() ? string("end.") : string("docid = ") +
     223               om_tostring(*dociditer)));
     224
     225    RETURN(NULL);
     226}
     227
     228PostList *
     229QuartzAllDocsPostList::skip_to(Xapian::docid desired_did, Xapian::weight w_min)
     230{
     231    DEBUGCALL(DB, PostList *,
     232              "QuartzAllDocsPostList::skip_to", desired_did << ", " << w_min);
     233    (void)w_min; // no warning
     234
     235    // Don't skip back, and don't need to do anything if already there.
     236    if (!have_started) {
     237        dociditer = docids.begin();
     238    }
     239    if (dociditer == docids.end()) RETURN(NULL);
     240    if (desired_did <= *dociditer) RETURN(NULL);
     241
     242    while (dociditer != docids.end() && *dociditer < desired_did)
     243    {
     244        ++dociditer;
     245    }
     246
     247    DEBUGLINE(DB, string("Skipped to ") <<
     248              (dociditer == docids.end() ? string("end.") : string("docid = ") +
     249               om_tostring(*dociditer)));
     250
     251    RETURN(NULL);
     252}
     253
     /// Describe the postlist as ":" followed by the document count.
     254string
     255QuartzAllDocsPostList::get_description() const
     256{
     257    return ":" + om_tostring(doccount);
     258}
  • backends/flint/flint_database.cc

     
    44 * Copyright 2001 Hein Ragas
    55 * Copyright 2002 Ananova Ltd
    66 * Copyright 2002,2003,2004,2005,2006 Olly Betts
     7 * Copyright 2006 Richard Boulton
    78 *
    89 * This program is free software; you can redistribute it and/or
    910 * modify it under the terms of the GNU General Public License as
     
    3435
    3536#include "flint_modifiedpostlist.h"
    3637#include "flint_postlist.h"
     38#include "flint_alldocspostlist.h"
    3739#include "flint_termlist.h"
    3840#include "flint_positionlist.h"
    3941#include "flint_utils.h"
     
    455457FlintDatabase::do_open_post_list(const string& tname) const
    456458{
    457459    DEBUGCALL(DB, LeafPostList *, "FlintDatabase::do_open_post_list", tname);
    458     Assert(!tname.empty());
     460    Xapian::Internal::RefCntPtr<const FlintDatabase> ptrtothis(this);
    459461
    460     Xapian::Internal::RefCntPtr<const FlintDatabase> ptrtothis(this);
    461     return(new FlintPostList(ptrtothis,
     462    if (tname.empty()) {
     463        RETURN(new FlintAllDocsPostList(ptrtothis,
     464                                        &termlist_table,
     465                                        get_doccount()));
     466    }
     467
     468    RETURN(new FlintPostList(ptrtothis,
    462469                              &postlist_table,
    463470                              &positionlist_table,
    464471                              tname));
     
    967974FlintWritableDatabase::do_open_post_list(const string& tname) const
    968975{
    969976    DEBUGCALL(DB, LeafPostList *, "FlintWritableDatabase::do_open_post_list", tname);
    970     Assert(!tname.empty());
    971 
    972977    Xapian::Internal::RefCntPtr<const FlintWritableDatabase> ptrtothis(this);
    973978
     979    if (tname.empty()) {
     980        RETURN(new FlintAllDocsPostList(ptrtothis,
     981                                        &database_ro.termlist_table,
     982                                        get_doccount()));
     983    }
     984
    974985    map<string, map<docid, pair<char, termcount> > >::const_iterator j;
    975986    j = mod_plists.find(tname);
    976987    if (j != mod_plists.end()) {
  • backends/flint/flint_alldocspostlist.cc

     
     1/* flint_alldocspostlist.cc: All-document postlists in flint databases
     2 *
     3 * ----START-LICENCE----
     4 * Copyright 1999,2000,2001 BrightStation PLC
     5 * Copyright 2002,2003,2004,2005 Olly Betts
     6 * Copyright 2006 Richard Boulton
     7 *
     8 * This program is free software; you can redistribute it and/or
     9 * modify it under the terms of the GNU General Public License as
     10 * published by the Free Software Foundation; either version 2 of the
     11 * License, or (at your option) any later version.
     12 *
     13 * This program is distributed in the hope that it will be useful,
     14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     16 * GNU General Public License for more details.
     17 *
     18 * You should have received a copy of the GNU General Public License
     19 * along with this program; if not, write to the Free Software
     20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
     21 * USA
     22 * -----END-LICENCE-----
     23 */
     24
     25#include <config.h>
     26#include "omdebug.h"
     27#include "flint_alldocspostlist.h"
     28#include "flint_utils.h"
     29#include "flint_values.h"
     30#include "flint_cursor.h"
     31#include "database.h"
     32
     33/** Create a postlist over every document in a flint database.
        *
        *  A cursor is opened on table_ and placed by looking up the empty key;
        *  the first call to next() then moves it onto the first docid.
        *
        *  @param this_db_   Database being read; a reference is held by the postlist.
        *  @param table_     Table whose keys are scanned for docids (callers pass
        *                    the termlist table).
        *  @param doccount_  Number of documents, as reported by get_termfreq().
     34 */
     35FlintAllDocsPostList::FlintAllDocsPostList(Xapian::Internal::RefCntPtr<const Xapian::Database::Internal> this_db_,
     36                                           const FlintTable * table_,
     37                                           Xapian::doccount doccount_)
     38        : this_db(this_db_),
     39          table(table_),
     40          cursor(table->cursor_get()),
     41          did(0),
     42          is_at_end(false),
     43          doccount(doccount_)
     44{
     45    DEBUGCALL(DB, void, "FlintAllDocsPostList::FlintAllDocsPostList",
     46              this_db_.get() << ", " << table_ << ", " << doccount_);
     47
     48    // Move to initial NULL entry.
     49    cursor->find_entry("");
     50}
     51
     /// Destructor.  The body only logs the call.
     52FlintAllDocsPostList::~FlintAllDocsPostList()
     53{
     54    DEBUGCALL(DB, void, "FlintAllDocsPostList::~FlintAllDocsPostList", "");
     55}
     56
     57PostList *
     58FlintAllDocsPostList::next(Xapian::weight w_min)
     59{
     60    DEBUGCALL(DB, PostList *, "FlintAllDocsPostList::next", w_min);
     61    (void)w_min; // no warning
     62
     63    cursor->next();
     64    if (cursor->after_end()) {
     65        is_at_end = true;
     66    } else {
     67        string key = cursor->current_key;
     68        const char * keystr = key.c_str();
     69        if (!unpack_uint_preserving_sort(&keystr, keystr + key.length(), &did)) {
     70            if (*keystr == 0)
     71                throw Xapian::DatabaseCorruptError("Unexpected end of data when reading from termlist table");
     72            else
     73                throw Xapian::RangeError("Document number in value table is too large");
     74        }
     75    }
     76
     77    DEBUGLINE(DB, string("Moved to ") <<
     78              (is_at_end ? string("end.") : string("docid = ") +
     79               om_tostring(did)));
     80
     81    RETURN(NULL);
     82}
     83
     84PostList *
     85FlintAllDocsPostList::skip_to(Xapian::docid desired_did, Xapian::weight w_min)
     86{
     87    DEBUGCALL(DB, PostList *,
     88              "FlintAllDocsPostList::skip_to", desired_did << ", " << w_min);
     89    (void)w_min; // no warning
     90
     91    // Don't skip back, and don't need to do anything if already there.
     92    if (desired_did <= did) RETURN(NULL);
     93    if (is_at_end) RETURN(NULL);
     94
     95    string desired_key = pack_uint_preserving_sort(desired_did);
     96    bool exact_match = cursor->find_entry(desired_key);
     97    if (!exact_match)
     98        cursor->next();
     99    if (cursor->after_end()) {
     100        is_at_end = true;
     101    } else {
     102        string key = cursor->current_key;
     103        const char * keystr = key.c_str();
     104        if (!unpack_uint_preserving_sort(&keystr, keystr + key.length(), &did)) {
     105            if (*keystr == 0)
     106                throw Xapian::DatabaseCorruptError("Unexpected end of data when reading from termlist table");
     107            else
     108                throw Xapian::RangeError("Document number in value table is too large");
     109        }
     110    }
     111
     112    DEBUGLINE(DB, string("Skipped to ") <<
     113              (is_at_end ? string("end.") : string("docid = ") +
     114               om_tostring(did)));
     115
     116    RETURN(NULL);
     117}
     118
     /// Describe the postlist as ":" followed by the document count.
     119string
     120FlintAllDocsPostList::get_description() const
     121{
     122    return ":" + om_tostring(doccount);
     123}
  • backends/flint/flint_alldocspostlist.h

     
     1/* flint_alldocspostlist.h: All document postlists in flint databases
     2 *
     3 * ----START-LICENCE----
     4 * Copyright 1999,2000,2001 BrightStation PLC
     5 * Copyright 2002 Ananova Ltd
     6 * Copyright 2002,2003,2004,2005 Olly Betts
     7 * Copyright 2006 Richard Boulton
     8 *
     9 * This program is free software; you can redistribute it and/or
     10 * modify it under the terms of the GNU General Public License as
     11 * published by the Free Software Foundation; either version 2 of the
     12 * License, or (at your option) any later version.
     13 *
     14 * This program is distributed in the hope that it will be useful,
     15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     17 * GNU General Public License for more details.
     18 *
     19 * You should have received a copy of the GNU General Public License
     20 * along with this program; if not, write to the Free Software
     21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
     22 * USA
     23 * -----END-LICENCE-----
     24 */
     25
     26#ifndef OM_HGUARD_FLINT_ALLDOCSPOSTLIST_H
     27#define OM_HGUARD_FLINT_ALLDOCSPOSTLIST_H
     28
     29#include <map>
     30#include <string>
     31
     32#include "leafpostlist.h"
     33#include "database.h"
     34#include "omassert.h"
     35#include "flint_types.h"
     36
     37using namespace std;
     38
     39class FlintCursor;
     40class FlintTable;
     41
     42/** Postlist which lists every document in a flint database.
        *
        *  Docids are decoded from the keys of the table handed to the constructor,
        *  which are visited through a cursor.  Every document is treated as
        *  containing one "empty" term with a wdf of 1.
     43 */
     44class FlintAllDocsPostList : public LeafPostList {
     45    private:
     46        /** The database we are searching.  This pointer is held so that the
     47         *  database doesn't get deleted before us.
     48         */
     49        Xapian::Internal::RefCntPtr<const Xapian::Database::Internal> this_db;
     50
     51        /// The table whose keys are read to find the docids.
     52        const FlintTable * table;
     53
     54        /// Cursor over table; the key it is on holds the current docid.
     55        AutoPtr<FlintCursor> cursor;
     56
     57        /// Document id we're currently at (0 until we have been moved).
     58        Xapian::docid did;
     59
     60        /// Whether we've run off the end of the list yet.
     61        bool is_at_end;
     62
     63        /// Number of documents in the database, as given to the constructor.
     64        Xapian::doccount doccount;
     65
     66        /// Copying is not allowed.
     67        FlintAllDocsPostList(const FlintAllDocsPostList &);
     68
     69        /// Assignment is not allowed.
     70        void operator=(const FlintAllDocsPostList &);
     71
     72
     73    public:
     74        /// Create an all-documents postlist.
               ///
               /// @param this_db_   Database being searched; a reference is kept.
               /// @param table_     Table whose keys are scanned for docids.
               /// @param doccount_  Number of documents in the database.
     75        FlintAllDocsPostList(Xapian::Internal::RefCntPtr<const Xapian::Database::Internal> this_db_,
     76                             const FlintTable * table_,
     77                             Xapian::doccount doccount_);
     78
     79        /// Destructor.
     80        ~FlintAllDocsPostList();
     81
     82        /** Returns length of the all documents postlist.
     83         *
     84         *  This is also the number of documents in the database.
     85         */
     86        Xapian::doccount get_termfreq() const { return doccount; }
     87
     88        /** Returns the number of occurrences of the term in the database.
     89         *
     90         *  We pretend that each document has one "empty" term, so this is
     91         *  also the number of documents in the database.
     92         */
     93        Xapian::termcount get_collection_freq() const { return doccount; }
     94
     95        /// Returns the current docid (asserts that it is non-zero).
     96        Xapian::docid get_docid() const { Assert(did != 0); return did; }
     97
     98        /// Returns the length of current document, looked up through this_db.
     99        Xapian::doclength get_doclength() const {
     100            Assert(did != 0);
     101            return this_db->get_doclength(did);
     102        }
     103
     104        /** Returns the Within Document Frequency of the term in the current
     105         *  document.
                 *
                 *  This is always 1 for the all-documents list.
     106         */
     107        Xapian::termcount get_wdf() const { Assert(did != 0); return static_cast<Xapian::termcount>(1); }
     108
     109        /** Position lists are not available from the all-documents list.
                 *
                 *  @exception Xapian::InvalidOperationError is always thrown.
     110         */
     111        PositionList *read_position_list() {
     112            throw Xapian::InvalidOperationError("Can't read position list from all docs postlist.");
     113        }
     114
     115        /** Position lists are not available from the all-documents list.
                 *
                 *  @exception Xapian::InvalidOperationError is always thrown.
     116         */
     117        PositionList * open_position_list() const {
     118            throw Xapian::InvalidOperationError("Can't read position list from all docs postlist.");
     119        }
     120
     121        /// Move to the next document.
     122        PostList * next(Xapian::weight w_min);
     123
     124        /// Skip forward to the first document with docid >= desired_did.
     125        PostList * skip_to(Xapian::docid desired_did, Xapian::weight w_min);
     126
     127        /// Return true if and only if we're off the end of the list.
     128        bool at_end() const { return is_at_end; }
     129
     130        /// Get a description of the postlist.
     131        std::string get_description() const;
     132};
     133
     134#endif /* OM_HGUARD_FLINT_ALLDOCSPOSTLIST_H */
  • backends/flint/Makefile.am

     
    1212                       flint_values.h \
    1313                       flint_utils.h \
    1414                       flint_document.h \
     15                       flint_alldocspostlist.h \
    1516                       flint_alltermslist.h \
    1617                       flint_table.h \
    1718                       flint_cursor.h \
     
    2930                       flint_record.cc \
    3031                       flint_values.cc \
    3132                       flint_document.cc \
     33                       flint_alldocspostlist.cc \
    3234                       flint_alltermslist.cc \
    3335                       flint_table.cc \
    3436                       flint_cursor.cc \