Ticket #47: alldocpostlists.patch

File alldocpostlists.patch, 13.3 KB (added by Richard Boulton, 18 years ago)

Updated patch for inmemory case, and tests

  • tests/apitest.cc

     
    4545BackendManager backendmanager;
    4646
    4747Xapian::Database
     48get_database()
     49{
           // No-argument overload: hand the backend manager an empty list of
           // database names.  test_emptyterm1 expects the resulting database
           // to report a document count of 0.
     50    vector<string> dbnames;
     51    return backendmanager.get_database(dbnames);
     52}
     53
     54Xapian::Database
    4855get_database(const string &dbname)
    4956{
    5057    return backendmanager.get_database(dbname);
     
    101108    RUNTESTS("inmemory", anydb);
    102109    RUNTESTS("inmemory", specchar);
    103110    RUNTESTS("inmemory", writabledb);
     111    RUNTESTS("inmemory", writablelocaldb);
    104112    RUNTESTS("inmemory", localdb);
    105113    RUNTESTS("inmemory", positionaldb);
    106114    RUNTESTS("inmemory", localpositionaldb);
     
    114122    RUNTESTS("flint", anydb);
    115123    RUNTESTS("flint", specchar);
    116124    RUNTESTS("flint", writabledb);
     125    RUNTESTS("flint", writablelocaldb);
    117126    RUNTESTS("flint", localdb);
    118127    RUNTESTS("flint", positionaldb);
    119128    RUNTESTS("flint", localpositionaldb);
     
    129138    RUNTESTS("quartz", anydb);
    130139    RUNTESTS("quartz", specchar);
    131140    RUNTESTS("quartz", writabledb);
     141    RUNTESTS("quartz", writablelocaldb);
    132142    RUNTESTS("quartz", localdb);
    133143    RUNTESTS("quartz", positionaldb);
    134144    RUNTESTS("quartz", localpositionaldb);
  • tests/apitest.h

     
    2626
    2727#include <xapian.h>
    2828
     29Xapian::Database get_database();
    2930Xapian::Database get_database(const std::string &dbname);
    3031Xapian::Database get_database(const std::string &dbname,
    3132                              const std::string &dbname2);
  • tests/api_db.cc

     
    10111011    return true;
    10121012}
    10131013
     1014// tests all document postlists
         //
         // Opens "apitest_manydocs", iterates the posting list of the empty
         // term "" and checks that the docids come out as 1, 2, 3, ... with no
         // gaps.  The counter starts at 1 and is bumped once per entry, so the
         // final check j == 513 means exactly 512 entries were seen.
     1015static bool test_allpostlist1()
     1016{
     1017    Xapian::Database db(get_database("apitest_manydocs"));
     1018    Xapian::PostingIterator i = db.postlist_begin("");
     1019    unsigned int j = 1;
     1020    while (i != db.postlist_end("")) {
     1021        TEST_EQUAL(*i, j);
     1022        i++;
     1023        j++;
     1024    }
     1025    TEST_EQUAL(j, 513);
     1026    return true;
     1027}
     1028
     1029static void test_emptyterm1_helper(Xapian::Database & db)
     1030{
             // Shared empty-term checks, run against whatever database the
             // caller passes in:
             //  - positionlist_begin(1, "") must throw InvalidArgumentError;
             //  - get_termfreq("") and get_collection_freq("") must both equal
             //    the document count;
             //  - term_exists("") must be true exactly when the document count
             //    is non-zero.
     1031    // Don't bother with postlist_begin() because allpostlist tests cover that.
     1032    TEST_EXCEPTION(Xapian::InvalidArgumentError, db.positionlist_begin(1, ""));
     1033    TEST_EQUAL(db.get_doccount(), db.get_termfreq(""));
     1034    TEST_EQUAL(db.get_doccount() != 0, db.term_exists(""));
     1035    TEST_EQUAL(db.get_doccount(), db.get_collection_freq(""));
     1036}
     1037
     1038// tests results of passing an empty term to various methods
         //
         // Runs test_emptyterm1_helper() against three databases of different
         // sizes, first confirming the expected document count of each:
         // "apitest_manydocs" (512), "apitest_onedoc" (1) and the database
         // returned by the no-argument get_database() (0).
     1039static bool test_emptyterm1()
     1040{
     1041    Xapian::Database db(get_database("apitest_manydocs"));
     1042    TEST_EQUAL(db.get_doccount(), 512);
     1043    test_emptyterm1_helper(db);
     1044
     1045    db = get_database("apitest_onedoc");
     1046    TEST_EQUAL(db.get_doccount(), 1);
     1047    test_emptyterm1_helper(db);
     1048
             // Database with no documents: the helper then expects
             // term_exists("") to be false.
     1049    db = get_database();
     1050    TEST_EQUAL(db.get_doccount(), 0);
     1051    test_emptyterm1_helper(db);
     1052
     1053    return true;
     1054}
     1055
    10141056// tests collection frequency
    10151057static bool test_collfreq1()
    10161058{
     
    13791421    {"postlist4",          test_postlist4},
    13801422    {"postlist5",          test_postlist5},
    13811423    {"postlist6",          test_postlist6},
     1424    {"allpostlist1",       test_allpostlist1},
     1425    {"emptyterm1",         test_emptyterm1},
    13821426    {"termstats",          test_termstats},
    13831427    {"sortvalue1",         test_sortvalue1},
    13841428    // consistency1 will run on the remote backend, but it's particularly slow
  • tests/api_wrdb.cc

     
    909909    return true;
    910910}
    911911
     912// tests all document postlists
        //
        // Writable-database variant: first checks that the empty-term posting
        // list of "apitest_manydocs" yields docids 1..512 in order, then deletes
        // documents 1, 50 and 512 and checks that the list yields exactly the
        // surviving docids (2..49, 51..511).
     913static bool test_allpostlist2()
     914{
     915    Xapian::WritableDatabase db(get_writable_database("apitest_manydocs"));
     916    Xapian::PostingIterator i = db.postlist_begin("");
     917    unsigned int j = 1;
     918    while (i != db.postlist_end("")) {
     919        TEST_EQUAL(*i, j);
     920        i++;
     921        j++;
     922    }
            // j started at 1 and advanced once per entry: 513 <=> 512 entries.
     923    TEST_EQUAL(j, 513);
     924
            // Remove the first document, one in the middle, and the last one.
     925    db.delete_document(1);
     926    db.delete_document(50);
     927    db.delete_document(512);
     928
     929    i = db.postlist_begin("");
            // Document 1 is gone, so the first expected docid is 2.
     930    j = 2;
     931    while (i != db.postlist_end("")) {
     932        TEST_EQUAL(*i, j);
     933        i++;
     934        j++;
                // Step over the deleted docid 50.
     935        if (j == 50) j++;
     936    }
            // The last surviving docid is 511, so the counter must stop at 512.
     937    TEST_EQUAL(j, 512);
     938
     939    return true;
     940}
     941
     942static void test_emptyterm2_helper(Xapian::WritableDatabase & db)
     943{
            // Same checks as test_emptyterm1_helper(), but taking a
            // WritableDatabase:
            //  - positionlist_begin(1, "") must throw InvalidArgumentError;
            //  - get_termfreq("") and get_collection_freq("") must both equal
            //    the document count;
            //  - term_exists("") must be true exactly when the document count
            //    is non-zero.
     944    // Don't bother with postlist_begin() because allpostlist tests cover that.
     945    TEST_EXCEPTION(Xapian::InvalidArgumentError, db.positionlist_begin(1, ""));
     946    TEST_EQUAL(db.get_doccount(), db.get_termfreq(""));
     947    TEST_EQUAL(db.get_doccount() != 0, db.term_exists(""));
     948    TEST_EQUAL(db.get_doccount(), db.get_collection_freq(""));
     949}
     950
     951// tests results of passing an empty term to various methods
     952// equivalent of emptyterm1 for a writable database
        //
        // Re-runs the empty-term checks after each deletion, so the statistics
        // reported for "" are verified to track the document count as it drops
        // (512 -> 511 -> 510 -> 509, and 1 -> 0 for the one-document database).
     953static bool test_emptyterm2()
     954{
     955    Xapian::WritableDatabase db(get_writable_database("apitest_manydocs"));
     956    TEST_EQUAL(db.get_doccount(), 512);
     957    test_emptyterm2_helper(db);
     958    db.delete_document(1);
     959    TEST_EQUAL(db.get_doccount(), 511);
     960    test_emptyterm2_helper(db);
     961    db.delete_document(50);
     962    TEST_EQUAL(db.get_doccount(), 510);
     963    test_emptyterm2_helper(db);
     964    db.delete_document(512);
     965    TEST_EQUAL(db.get_doccount(), 509);
     966    test_emptyterm2_helper(db);
     967
     968    db = get_writable_database("apitest_onedoc");
     969    TEST_EQUAL(db.get_doccount(), 1);
     970    test_emptyterm2_helper(db);
            // Deleting the only document exercises the transition to an empty
            // database (term_exists("") is then expected to be false).
     971    db.delete_document(1);
     972    TEST_EQUAL(db.get_doccount(), 0);
     973    test_emptyterm2_helper(db);
     974
            // An empty name is expected to give a writable database with no
            // documents.
     975    db = get_writable_database("");
     976    TEST_EQUAL(db.get_doccount(), 0);
     977    test_emptyterm2_helper(db);
     978
     979    return true;
     980}
     981
    912982// Check that PHRASE/NEAR becomes AND if there's no positional info in the
    913983// database.
    914984static bool test_phraseorneartoand1()
     
    10351105    {"replacedoc3",        test_replacedoc3},
    10361106    {"replacedoc4",        test_replacedoc4},
    10371107    {"uniqueterm1",        test_uniqueterm1},
     1108    {"emptyterm2",         test_emptyterm2},
    10381109    {"phraseorneartoand1", test_phraseorneartoand1},
    10391110    {"longpositionlist1",  test_longpositionlist1},
    10401111    {0, 0}
    10411112};
     1113
     1114/// The tests which use a writable, but local, backend
         ///
         /// Each entry pairs a test name with its function; the {0, 0} entry
         /// terminates the table.  apitest.cc runs this group for the
         /// "inmemory", "flint" and "quartz" backends.
     1115test_desc writablelocaldb_tests[] = {
     1116    {"allpostlist2",       test_allpostlist2},
     1117    {0, 0}
     1118};
  • tests/api_wrdb.h

     
    2727#include "testsuite.h"
    2828
    2929extern test_desc writabledb_tests[];
     30extern test_desc writablelocaldb_tests[];
    3031
    3132#endif /* XAPIAN_HGUARD_API_WRDB_H */
  • common/database.h

     
    188188         *                use.
    189189         */
    190190        LeafPostList * open_post_list(const string & tname) const {
    191             if (!term_exists(tname)) {
     191            if (!tname.empty() && !term_exists(tname)) {
    192192                DEBUGLINE(MATCH, tname + " is not in database.");
    193193                // Term doesn't exist in this database.  However, we create
    194194                // a (empty) postlist for it to help make distributed searching
  • api/omdatabase.cc

     
    101101Database::postlist_begin(const string &tname) const
    102102{
    103103    DEBUGAPICALL(PostingIterator, "Database::postlist_begin", tname);
    104     if (tname.empty())
    105         throw InvalidArgumentError("Zero length terms are invalid");
    106104
    107105    // Don't bother checking that the term exists first.  If it does, we
    108106    // just end up doing more work, and if it doesn't, we save very little
     
    248246Database::get_termfreq(const string & tname) const
    249247{
    250248    DEBUGAPICALL(Xapian::doccount, "Database::get_termfreq", tname);
    251     if (tname.empty())
    252         throw InvalidArgumentError("Zero length terms are invalid");
     249    if (tname.empty()) {
     250        return get_doccount();
     251    }
    253252    Xapian::doccount tf = 0;
    254253    vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
    255254    for (i = internal.begin(); i != internal.end(); i++) {
     
    262261Database::get_collection_freq(const string & tname) const
    263262{
    264263    DEBUGAPICALL(Xapian::termcount, "Database::get_collection_freq", tname);
    265     if (tname.empty())
    266         throw InvalidArgumentError("Zero length terms are invalid");
     264    if (tname.empty()) {
     265        return get_doccount();
     266    }
    267267
    268268    Xapian::termcount cf = 0;
    269269    vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
     
    303303bool
    304304Database::term_exists(const string & tname) const
    305305{
    306     if (tname.empty())
    307         throw InvalidArgumentError("Zero length terms are invalid");
     306    if (tname.empty()) {
     307        return get_doccount() != 0;
     308    }
    308309    vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
    309310    for (i = internal.begin(); i != internal.end(); ++i) {
    310311        if ((*i)->term_exists(tname)) return true;
  • backends/inmemory/inmemory_database.cc

     
    260260    return Xapian::PositionIterator(db->open_position_list(did, (*pos).tname));
    261261}
    262262
     263/////////////////////////////
     264// InMemoryAllDocsPostList //
     265/////////////////////////////
     266
     267InMemoryAllDocsPostList::InMemoryAllDocsPostList(Xapian::Internal::RefCntPtr<const InMemoryDatabase> db_)
     268        : did(0), db(db_)
     269{
            // did starts at 0: next() increments before testing, so the first
            // call to next() begins its scan at docid 1.
     270}
     271
     272Xapian::doccount
     273InMemoryAllDocsPostList::get_termfreq() const
     274{
            // The frequency reported for the all-documents list is the
            // database's totdocs counter.
     275    return db->totdocs;
     276}
     277   
     278Xapian::docid
     279InMemoryAllDocsPostList::get_docid() const
     280{
            // Returns the current docid.  The assertions require that the
            // list has been advanced at least once (did > 0), has not run off
            // the end, and that the current slot in termlists is marked valid.
            // Docids are 1-based while termlists is indexed from 0, hence
            // did - 1.
     281    Assert(did > 0);
     282    Assert(did <= db->termlists.size());
     283    Assert(db->termlists[did - 1].is_valid);
     284    return did;
     285}
     286
     287Xapian::doclength
     288InMemoryAllDocsPostList::get_doclength() const
     289{
            // Delegates to the database, asking for the length of the
            // document at the current position.
     290    return db->get_doclength(did);
     291}
     292
     293Xapian::termcount
     294InMemoryAllDocsPostList::get_wdf() const
     295{
            // Every entry of the all-documents list reports a wdf of 1.  This
            // is consistent with Database::get_collection_freq("") returning
            // the document count.
     296    return 1;
     297}
     298
     299PositionList *
     300InMemoryAllDocsPostList::read_position_list()
     301{
            // Position lists are not available for the all-documents list;
            // this always throws UnimplementedError.
     302    throw Xapian::UnimplementedError("Can't open position list for all docs iterator");
     303}
     304
     305PositionList *
     306InMemoryAllDocsPostList::open_position_list() const
     307{
            // Position lists are not available for the all-documents list;
            // this always throws UnimplementedError.
     308    throw Xapian::UnimplementedError("Can't open position list for all docs iterator");
     309}
     310
     311PostList *
     312InMemoryAllDocsPostList::next(Xapian::weight /*w_min*/)
     313{
            // Advance to the next valid document.  The minimum-weight argument
            // is ignored.
     314    Assert(!at_end());
            // Always step at least once, then keep stepping over slots whose
            // is_valid flag is false.  The loop stops either on a valid slot
            // or when did passes termlists.size(), which is the at_end() state.
     315    do {
     316       ++did;
     317    } while (did <= db->termlists.size() && !db->termlists[did - 1].is_valid);
            // NOTE(review): presumably NULL means "no replacement postlist",
            // per the PostList convention - confirm against the base class.
     318    return NULL;
     319}
     320
     321PostList *
     322InMemoryAllDocsPostList::skip_to(Xapian::docid did_, Xapian::weight /*w_min*/)
     323{
            // Move to the first valid document with docid >= did_.  The
            // minimum-weight argument is ignored.
     324    Assert(!at_end());
            // Only ever move forwards: if the current position is already past
            // did_, the position is left unchanged.
            // NOTE(review): if this is called with did_ == 0 before any next()
            // (did is still 0), the loop below evaluates termlists[did - 1]
            // with did == 0, which looks like an out-of-range index - confirm
            // that callers never pass docid 0.
     325    if (did <= did_) {
     326        did = did_;
                // Unlike next(), the target slot itself is tested first, so a
                // valid did_ is landed on directly.
     327        while (did <= db->termlists.size() && !db->termlists[did - 1].is_valid) {
     328            ++did;
     329        }
     330    }
     331    return NULL;
     332}
     333
     334bool
     335InMemoryAllDocsPostList::at_end() const
     336{
            // The list is exhausted once did has moved past the last slot of
            // termlists (docids are 1-based, so did == size() is still the
            // last slot).
     337    return (did > db->termlists.size());
     338}
     339
     340string
     341InMemoryAllDocsPostList::get_description() const
     342{
            // The class name immediately followed by the current docid; no
            // separator is inserted between the two.
     343    return "InMemoryAllDocsPostList" + om_tostring(did);
     344}
     345
    263346///////////////////////////
    264347// Actual database class //
    265348///////////////////////////
     
    279362LeafPostList *
    280363InMemoryDatabase::do_open_post_list(const string & tname) const
    281364{
    282     Assert(tname.size() != 0);
     365    if (tname.empty()) {
     366        if (termlists.empty())
     367            return new EmptyPostList();
     368        return new InMemoryAllDocsPostList(Xapian::Internal::RefCntPtr<const InMemoryDatabase>(this));
     369    }
    283370    map<string, InMemoryTerm>::const_iterator i = postlists.find(tname);
    284371    if (i == postlists.end() || i->second.term_freq == 0)
    285372        return new EmptyPostList();
  • backends/inmemory/inmemory_database.h

     
    159159        string get_description() const;
    160160};
    161161
     162/** A PostList over all docs in an inmemory database.
         *
         *  InMemoryDatabase::do_open_post_list() returns one of these for the
         *  empty term when the database's termlists vector is non-empty.  It
         *  walks docids in increasing order, skipping slots that are not
         *  marked valid.  Every entry reports a wdf of 1, and position lists
         *  are not supported (both position-list methods throw
         *  UnimplementedError).
     163 */
     164class InMemoryAllDocsPostList : public LeafPostList {
     165    friend class InMemoryDatabase;
     166    private:
                // Current docid; initialised to 0, i.e. before the first
                // document, until next() or skip_to() is called.
     167        Xapian::docid did;
     168
                // The database being iterated, held by reference-counted pointer.
     169        Xapian::Internal::RefCntPtr<const InMemoryDatabase> db;
     170
                // Private: instances are created by the friend class
                // InMemoryDatabase.
     171        InMemoryAllDocsPostList(Xapian::Internal::RefCntPtr<const InMemoryDatabase> db);
     172    public:
                // Returns the database's totdocs counter.
     173        Xapian::doccount get_termfreq() const;
     174
     175        Xapian::docid       get_docid() const;     // Gets current docid
     176        Xapian::doclength   get_doclength() const; // Length of current document
     177        Xapian::termcount   get_wdf() const;              // Within Document Frequency
                // Both of these always throw Xapian::UnimplementedError.
     178        PositionList * read_position_list();
     179        PositionList * open_position_list() const;
     180
     181        PostList *next(Xapian::weight w_min); // Moves to next docid
     182
     183        PostList *skip_to(Xapian::docid did, Xapian::weight w_min); // Moves to next docid >= specified docid
     184
     185        // True if we're off the end of the list
     186        bool at_end() const;
     187
                // Class name followed by the current docid.
     188        string get_description() const;
     189};
     190
    162191// Term List
    163192class InMemoryTermList : public LeafTermList {
    164193    friend class InMemoryDatabase;
     
    193222 *  This is a prototype database, mainly used for debugging and testing.
    194223 */
    195224class InMemoryDatabase : public Xapian::Database::Internal {
     225    friend class InMemoryAllDocsPostList;
    196226    private:
    197227        map<string, InMemoryTerm> postlists;
    198228        vector<InMemoryDoc> termlists;