root / tags / 1.0.8 / xapian-core / backends / flint / flint_termlist.cc

Revision 9463, 5.2 kB (checked in by olly, 15 months ago)

common/omassert.h: Rewritten from scratch. The new version only
includes headers if assertions are enabled, which should help
to speed up non-assertion builds by reducing unnecessary header
inclusion. Also, float.h and math.h are now never pulled in -
instead we use the new within_DBL_EPSILON() function. AssertNe()
and AssertNeParanoid() are never actually used, so replace them with
AssertRel() and AssertRelParanoid(), which allow the user to assert any
binary relation, not just inequality. Also, we now use rare() to
give branch prediction hints for assertion tests (since the failure
branch should never be taken).
common/omdebug.h,common/stringutils.h,tests/harness/testsuite.h:
Replace several definitions of the STRINGIZE macro with a single
version in common/stringutils.h.
backends/flint/,backends/inmemory/inmemory_database.cc,
backends/multi/multi_postlist.cc,backends/quartz/,
backends/remote/remote-database.cc,bin/quartzcheck.cc,
bin/xapian-compact.cc,common/stringutils.h,expand/expandweight.cc,
expand/ortermlist.cc,matcher/phrasepostlist.cc,
matcher/scaleweightpostlist.cc,net/remoteconnection.cc,
net/tcpserver.cc: Explicitly include headers which were previously
being pulled in implicitly by omassert.h.
HACKING: Update the documentation for assertion calls, and document
CompileTimeAssert() (which previously wasn't documented here).

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
Line 
1/* flint_termlist.cc: Termlists in a flint database
2 *
3 * Copyright 1999,2000,2001 BrightStation PLC
4 * Copyright 2002 Ananova Ltd
5 * Copyright 2002,2003,2004,2006,2007 Olly Betts
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
20 * USA
21 */
22
23#include <config.h>
24
25#include <xapian/error.h>
26
27#include "expandweight.h"
28#include "flint_positionlist.h"
29#include "flint_termlist.h"
30#include "flint_utils.h"
31#include "omassert.h"
32#include "utils.h"
33
34using namespace std;
35
36FlintTermList::FlintTermList(Xapian::Internal::RefCntPtr<const FlintDatabase> db_,
37                             Xapian::docid did_)
38        : db(db_), did(did_), current_wdf(0), current_termfreq(0)
39{
40    DEBUGCALL(DB, void, "FlintTermList",
41              "[RefCntPtr<const FlintDatabase>], " << did_);
42
43    if (!db->termlist_table.get_exact_entry(flint_docid_to_key(did), data))
44        throw Xapian::DocNotFoundError("No termlist for document " + om_tostring(did));
45
46    pos = data.data();
47    end = pos + data.size();
48
49    if (pos == end) {
50        doclen = 0;
51        termlist_size = 0;
52        return;
53    }
54
55    // Read doclen
56    if (!unpack_uint(&pos, end, &doclen)) {
57        const char *msg;
58        if (pos == 0) {
59            msg = "Too little data for doclen in termlist";
60        } else {
61            msg = "Overflowed value for doclen in termlist";
62        }
63        throw Xapian::DatabaseCorruptError(msg);
64    }
65
66    // Read termlist_size
67    if (!unpack_uint(&pos, end, &termlist_size)) {
68        const char *msg;
69        if (pos == 0) {
70            msg = "Too little data for list size in termlist";
71        } else {
72            msg = "Overflowed value for list size in termlist";
73        }
74        throw Xapian::DatabaseCorruptError(msg);
75    }
76
77    // See comment in FlintTermListTable::set_termlist() in
78    // flint_termlisttable.cc for an explanation of this!
79    if (pos != end && *pos == '0') ++pos;
80}
81
82flint_doclen_t
83FlintTermList::get_doclength() const
84{
85    DEBUGCALL(DB, flint_doclen_t, "FlintTermList::get_doclength", "");
86    RETURN(doclen);
87}
88
89Xapian::termcount
90FlintTermList::get_approx_size() const
91{
92    DEBUGCALL(DB, Xapian::termcount, "FlintTermList::get_approx_size", "");
93    RETURN(termlist_size);
94}
95
96void
97FlintTermList::accumulate_stats(Xapian::Internal::ExpandStats & stats) const
98{
99    DEBUGCALL(DB, void, "FlintTermList::accumulate_stats", "[stats&]");
100    Assert(!at_end());
101    stats.accumulate(current_wdf, doclen, get_termfreq(), db->get_doccount());
102}
103
104string
105FlintTermList::get_termname() const
106{
107    DEBUGCALL(DB, string, "FlintTermList::get_termname", "");
108    RETURN(current_term);
109}
110
111Xapian::termcount
112FlintTermList::get_wdf() const
113{
114    DEBUGCALL(DB, Xapian::termcount, "FlintTermList::get_wdf", "");
115    RETURN(current_wdf);
116}
117
118Xapian::doccount
119FlintTermList::get_termfreq() const
120{
121    DEBUGCALL(DB, Xapian::doccount, "FlintTermList::get_termfreq", "");
122    if (current_termfreq == 0)
123        current_termfreq = db->get_termfreq(current_term);
124    RETURN(current_termfreq);
125}
126
127TermList *
128FlintTermList::next()
129{
130    DEBUGCALL(DB, TermList *, "FlintTermList::next", "");
131    Assert(!at_end());
132    if (pos == end) {
133        pos = NULL;
134        RETURN(NULL);
135    }
136
137    // Reset to 0 to indicate that the termfreq needs to be read.
138    current_termfreq = 0;
139
140    bool wdf_in_reuse = false;
141    if (!current_term.empty()) {
142        // Find out how much of the previous term to reuse.
143        size_t len = static_cast<unsigned char>(*pos++);
144        if (len > current_term.size()) {
145            // The wdf is also stored in the "reuse" byte.
146            wdf_in_reuse = true;
147            size_t divisor = current_term.size() + 1;
148            current_wdf = len / divisor - 1;
149            len %= divisor;
150        }
151        current_term.resize(len);
152    }
153
154    // Append the new tail to form the next term.
155    size_t append_len = static_cast<unsigned char>(*pos++);
156    current_term.append(pos, append_len);
157    pos += append_len;
158
159    // Read the wdf if it wasn't packed into the reuse byte.
160    if (!wdf_in_reuse && !unpack_uint(&pos, end, &current_wdf)) {
161        const char *msg;
162        if (pos == 0) {
163            msg = "Too little data for wdf in termlist";
164        } else {
165            msg = "Overflowed value for wdf in termlist";
166        }
167        throw Xapian::DatabaseCorruptError(msg);
168    }
169
170    RETURN(NULL);
171}
172
173bool
174FlintTermList::at_end() const
175{
176    DEBUGCALL(DB, bool, "FlintTermList::at_end", "");
177    RETURN(pos == NULL);
178}
179
180Xapian::termcount
181FlintTermList::positionlist_count() const
182{
183    DEBUGCALL(DB, Xapian::termcount, "FlintTermList::positionlist_count", "");
184    RETURN(db->position_table.positionlist_count(did, current_term));
185}
186
187Xapian::PositionIterator
188FlintTermList::positionlist_begin() const
189{
190    DEBUGCALL(DB, Xapian::PositionIterator, "FlintTermList::positionlist_begin", "");
191    return Xapian::PositionIterator(
192            new FlintPositionList(&db->position_table, did, current_term));
193}
Note: See TracBrowser for help on using the browser.