root / tags / 1.0.8 / xapian-core / backends / flint / flint_postlist.h

Revision 9683, 8.0 kB (checked in by richard, 14 months ago)

backends/flint/flint_postlist.cc,backends/quartz/quartz_postlist.cc:
Add NORETURN macro to report_read_error(); fixes warnings from
GCC 4.3 about possibly uninitialised values. Reorder header
includes to follow proposed policy.
backends/flint/flint_postlist.h: Add include of omdebug.h, which
callers previously had to include before including this header.
Tidy up order of includes.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
Line 
1/* flint_postlist.h: Postlists in flint databases
2 *
3 * Copyright 1999,2000,2001 BrightStation PLC
4 * Copyright 2002 Ananova Ltd
5 * Copyright 2002,2003,2004,2005,2007 Olly Betts
6 * Copyright 2007 Lemur Consulting Ltd
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License, or (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
21 * USA
22 */
23
24#ifndef OM_HGUARD_FLINT_POSTLIST_H
25#define OM_HGUARD_FLINT_POSTLIST_H
26
27#include <xapian/database.h>
28
29#include "flint_types.h"
30#include "flint_positionlist.h"
31#include "leafpostlist.h"
32#include "omassert.h"
33#include "omdebug.h"
34
35#include "autoptr.h"
36#include <map>
37#include <string>
38
// NOTE(review): a using-directive at header scope is visible to every file
// that includes this header.  The declarations below rely on it (they use
// unqualified string, map and pair), so it cannot simply be dropped without
// qualifying those names first.
39using namespace std;
40
// Forward declarations.  In this header these types are only named through
// pointers or as smart-pointer template arguments.
41class FlintCursor;
42class FlintDatabase;
43
44class PostlistChunkReader;
45class PostlistChunkWriter;
46
/** Flint table holding posting lists.
 *
 *  Derives from FlintTable; the constructor hands FlintTable the given path
 *  with "/postlist." appended.  On top of that this class provides key
 *  construction for postlist entries and lookups of per-term statistics.
 */
47class FlintPostListTable : public FlintTable {
48    public:
49        /** Create a new table object.
50         *
51         *  This does not create the table on disk - the create() method must
52         *  be called to do that.
53         *
54         *  This also does not open the table - the open() method must be
55         *  called before use is made of the table.
56         *
57         *  @param path_          - Path at which the table is stored
         *                          ("/postlist." is appended to it here).
58         *  @param readonly_      - whether to open the table for read only
59         *                          access.
60         */
61        FlintPostListTable(string path_, bool readonly_)
62            : FlintTable(path_ + "/postlist.", readonly_) { }
63
64        /// Merge added, removed, and changed entries.
        ///
        /// NOTE(review): the parameter semantics are not visible in this
        /// header.  From the types, @a mod_plists maps a string to a
        /// per-docid (char, termcount) pair, @a doclens maps docid to a
        /// termcount, and @a freq_deltas maps a string to a pair of
        /// termcount_diff values - presumably keyed by term name, with the
        /// char acting as an add/remove/change flag.  Confirm against the
        /// definition.
65        void merge_changes(
66            const map<string, map<Xapian::docid, pair<char, Xapian::termcount> > > & mod_plists,
67            const map<Xapian::docid, Xapian::termcount> & doclens,
68            const map<string, pair<Xapian::termcount_diff, Xapian::termcount_diff> > & freq_deltas);
69
        /// Declared here, defined out of line.
        ///
        /// NOTE(review): from the signature, @a from and @a to look like
        /// out-parameters receiving a chunk reader and writer for term
        /// @a tname and document @a did; ownership of the returned objects
        /// and the meaning of the returned docid are not visible here -
        /// confirm against the definition.
70        Xapian::docid get_chunk(const string &tname,
71                Xapian::docid did, bool adding,
72                PostlistChunkReader ** from, PostlistChunkWriter **to);
73
74        /// Compose a key from a termname and docid.
        ///
        /// The key is pack_string_preserving_sort(term) followed by
        /// pack_uint_preserving_sort(did), so it starts with the key that
        /// make_key(term) returns.
75        static string make_key(const string & term, Xapian::docid did) {
76            string key = pack_string_preserving_sort(term);
77            key += pack_uint_preserving_sort(did);
78            return key;
79        }
80
81        /// Compose a key from a termname.
        ///
        /// This is just pack_string_preserving_sort(term), with no docid
        /// part.
82        static string make_key(const string & term) {
83            return pack_string_preserving_sort(term);
84        }
85
        /// Return true if the table has an entry under make_key(term).
        ///
        /// key_exists() is not declared in this class; presumably it is
        /// inherited from FlintTable.
86        bool term_exists(const string & term) const {
87            return key_exists(make_key(term));
88        }
89
90        /** Returns number of docs indexed by @a term.
91         *
92         *  This is the length of the postlist.
93         */
94        Xapian::doccount get_termfreq(const std::string & term) const;
95
96        /** Returns the number of occurrences of @a term in the database.
97         *
98         *  This is the sum of the wdfs in the postlist.
99         */
100        Xapian::termcount get_collection_freq(const std::string & term) const;
101};
102
103/** A postlist in a flint database.
 *
 *  Derives from LeafPostList.  The list is read chunk by chunk: a
 *  FlintCursor tracks the current chunk, while pos and end delimit the part
 *  of that chunk still to be read.  The copy constructor and assignment
 *  operator are declared private, so objects of this class cannot be copied.
104 */
105class FlintPostList : public LeafPostList {
106   protected: // FlintModifiedPostList needs to access these.
107        /** The database we are searching.  This pointer is held so that the
108         *  database doesn't get deleted before us, and also to give us access
109         *  to the position_table.
110         */
111        Xapian::Internal::RefCntPtr<const FlintDatabase> this_db;
112
113        /// The termname for this postlist.
114        string tname;
115
116        /// Whether we've started reading the list yet.
        /// The inline accessors below (get_docid(), get_doclength(),
        /// get_wdf()) assert this before returning a value.
117        bool have_started;
118
119        /// The position list object for this posting list.
120        FlintPositionList positionlist;
121
122    private:
123        /// Cursor pointing to current chunk of postlist.
124        AutoPtr<FlintCursor> cursor;
125
126        /// True if this is the last chunk.
127        bool is_last_chunk;
128
129        /// The first document id in this chunk.
130        Xapian::docid first_did_in_chunk;
131
132        /// The last document id in this chunk.
133        Xapian::docid last_did_in_chunk;
134
135        /// Position of iteration through current chunk.
136        const char * pos;
137
138        /// Pointer to byte after end of current chunk.
139        const char * end;
140
141        /// Document id we're currently at.
142        Xapian::docid did;
143
144        /// The (absolute) length of the current document.
145        flint_doclen_t doclength;
146
147        /// The wdf of the current document.
148        Xapian::termcount wdf;
149
150        /// Whether we've run off the end of the list yet.
151        bool is_at_end;
152
153        /// The number of entries in the posting list.
154        Xapian::doccount number_of_entries;
155
156        /// Copying is not allowed (declared private to prevent it).
157        FlintPostList(const FlintPostList &);
158
159        /// Assignment is not allowed (declared private to prevent it).
160        void operator=(const FlintPostList &);
161
162        /** Move to the next item in the chunk, if possible.
163         *  If already at the end of the chunk, returns false.
164         */
165        bool next_in_chunk();
166
167        /** Move to the next chunk.
168         *
169         *  If there are no more chunks in this postlist, this will set
170         *  is_at_end to true.
171         */
172        void next_chunk();
173
174        /** Return true if the given document ID lies in the range covered
175         *  by the current chunk.  This does not say whether the document ID
176         *  is actually present.  It will return false if the document ID
177         *  is greater than the last document ID in the chunk, even if it is
178         *  less than the first document ID in the next chunk: it is possible
179         *  for no chunk to contain a particular document ID.
180         */
181        bool current_chunk_contains(Xapian::docid desired_did);
182
183        /** Move to chunk containing the specified document ID.
184         *
185         *  This moves to the chunk whose starting document ID is
186         *  <= desired_did, but such that the next chunk's starting
187         *  document ID is > desired_did.
188         *
189         *  It is thus possible that current_chunk_contains(desired_did)
190         *  will return false after this call, since the document ID
191         *  might lie after the end of this chunk, but before the start
192         *  of the next chunk.
193         */
194        void move_to_chunk_containing(Xapian::docid desired_did);
195
196        /** Scan forward in the current chunk for the specified document ID.
197         *
198         *  This is particularly efficient if the desired document ID is
199         *  greater than the last in the chunk - it then skips straight
200         *  to the end.
201         *
202         *  @return true if we moved to a valid document,
203         *          false if we reached the end of the chunk.
204         */
205        bool move_forward_in_chunk_to_at_least(Xapian::docid desired_did);
206
207    public:
208        /// Constructor: takes the database and the termname for this postlist.
        /// (This is not a default constructor - both arguments are required.)
209        FlintPostList(Xapian::Internal::RefCntPtr<const FlintDatabase> this_db_,
210                      const string & tname);
211
212        /// Destructor.
213        ~FlintPostList();
214
215        /** Returns number of docs indexed by this term.
216         *
217         *  This is the length of the postlist.
218         */
219        Xapian::doccount get_termfreq() const { return number_of_entries; }
220
221        /// Returns the current docid.  Asserts that reading has started.
222        Xapian::docid get_docid() const { Assert(have_started); return did; }
223
224        /// Returns the length of current document.
        /// Asserts that reading has started; the stored flint_doclen_t value
        /// is converted to Xapian::doclength with a static_cast.
225        Xapian::doclength get_doclength() const {
226            DEBUGCALL(DB, Xapian::doclength, "FlintPostList::get_doclength", "");
227            Assert(have_started);
228            RETURN(static_cast<Xapian::doclength>(doclength));
229        }
230
231        /** Returns the Within Document Frequency of the term in the current
232         *  document.  Asserts that reading has started.
233         */
234        Xapian::termcount get_wdf() const { Assert(have_started); return wdf; }
235
236        /** Get the list of positions of the term in the current document.
         *
         *  NOTE(review): this overload is non-const; presumably it reuses
         *  the positionlist member, in which case the returned pointer
         *  would be owned by this object - confirm against the definition.
237         */
238        PositionList *read_position_list();
239
240        /** Get the list of positions of the term in the current document.
         *
         *  NOTE(review): this overload is const, so it cannot modify the
         *  positionlist member; presumably it returns a separate object
         *  owned by the caller - confirm against the definition.
241         */
242        PositionList * open_position_list() const;
243
244        /// Move to the next document.
245        PostList * next(Xapian::weight w_min);
246
247        /// Skip to next document with docid >= desired_did.
248        PostList * skip_to(Xapian::docid desired_did, Xapian::weight w_min);
249
250        /// Return true if and only if we're off the end of the list.
251        bool at_end() const { return is_at_end; }
252
253        /// Get a description of the document.
        /// NOTE(review): likely a description of this postlist rather than
        /// of a document - confirm against the definition.
254        std::string get_description() const;
255
256        /// Read the number of entries and the collection frequency.
        ///
        /// NOTE(review): @a number_of_entries_ptr is a Xapian::termcount
        /// pointer, although the number_of_entries member and
        /// get_termfreq() use Xapian::doccount; check whether the two types
        /// are interchangeable here.  @a posptr is passed by pointer and is
        /// presumably advanced past the data read, with @a end bounding the
        /// read - confirm against the definition.
257        static void read_number_of_entries(const char ** posptr,
258                                           const char * end,
259                                           Xapian::termcount * number_of_entries_ptr,
260                                           Xapian::termcount * collection_freq_ptr);
261};
262
263#endif /* OM_HGUARD_FLINT_POSTLIST_H */
Note: See TracBrowser for help on using the browser.