root / tags / 1.0.8 / xapian-core / backends / flint / flint_postlist.cc

Revision 9683, 33.8 kB (checked in by richard, 14 months ago)

backends/flint/flint_postlist.cc,backends/quartz/quartz_postlist.cc:
Add NORETURN macro to report_read_error(); fixes warnings from
GCC 4.3 about possibly uninitialised values. Reorder header
includes to follow proposed policy.
backends/flint/flint_postlist.h: Add include of omdebug.h which
previously needed to be done before including this. Tidy up
order of includes.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
Line 
1/* flint_postlist.cc: Postlists in a flint database
2 *
3 * Copyright 1999,2000,2001 BrightStation PLC
4 * Copyright 2002,2003,2004,2005,2007 Olly Betts
5 * Copyright 2007 Lemur Consulting Ltd
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
20 * USA
21 */
22
23#include <config.h>
24#include "flint_postlist.h"
25
26#include "flint_cursor.h"
27#include "flint_database.h"
28#include "flint_utils.h"
29#include "noreturn.h"
30#include "omdebug.h"
31#include "utils.h"
32
33Xapian::doccount
34FlintPostListTable::get_termfreq(const string & term) const
35{
36    string key = make_key(term);
37    string tag;
38    if (!get_exact_entry(key, tag)) return 0;
39
40    Xapian::doccount termfreq;
41    const char * p = tag.data();
42    FlintPostList::read_number_of_entries(&p, p + tag.size(), &termfreq, NULL);
43    return termfreq;
44}
45
46Xapian::termcount
47FlintPostListTable::get_collection_freq(const string & term) const
48{
49    string key = make_key(term);
50    string tag;
51    if (!get_exact_entry(key, tag)) return 0;
52
53    Xapian::termcount collfreq;
54    const char * p = tag.data();
55    FlintPostList::read_number_of_entries(&p, p + tag.size(), NULL, &collfreq);
56    return collfreq;
57}
58
59// How big should chunks in the posting list be?  (They
60// will grow slightly bigger than this, but not more than a
61// few bytes extra) - FIXME: tune this value to try to
62// maximise how well blocks are used.  Or performance.
63// Or indexing speed.  Or something...
64const unsigned int CHUNKSIZE = 2000;
65
66/** PostlistChunkWriter is a wrapper which acts roughly as an
67 *  output iterator on a postlist chunk, taking care of the
68 *  messy details.  It's intended to be used with deletion and
69 *  replacing of entries, not for adding to the end, when it's
70 *  not really needed.
71 */
72class PostlistChunkWriter {
73    public:
74        PostlistChunkWriter(const string &orig_key_,
75                            bool is_first_chunk_,
76                            const string &tname_,
77                            bool is_last_chunk_);
78
79        /// Append an entry to this chunk.
80        void append(FlintTable * table, Xapian::docid did,
81                    Xapian::termcount wdf, flint_doclen_t doclen);
82
83        /// Append a block of raw entries to this chunk.
84        void raw_append(Xapian::docid first_did_, Xapian::docid current_did_,
85                        const string & s) {
86            Assert(!started);
87            first_did = first_did_;
88            current_did = current_did_;
89            if (!s.empty()) {
90                chunk.append(s);
91                started = true;
92            }
93        }
94
95        /** Flush the chunk to the buffered table.  Note: this may write it
96         *  with a different key to the original one, if for example the first
97         *  entry has changed.
98         */
99        void flush(FlintTable *table);
100
101    private:
102        string orig_key;
103        string tname;
104        bool is_first_chunk;
105        bool is_last_chunk;
106        bool started;
107
108        Xapian::docid first_did;
109        Xapian::docid current_did;
110
111        string chunk;
112};
113
114// Static functions
115
116/// Report an error when reading the posting list.
117XAPIAN_NORETURN(static void report_read_error(const char * position));
118static void report_read_error(const char * position)
119{
120    if (position == 0) {
121        // data ran out
122        throw Xapian::DatabaseCorruptError("Data ran out unexpectedly when reading posting list.");
123    }
124    // overflow
125    throw Xapian::RangeError("Value in posting list too large.");
126}
127
128static inline bool get_tname_from_key(const char **src, const char *end,
129                               string &tname)
130{
131    return unpack_string_preserving_sort(src, end, tname);
132}
133
134static inline bool
135check_tname_in_key_lite(const char **keypos, const char *keyend, const string &tname)
136{
137    string tname_in_key;
138
139    // Read the termname.
140    if (!get_tname_from_key(keypos, keyend, tname_in_key)) {
141        report_read_error(*keypos);
142    }
143
144    // This should only fail if the postlist doesn't exist at all.
145    return tname_in_key == tname;
146}
147
148static inline bool
149check_tname_in_key(const char **keypos, const char *keyend, const string &tname)
150{
151    if (*keypos == keyend) return false;
152
153    return check_tname_in_key_lite(keypos, keyend, tname);
154}
155
156/// Read the start of the first chunk in the posting list.
157static Xapian::docid
158read_start_of_first_chunk(const char ** posptr,
159                          const char * end,
160                          Xapian::doccount * number_of_entries_ptr,
161                          Xapian::termcount * collection_freq_ptr)
162{
163    DEBUGCALL_STATIC(DB, Xapian::docid, "read_start_of_first_chunk",
164                     (const void *)posptr << ", " <<
165                     (const void *)end << ", " <<
166                     (void *)number_of_entries_ptr << ", " <<
167                     (void *)collection_freq_ptr);
168
169    FlintPostList::read_number_of_entries(posptr, end,
170                           number_of_entries_ptr, collection_freq_ptr);
171    if (number_of_entries_ptr)
172        DEBUGLINE(DB, "number_of_entries = " << *number_of_entries_ptr);
173    if (collection_freq_ptr)
174        DEBUGLINE(DB, "collection_freq = " << *collection_freq_ptr);
175
176    Xapian::docid did;
177    // Read the docid of the first entry in the posting list.
178    if (!unpack_uint(posptr, end, &did))
179        report_read_error(*posptr);
180    ++did;
181    DEBUGLINE(DB, "doc_id = " << did);
182    RETURN(did);
183}
184
185static inline void read_did_increase(const char ** posptr,
186                              const char * end,
187                              Xapian::docid * did_ptr)
188{
189    Xapian::docid did_increase;
190    if (!unpack_uint(posptr, end, &did_increase)) report_read_error(*posptr);
191    *did_ptr += did_increase + 1;
192}
193
194/// Read the wdf and the document length of an item.
195static inline void read_wdf_and_length(const char ** posptr,
196                                const char * end,
197                                Xapian::termcount * wdf_ptr,
198                                flint_doclen_t * doclength_ptr)
199{
200    if (!unpack_uint(posptr, end, wdf_ptr)) report_read_error(*posptr);
201    if (!unpack_uint(posptr, end, doclength_ptr)) report_read_error(*posptr);
202}
203
204/// Read the start of a chunk.
205static Xapian::docid
206read_start_of_chunk(const char ** posptr,
207                    const char * end,
208                    Xapian::docid first_did_in_chunk,
209                    bool * is_last_chunk_ptr)
210{
211    DEBUGCALL_STATIC(DB, Xapian::docid, "read_start_of_chunk",
212                     reinterpret_cast<const void*>(posptr) << ", " <<
213                     reinterpret_cast<const void*>(end) << ", " <<
214                     first_did_in_chunk << ", " <<
215                     reinterpret_cast<const void*>(is_last_chunk_ptr));
216
217    // Read whether this is the last chunk
218    if (!unpack_bool(posptr, end, is_last_chunk_ptr))
219        report_read_error(*posptr);
220    if (is_last_chunk_ptr)
221        DEBUGLINE(DB, "is_last_chunk = " << *is_last_chunk_ptr);
222
223    // Read what the final document ID in this chunk is.
224    Xapian::docid increase_to_last;
225    if (!unpack_uint(posptr, end, &increase_to_last))
226        report_read_error(*posptr);
227    ++increase_to_last;
228    Xapian::docid last_did_in_chunk = first_did_in_chunk + increase_to_last;
229    DEBUGLINE(DB, "last_did_in_chunk = " << last_did_in_chunk);
230    RETURN(last_did_in_chunk);
231}
232
233static string make_wdf_and_length(Xapian::termcount wdf, flint_doclen_t doclength)
234{
235    return pack_uint(wdf) + pack_uint(doclength);
236}
237
238static void write_start_of_chunk(string & chunk,
239                                 unsigned int start_of_chunk_header,
240                                 unsigned int end_of_chunk_header,
241                                 bool is_last_chunk,
242                                 Xapian::docid first_did_in_chunk,
243                                 Xapian::docid last_did_in_chunk)
244{
245    Assert((size_t)(end_of_chunk_header - start_of_chunk_header) <= chunk.size());
246    Assert(last_did_in_chunk >= first_did_in_chunk);
247    Xapian::docid increase_to_last = last_did_in_chunk - first_did_in_chunk;
248
249    chunk.replace(start_of_chunk_header,
250                  end_of_chunk_header - start_of_chunk_header,
251                  pack_bool(is_last_chunk) + pack_uint(increase_to_last - 1));
252    // FIXME - storing increase_to_last - 1 is bogus as this value is
253    // -1 when a postlist chunk has a single entry!  Luckily the code
254    // works despite this, but it's ugly.
255}
256
257/** PostlistChunkReader is essentially an iterator wrapper
258 *  around a postlist chunk.  It simply iterates through the
259 *  entries in a postlist.
260 */
261class PostlistChunkReader {
262    public:
263        /** Initialise the postlist chunk reader.
264         *
265         *  @param first_did  First document id in this chunk.
266         *  @param data       The tag string with the header removed.
267         */
268        PostlistChunkReader(Xapian::docid first_did, const string & data_)
269            : data(data_), pos(data.data()), end(pos + data.length()), at_end(data.empty()), did(first_did)
270        {
271            if (!at_end) read_wdf_and_length(&pos, end, &wdf, &doclength);
272        }
273
274        Xapian::docid get_docid() const {
275            return did;
276        }
277        Xapian::termcount get_wdf() const {
278            return wdf;
279        }
280        flint_doclen_t get_doclength() const {
281            DEBUGCALL(DB, flint_doclen_t, "PostlistChunkReader::get_doclength", "");
282            RETURN(doclength);
283        }
284
285        bool is_at_end() const {
286            return at_end;
287        }
288
289        /** Advance to the next entry.  Set at_end if we run off the end.
290         */
291        void next();
292
293    private:
294        string data;
295
296        const char *pos;
297        const char *end;
298
299        bool at_end;
300
301        Xapian::docid did;
302        Xapian::termcount wdf;
303        flint_doclen_t doclength;
304};
305
306void
307PostlistChunkReader::next()
308{
309    if (pos == end) {
310        at_end = true;
311    } else {
312        read_did_increase(&pos, end, &did);
313        read_wdf_and_length(&pos, end, &wdf, &doclength);
314    }
315}
316
317PostlistChunkWriter::PostlistChunkWriter(const string &orig_key_,
318                                         bool is_first_chunk_,
319                                         const string &tname_,
320                                         bool is_last_chunk_)
321        : orig_key(orig_key_),
322          tname(tname_), is_first_chunk(is_first_chunk_),
323          is_last_chunk(is_last_chunk_),
324          started(false)
325{
326    DEBUGCALL(DB, void, "PostlistChunkWriter::PostlistChunkWriter",
327              orig_key_ << ", " << is_first_chunk_ << ", " << tname_ << ", " <<
328              is_last_chunk_);
329}
330
331void
332PostlistChunkWriter::append(FlintTable * table, Xapian::docid did,
333                            Xapian::termcount wdf, flint_doclen_t doclen)
334{
335    if (!started) {
336        started = true;
337        first_did = did;
338    } else {
339        Assert(did > current_did);
340        // Start a new chunk if this one has grown to the threshold.
341        if (chunk.size() >= CHUNKSIZE) {
342            bool save_is_last_chunk = is_last_chunk;
343            is_last_chunk = false;
344            flush(table);
345            is_last_chunk = save_is_last_chunk;
346            is_first_chunk = false;
347            first_did = did;
348            chunk = "";
349            orig_key = FlintPostListTable::make_key(tname, first_did);
350        } else {
351            chunk.append(pack_uint(did - current_did - 1));
352        }
353    }
354    current_did = did;
355    chunk.append(make_wdf_and_length(wdf, doclen));
356}
357
358/** Make the data to go at the start of the very first chunk.
359 */
360static inline string
361make_start_of_first_chunk(Xapian::termcount entries,
362                          Xapian::termcount collectionfreq,
363                          Xapian::docid new_did)
364{
365    return pack_uint(entries) + pack_uint(collectionfreq) + pack_uint(new_did - 1);
366}
367
368/** Make the data to go at the start of a standard chunk.
369 */
370static inline string
371make_start_of_chunk(bool new_is_last_chunk,
372                    Xapian::docid new_first_did,
373                    Xapian::docid new_final_did)
374{
375    Assert(new_final_did >= new_first_did);
376    return pack_bool(new_is_last_chunk) +
377            pack_uint(new_final_did - new_first_did - 1);
378}
379
380void
381PostlistChunkWriter::flush(FlintTable *table)
382{
383    DEBUGCALL(DB, void, "PostlistChunkWriter::flush", table);
384
385    /* This is one of the more messy parts involved with updating posting
386     * list chunks.
387     *
388     * Depending on circumstances, we may have to delete an entire chunk
389     * or file it under a different key, as well as possibly modifying both
390     * the previous and next chunk of the postlist.
391     */
392
393    if (!started) {
394        /* This chunk is now empty so disappears entirely.
395         *
396         * If this was the last chunk, then the previous chunk
397         * must have its "is_last_chunk" flag updated.
398         *
399         * If this was the first chunk, then the next chunk must
400         * be transformed into the first chunk.  Messy!
401         */
402        DEBUGLINE(DB, "PostlistChunkWriter::flush(): deleting chunk");
403        Assert(!orig_key.empty());
404        if (is_first_chunk) {
405            DEBUGLINE(DB, "PostlistChunkWriter::flush(): deleting first chunk");
406            if (is_last_chunk) {
407                /* This is the first and the last chunk, ie the only
408                 * chunk, so just delete the tag.
409                 */
410                table->del(orig_key);
411                return;
412            }
413
414            /* This is the messiest case.  The first chunk is to
415             * be removed, and there is at least one chunk after
416             * it.  Need to rewrite the next chunk as the first
417             * chunk.
418             */
419            AutoPtr<FlintCursor> cursor(table->cursor_get());
420
421            if (!cursor->find_entry(orig_key)) {
422                throw Xapian::DatabaseCorruptError("The key we're working on has disappeared");
423            }
424
425            // Extract existing counts from the first chunk so we can reinsert
426            // them into the block we're renaming.
427            Xapian::termcount num_ent, coll_freq;
428            {
429                cursor->read_tag();
430                const char *tagpos = cursor->current_tag.data();
431                const char *tagend = tagpos + cursor->current_tag.size();
432
433                (void)read_start_of_first_chunk(&tagpos, tagend,
434                                                &num_ent, &coll_freq);
435            }
436
437            // Seek to the next chunk.
438            cursor->next();
439            if (cursor->after_end()) {
440                throw Xapian::DatabaseCorruptError("Expected another key but found none");
441            }
442            const char *kpos = cursor->current_key.data();
443            const char *kend = kpos + cursor->current_key.size();
444            if (!check_tname_in_key(&kpos, kend, tname)) {
445                throw Xapian::DatabaseCorruptError("Expected another key with the same term name but found a different one");
446            }
447
448            // Read the new first docid
449            Xapian::docid new_first_did;
450            if (!unpack_uint_preserving_sort(&kpos, kend,
451                                             &new_first_did)) {
452                report_read_error(kpos);
453            }
454
455            cursor->read_tag();
456            const char *tagpos = cursor->current_tag.data();
457            const char *tagend = tagpos + cursor->current_tag.size();
458
459            // Read the chunk header
460            bool new_is_last_chunk;
461            Xapian::docid new_last_did_in_chunk =
462                read_start_of_chunk(&tagpos, tagend, new_first_did,
463                                    &new_is_last_chunk);
464
465            string chunk_data(tagpos, tagend);
466
467            // First remove the renamed tag
468            table->del(cursor->current_key);
469
470            // And now write it as the first chunk
471            string tag;
472            tag = make_start_of_first_chunk(num_ent, coll_freq, new_first_did);
473            tag += make_start_of_chunk(new_is_last_chunk,
474                                              new_first_did,
475                                              new_last_did_in_chunk);
476            tag += chunk_data;
477            table->add(orig_key, tag);
478            return;
479        }
480
481        DEBUGLINE(DB, "PostlistChunkWriter::flush(): deleting secondary chunk");
482        /* This isn't the first chunk.  Check whether we're the last
483         * chunk.
484         */
485
486        // Delete this chunk
487        table->del(orig_key);
488
489        if (is_last_chunk) {
490            DEBUGLINE(DB, "PostlistChunkWriter::flush(): deleting secondary last chunk");
491            // Update the previous chunk's is_last_chunk flag.
492            AutoPtr<FlintCursor> cursor(table->cursor_get());
493
494            /* Should not find the key we just deleted, but should
495             * find the previous chunk. */
496            if (cursor->find_entry(orig_key)) {
497                throw Xapian::DatabaseCorruptError("Flint key not deleted as we expected");
498            }
499            // Make sure this is a chunk with the right term attached.
500            const char * keypos = cursor->current_key.data();
501            const char * keyend = keypos + cursor->current_key.size();
502            if (!check_tname_in_key(&keypos, keyend, tname)) {
503                throw Xapian::DatabaseCorruptError("Couldn't find chunk before delete chunk");
504            }
505
506            bool is_prev_first_chunk = (keypos == keyend);
507
508            // Now update the last_chunk
509            cursor->read_tag();
510            string tag = cursor->current_tag;
511
512            const char *tagpos = tag.data();
513            const char *tagend = tagpos + tag.size();
514
515            // Skip first chunk header
516            Xapian::docid first_did_in_chunk;
517            if (is_prev_first_chunk) {
518                first_did_in_chunk = read_start_of_first_chunk(&tagpos, tagend,
519                                          0, 0);
520            } else {
521                if (!unpack_uint_preserving_sort(&keypos, keyend,
522                                                 &first_did_in_chunk))
523                    report_read_error(keypos);
524            }
525            bool wrong_is_last_chunk;
526            string::size_type start_of_chunk_header = tagpos - tag.data();
527            Xapian::docid last_did_in_chunk =
528                read_start_of_chunk(&tagpos, tagend, first_did_in_chunk,
529                                    &wrong_is_last_chunk);
530            string::size_type end_of_chunk_header = tagpos - tag.data();
531
532            // write new is_last flag
533            write_start_of_chunk(tag,
534                                 start_of_chunk_header,
535                                 end_of_chunk_header,
536                                 true, // is_last_chunk
537                                 first_did_in_chunk,
538                                 last_did_in_chunk);
539            table->add(cursor->current_key, tag);
540        }
541    } else {
542        DEBUGLINE(DB, "PostlistChunkWriter::flush(): updating chunk which still has items in it");
543        /* The chunk still has some items in it.  Two major subcases:
544         * a) This is the first chunk.
545         * b) This isn't the first chunk.
546         *
547         * The subcases just affect the chunk header.
548         */
549        string tag;
550
551        /* First write the header, which depends on whether this is the
552         * first chunk.
553         */
554        if (is_first_chunk) {
555            /* The first chunk.  This is the relatively easy case,
556             * and we just have to write this one back to disk.
557             */
558            DEBUGLINE(DB, "PostlistChunkWriter::flush(): rewriting the first chunk, which still has items in it");
559            string key = FlintPostListTable::make_key(tname);
560            bool ok = table->get_exact_entry(key, tag);
561            (void)ok;
562            Assert(ok);
563            Assert(!tag.empty());
564
565            Xapian::termcount num_ent, coll_freq;
566            {
567                const char * tagpos = tag.data();
568                const char * tagend = tagpos + tag.size();
569                (void)read_start_of_first_chunk(&tagpos, tagend,
570                                                &num_ent, &coll_freq);
571            }
572
573            tag = make_start_of_first_chunk(num_ent, coll_freq, first_did);
574
575            tag += make_start_of_chunk(is_last_chunk, first_did, current_did);
576            tag += chunk;
577            table->add(key, tag);
578            return;
579        }
580
581        DEBUGLINE(DB, "PostlistChunkWriter::flush(): updating secondary chunk which still has items in it");
582        /* Not the first chunk.
583         *
584         * This has the easy sub-sub-case:
585         *   The first entry in the chunk hasn't changed
586         * ...and the hard sub-sub-case:
587         *   The first entry in the chunk has changed.  This is
588         *   harder because the key for the chunk changes, so
589         *   we've got to do a switch.
590         */
591
592        // First find out the initial docid
593        const char *keypos = orig_key.data();
594        const char *keyend = keypos + orig_key.size();
595        if (!check_tname_in_key(&keypos, keyend, tname)) {
596            throw Xapian::DatabaseCorruptError("Have invalid key writing to postlist");
597        }
598        Xapian::docid initial_did;
599        if (!unpack_uint_preserving_sort(&keypos, keyend, &initial_did)) {
600            report_read_error(keypos);
601        }
602        string new_key;
603        if (initial_did != first_did) {
604            /* The fiddlier case:
605             * Create a new tag with the correct key, and replace
606             * the old one.
607             */
608            new_key = FlintPostListTable::make_key(tname, first_did);
609            table->del(orig_key);
610        } else {
611            new_key = orig_key;
612        }
613
614        // ...and write the start of this chunk.
615        tag = make_start_of_chunk(is_last_chunk, first_did, current_did);
616
617        tag += chunk;
618        table->add(new_key, tag);
619    }
620}
621
622/** Read the number of entries in the posting list.
623 *  This must only be called when *posptr is pointing to the start of
624 *  the first chunk of the posting list.
625 */
626void FlintPostList::read_number_of_entries(const char ** posptr,
627                                   const char * end,
628                                   Xapian::doccount * number_of_entries_ptr,
629                                   Xapian::termcount * collection_freq_ptr)
630{
631    if (!unpack_uint(posptr, end, number_of_entries_ptr))
632        report_read_error(*posptr);
633    if (!unpack_uint(posptr, end, collection_freq_ptr))
634        report_read_error(*posptr);
635}
636
637/** The format of a postlist is:
638 *
639 *  Split into chunks.  Key for first chunk is the termname (encoded as
640 *  length - name).  Key for subsequent chunks is the same, followed by the
641 *  document ID of the first document in the chunk (encoded as length of
642 *  representation in first byte, and then docid).
643 *
644 *  A chunk (except for the first chunk) contains:
645 *
646 *  1)  bool - true if this is the last chunk.
647 *  2)  difference between final docid in chunk and first docid.
648 *  3)  wdf, then doclength of first item.
649 *  4)  increment in docid to next item, followed by wdf and doclength of item
650 *  5)  (4) repeatedly.
651 *
652 *  The first chunk begins with the number of entries, then the docid of the
653 *  first document, then has the header of a standard chunk.
654 */
655FlintPostList::FlintPostList(Xapian::Internal::RefCntPtr<const FlintDatabase> this_db_,
656                             const string & tname_)
657        : this_db(this_db_),
658          tname(tname_),
659          have_started(false),
660          cursor(this_db->postlist_table.cursor_get()),
661          is_at_end(false)
662{
663    DEBUGCALL(DB, void, "FlintPostList::ostList",
664              this_db_.get() << ", " << tname_);
665    string key = FlintPostListTable::make_key(tname);
666    int found = cursor->find_entry(key);
667    if (!found) {
668        number_of_entries = 0;
669        is_at_end = true;
670        pos = 0;
671        end = 0;
672        first_did_in_chunk = 0;
673        last_did_in_chunk = 0;
674        return;
675    }
676    cursor->read_tag();
677    pos = cursor->current_tag.data();
678    end = pos + cursor->current_tag.size();
679
680    did = read_start_of_first_chunk(&pos, end, &number_of_entries, NULL);
681    first_did_in_chunk = did;
682    last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk,
683                                            &is_last_chunk);
684    read_wdf_and_length(&pos, end, &wdf, &doclength);
685}
686
687FlintPostList::~FlintPostList()
688{
689    DEBUGCALL(DB, void, "FlintPostList::~FlintPostList", "");
690}
691
692bool
693FlintPostList::next_in_chunk()
694{
695    DEBUGCALL(DB, bool, "FlintPostList::next_in_chunk", "");
696    if (pos == end) RETURN(false);
697
698    read_did_increase(&pos, end, &did);
699    read_wdf_and_length(&pos, end, &wdf, &doclength);
700
701    // Either not at last doc in chunk, or pos == end, but not both.
702    Assert(did <= last_did_in_chunk);
703    Assert(did < last_did_in_chunk || pos == end);
704    Assert(pos != end || did == last_did_in_chunk);
705
706    RETURN(true);
707}
708
709void
710FlintPostList::next_chunk()
711{
712    DEBUGCALL(DB, void, "FlintPostList::next_chunk", "");
713    if (is_last_chunk) {
714        is_at_end = true;
715        return;
716    }
717
718    cursor->next();
719    if (cursor->after_end()) {
720        is_at_end = true;
721        throw Xapian::DatabaseCorruptError("Unexpected end of posting list for `" +
722                                     tname + "'");
723    }
724    const char * keypos = cursor->current_key.data();
725    const char * keyend = keypos + cursor->current_key.size();
726    // Check we're still in same postlist
727    if (!check_tname_in_key_lite(&keypos, keyend, tname)) {
728        is_at_end = true;
729        throw Xapian::DatabaseCorruptError("Unexpected end of posting list for `" +
730                                     tname + "'");
731    }
732
733    Xapian::docid newdid;
734    if (!unpack_uint_preserving_sort(&keypos, keyend, &newdid)) {
735        report_read_error(keypos);
736    }
737    if (newdid <= did) {
738        throw Xapian::DatabaseCorruptError("Document ID in new chunk of postlist (" +
739                om_tostring(newdid) +
740                ") is not greater than final document ID in previous chunk (" +
741                om_tostring(did) + ")");
742    }
743    did = newdid;
744
745    cursor->read_tag();
746    pos = cursor->current_tag.data();
747    end = pos + cursor->current_tag.size();
748
749    first_did_in_chunk = did;
750    last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk,
751                                            &is_last_chunk);
752    read_wdf_and_length(&pos, end, &wdf, &doclength);
753}
754
755PositionList *
756FlintPostList::read_position_list()
757{
758    DEBUGCALL(DB, PositionList *, "FlintPostList::read_position_list", "");
759    positionlist.read_data(&this_db->position_table, did, tname);
760    RETURN(&positionlist);
761}
762
763PositionList *
764FlintPostList::open_position_list() const
765{
766    DEBUGCALL(DB, PositionList *, "FlintPostList::open_position_list", "");
767    RETURN(new FlintPositionList(&this_db->position_table, did, tname));
768}
769
770PostList *
771FlintPostList::next(Xapian::weight w_min)
772{
773    DEBUGCALL(DB, PostList *, "FlintPostList::next", w_min);
774    (void)w_min; // no warning
775
776    if (!have_started) {
777        have_started = true;
778    } else {
779        if (!next_in_chunk()) next_chunk();
780    }
781
782    DEBUGLINE(DB, string("Moved to ") <<
783              (is_at_end ? string("end.") : string("docid, wdf, doclength = ") +
784               om_tostring(did) + ", " + om_tostring(wdf) + ", " +
785               om_tostring(doclength)));
786
787    RETURN(NULL);
788}
789
790bool
791FlintPostList::current_chunk_contains(Xapian::docid desired_did)
792{
793    DEBUGCALL(DB, bool, "FlintPostList::current_chunk_contains", desired_did);
794    if (desired_did >= first_did_in_chunk &&
795        desired_did <= last_did_in_chunk) {
796        RETURN(true);
797    }
798    RETURN(false);
799}
800
801void
802FlintPostList::move_to_chunk_containing(Xapian::docid desired_did)
803{
804    DEBUGCALL(DB, void,
805              "FlintPostList::move_to_chunk_containing", desired_did);
806    (void)cursor->find_entry(FlintPostListTable::make_key(tname, desired_did));
807    Assert(!cursor->after_end());
808
809    const char * keypos = cursor->current_key.data();
810    const char * keyend = keypos + cursor->current_key.size();
811    // Check we're still in same postlist
812    if (!check_tname_in_key_lite(&keypos, keyend, tname)) {
813        // This should only happen if the postlist doesn't exist at all.
814        is_at_end = true;
815        is_last_chunk = true;
816        return;
817    }
818    is_at_end = false;
819
820    cursor->read_tag();
821    pos = cursor->current_tag.data();
822    end = pos + cursor->current_tag.size();
823
824    if (keypos == keyend) {
825        // In first chunk
826#ifdef XAPIAN_DEBUG
827        Xapian::doccount old_number_of_entries = number_of_entries;
828        did = read_start_of_first_chunk(&pos, end, &number_of_entries, NULL);
829        Assert(old_number_of_entries == number_of_entries);
830#else
831        did = read_start_of_first_chunk(&pos, end, NULL, NULL);
832#endif
833    } else {
834        // In normal chunk
835        if (!unpack_uint_preserving_sort(&keypos, keyend, &did)) {
836            report_read_error(keypos);
837        }
838    }
839
840    first_did_in_chunk = did;
841    last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk,
842                                            &is_last_chunk);
843    read_wdf_and_length(&pos, end, &wdf, &doclength);
844
845    // Possible, since desired_did might be after end of this chunk and before
846    // the next.
847    if (desired_did > last_did_in_chunk) next_chunk();
848}
849
850bool
851FlintPostList::move_forward_in_chunk_to_at_least(Xapian::docid desired_did)
852{
853    DEBUGCALL(DB, bool,
854              "FlintPostList::move_forward_in_chunk_to_at_least", desired_did);
855    if (desired_did > last_did_in_chunk) {
856        pos = end;
857        RETURN(false);
858    }
859    while (did < desired_did) {
860        // FIXME: perhaps we don't need to decode the wdf and document length
861        // for documents we're skipping past.
862        bool at_end_of_chunk = !next_in_chunk();
863        if (at_end_of_chunk) RETURN(false);
864    }
865    RETURN(true);
866}
867
868PostList *
869FlintPostList::skip_to(Xapian::docid desired_did, Xapian::weight w_min)
870{
871    DEBUGCALL(DB, PostList *,
872              "FlintPostList::skip_to", desired_did << ", " << w_min);
873    (void)w_min; // no warning
874    // We've started now - if we hadn't already, we're already positioned
875    // at start so there's no need to actually do anything.
876    have_started = true;
877
878    // Don't skip back, and don't need to do anything if already there.
879    if (is_at_end || desired_did <= did) RETURN(NULL);
880
881    // Move to correct chunk
882    if (!current_chunk_contains(desired_did)) {
883        move_to_chunk_containing(desired_did);
884        // Might be at_end now, so we need to check before trying to move
885        // forward in chunk.
886        if (is_at_end) RETURN(NULL);
887    }
888