root / tags / 1.0.8 / xapian-core / backends / flint / flint_table.h

Revision 11154, 21.9 kB (checked in by olly, 4 months ago)

Backport change from trunk:
backends/flint/flint_table.cc,backends/flint/flint_table.h:
Eliminate other_base_letter member of FlintTable - its value can
always be easily determined from base_letter.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
Line 
1/* flint_table.h: Btree implementation
2 *
3 * Copyright 1999,2000,2001 BrightStation PLC
4 * Copyright 2002,2003,2004,2005,2006,2007,2008 Olly Betts
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2 of the
9 * License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
19 * USA
20 */
21
22#ifndef OM_HGUARD_FLINT_TABLE_H
23#define OM_HGUARD_FLINT_TABLE_H
24
25#include <xapian/error.h>
26#include <xapian/visibility.h>
27
28#include <algorithm>
29#include <string>
30using std::string;
31
32#include "flint_types.h"
33#include "flint_btreebase.h"
34#include "flint_btreeutil.h"
35#include "flint_cursor.h"
36#include "noreturn.h"
37
38#include "stringutils.h"
39#include "utils.h"
40
41#include <zlib.h>
42
43#define DONT_COMPRESS -1
44
45/** The largest possible value of a key_len.
46 *
47 *  This gives the upper limit of the size of a key that may be stored in the
48 *  B-tree (252 bytes with the present implementation).
49 */
50#define FLINT_BTREE_MAX_KEY_LEN 252
51
52// FIXME: This named constant probably isn't used everywhere it should be...
53#define BYTES_PER_BLOCK_NUMBER 4
54
55/*  The B-tree blocks have a number of internal lengths and offsets held in 1, 2
56    or 4 bytes. To make the coding a little clearer,
57       we use  for
58       ------  ---
59       K1      the 1 byte length of key
60       I2      the 2 byte length of an item (key-tag pair)
61       D2      the 2 byte offset to the item from the directory
62       C2      the 2 byte counter that ends each key and begins each tag
63*/
64
65#define K1 1
66#define I2 2
67#define D2 2
68#define C2 2
69
70/*  and when getting K1 or setting D2, we use getK, setD defined as: */
71
72#define getK(p, c)    getint1(p, c)
73#define setD(p, c, x) setint2(p, c, x)
74
75/* if you've been reading the comments from the top, the next four procedures
76   will not cause any headaches.
77
78   Recall that item has this form:
79
80           i k
81           | |
82           I K key x C tag
83             <--K-->
84           <------I------>
85
86
87   item_of(p, c) returns i, the address of the item at block address p,
88   directory offset c,
89
90   component_of(p, c) returns the number marked 'x' above,
91
92   components_of(p, c) returns the number marked 'C' above,
93*/
94
95class XAPIAN_VISIBILITY_DEFAULT Key_ {
96    const byte *p;
97public:
98    explicit Key_(const byte * p_) : p(p_) { }
99    const byte * get_address() const { return p; }
100    void read(string * key) const {
101        key->assign(reinterpret_cast<const char *>(p + K1), length());
102    }
103    bool operator==(Key_ key2) const;
104    bool operator!=(Key_ key2) const { return !(*this == key2); }
105    bool operator<(Key_ key2) const;
106    bool operator>=(Key_ key2) const { return !(*this < key2); }
107    bool operator>(Key_ key2) const { return key2 < *this; }
108    bool operator<=(Key_ key2) const { return !(key2 < *this); }
109    int length() const {
110        return getK(p, 0) - C2 - K1;
111    }
112    char operator[](size_t i) const {
113        return p[i + K1];
114    }
115};
116
117// Item_wr_ wants to be "Item_ with non-const p and more methods" - we can't
118// achieve that nicely with inheritance, so we use a template base class.
119template <class T> class Item_base_ {
120protected:
121    T p;
122public:
123    /* Item_ from block address and offset to item pointer */
124    Item_base_(T p_, int c) : p(p_ + getint2(p_, c)) { }
125    Item_base_(T p_) : p(p_) { }
126    T get_address() const { return p; }
127    int size() const { return getint2(p, 0) & 0x7fff; } /* I in diagram above */
128    bool get_compressed() const { return *p & 0x80; }
129    int component_of() const {
130        return getint2(p, getK(p, I2) + I2 - C2);
131    }
132    int components_of() const {
133        return getint2(p, getK(p, I2) + I2);
134    }
135    Key_ key() const { return Key_(p + I2); }
136    void append_chunk(string * tag) const {
137        /* number of bytes to extract from current component */
138        int cd = getK(p, I2) + I2 + C2;
139        int l = size() - cd;
140        tag->append(reinterpret_cast<const char *>(p + cd), l);
141    }
142    /** Get this item's tag as a block number (this block should not be at
143     *  level 0).
144     */
145    uint4 block_given_by() const {
146        return getint4(p, size() - BYTES_PER_BLOCK_NUMBER);
147    }
148};
149
150class Item_ : public Item_base_<const byte *> {
151public:
152    /* Item_ from block address and offset to item pointer */
153    Item_(const byte * p_, int c) : Item_base_<const byte *>(p_, c) { }
154    Item_(const byte * p_) : Item_base_<const byte *>(p_) { }
155};
156
157class Item_wr_ : public Item_base_<byte *> {
158    void set_key_len(int x) { setint1(p, I2, x); }
159public:
160    /* Item_wr_ from block address and offset to item pointer */
161    Item_wr_(byte * p_, int c) : Item_base_<byte *>(p_, c) { }
162    Item_wr_(byte * p_) : Item_base_<byte *>(p_) { }
163    void set_component_of(int i) {
164        setint2(p, getK(p, I2) + I2 - C2, i);
165    }
166    void set_components_of(int m) {
167        setint2(p, getK(p, I2) + I2, m);
168    }
169    // Takes size as we may be truncating newkey.
170    void set_key_and_block(Key_ newkey, int truncate_size, uint4 n) {
171        int i = truncate_size;
172        // Read the length now because we may be copying the key over itself.
173        // FIXME that's stupid!  sort this out
174        int newkey_len = newkey.length();
175        int newsize = I2 + K1 + i + C2;
176        // Item size (4 since tag contains block number)
177        setint2(p, 0, newsize + 4);
178        // Key size
179        setint1(p, I2, newsize - I2);
180        // Copy the main part of the key, possibly truncating.
181        memmove(p + I2 + K1, newkey.get_address() + K1, i);
182        // Copy the count part.
183        memmove(p + I2 + K1 + i, newkey.get_address() + K1 + newkey_len, C2);
184        // Set tag contents to block number
185//      set_block_given_by(n);
186        setint4(p, newsize, n);
187    }
188
189    /** Set this item's tag to point to block n (this block should not be at
190     *  level 0).
191     */
192    void set_block_given_by(uint4 n) {
193        setint4(p, size() - BYTES_PER_BLOCK_NUMBER, n);
194    }
195    void set_size(int l) { setint2(p, 0, l); }
196    /** Form an item with a null key and with block number n in the tag.
197     */
198    void form_null_key(uint4 n) {
199        setint4(p, I2 + K1, n);
200        set_key_len(K1);        /* null key */
201        set_size(I2 + K1 + 4);  /* total length */
202    }
203    void form_key(const string & key_) {
204        string::size_type key_len = key_.length();
205        if (key_len > FLINT_BTREE_MAX_KEY_LEN) {
206            // We check term length when a term is added to a document but
207            // flint doubles zero bytes, so this can still happen for terms
208            // which contain one or more zero bytes.
209            string msg("Key too long: length was ");
210            msg += om_tostring(key_len);
211            msg += " bytes, maximum length of a key is "
212                   STRINGIZE(FLINT_BTREE_MAX_KEY_LEN) " bytes";
213            throw Xapian::InvalidArgumentError(msg);
214        }
215
216        set_key_len(key_len + K1 + C2);
217        memmove(p + I2 + K1, key_.data(), key_len);
218        set_component_of(1);
219    }
220    // FIXME passing cd here is icky
221    void set_tag(int cd, const char *start, int len, bool compressed) {
222        memmove(p + cd, start, len);
223        set_size(cd + len);
224        if (compressed) *p |= 0x80;
225    }
226    void fake_root_item() {
227        set_key_len(K1 + C2);   // null key length
228        set_size(I2 + K1 + 2 * C2);   // length of the item
229        set_component_of(1);
230        set_components_of(1);
231    }
232};
233
234// Allow for BTREE_CURSOR_LEVELS levels in the B-tree.
235// With 10, overflow is practically impossible
236// FIXME: but we want it to be completely impossible...
237#define BTREE_CURSOR_LEVELS 10
238
239/** Class managing a Btree table in a Flint database.
240 *
241 *  A table is a store holding a set of key/tag pairs.
242 *
243 *  A key is used to access a block of data in a flint table.
244 *
245 *  Keys are of limited length.
246 *
247 *  Keys may not be empty (each Btree has a special empty key for internal use).
248 *
249 *  A tag is a piece of data associated with a given key.  The contents
250 *  of the tag are opaque to the Btree.
251 *
252 *  Tags may be of arbitrary length (the Btree imposes a very large limit).
253 *  Note though that they will be loaded into memory in their entirety, so
254 *  should not be permitted to grow without bound in normal usage.
255 *
256 *  Tags which are null strings _are_ valid, and are different from a
257 *  tag simply not being in the table.
258 */
259class XAPIAN_VISIBILITY_DEFAULT FlintTable {
260    friend class FlintCursor; /* Should probably fix this. */
261    private:
262        /// Copying not allowed
263        FlintTable(const FlintTable &);
264
265        /// Assignment not allowed
266        FlintTable & operator=(const FlintTable &);
267
268    public:
269        /** Create a new Btree object.
270         *
271         *  This does not create the table on disk - the create_and_open()
272         *  method must be called to create the table on disk.
273         *
274         *  This also does not open the table - either the create_and_open()
275         *  or open() methods must be called before use is made of the table.
276         *
277         *  @param path_        Path at which the table is stored.
278         *  @param readonly_    whether to open the table for read only access.
279         *  @param compress_strategy_   DONT_COMPRESS, Z_DEFAULT_STRATEGY,
280         *                              Z_FILTERED, Z_HUFFMAN_ONLY, or Z_RLE.
281         *  @param lazy         If true, don't create the table until it's
282         *                      needed.
283         */
284        FlintTable(string path_, bool readonly_,
285                   int compress_strategy_ = DONT_COMPRESS, bool lazy = false);
286
287        /** Close the Btree.
288         *
289         *  Any outstanding changes (ie, changes made without commit() having
290         *  subsequently been called) will be lost.
291         */
292        ~FlintTable();
293
294        /** Close the Btree.  This closes and frees any of the btree
295         *  structures which have been created and opened.
296         */
297        void close();
298
299        /** Determine whether the btree exists on disk.
300         */
301        bool exists() const;
302
303        /** Open the btree at the latest revision.
304         *
305         *  @exception Xapian::DatabaseCorruptError will be thrown if the table
306         *      is in a corrupt state.
307         *  @exception Xapian::DatabaseOpeningError will be thrown if the table
308         *      cannot be opened (but is not corrupt - eg, permission problems,
309         *      not present, etc).
310         */
311        void open();
312
313        /** Open the btree at a given revision.
314         *
315         *  Like Btree::open, but try to open at the given revision number
316         *  and fail if that isn't possible.
317         *
318         *  @param revision_      - revision number to open.
319         *
320         *  @return true if table is successfully opened at desired revision;
321         *          false if table cannot be opened at desired revision (but
322         *          table is otherwise consistent).
323         *
324         *  @exception Xapian::DatabaseCorruptError will be thrown if the table
325         *      is in a corrupt state.
326         *  @exception Xapian::DatabaseOpeningError will be thrown if the table
327         *      cannot be opened (but is not corrupt - eg, permission problems,
328         *      not present, etc).
329         */
330        bool open(flint_revision_number_t revision_);
331
332        /** Commit any outstanding changes to the table.
333         *
334         *  Commit changes made by calling add() and del() to the Btree.
335         *
336         *  If an error occurs during the operation, this will be signalled
337         *  by an exception.  In case of error, changes made will not be
338         *  committed to the Btree - they will be discarded.
339         *
340         *  @param new_revision  The new revision number to store.  This must
341         *          be greater than the latest revision number (see
342         *          get_latest_revision_number()), or an exception will be
343         *          thrown.
344         */
345        void commit(flint_revision_number_t revision);
346
347        /** Cancel any outstanding changes.
348         *
349         *  This will discard any modifications which haven't been committed
350         *  by calling commit().
351         */
352        void cancel();
353
354        /** Read an entry from the table, if and only if it is exactly that
355         *  being asked for.
356         *
357         *  If the key is found in the table, the tag will be filled with
358         *  the data associated with the key.  If the key is not found,
359         *  the tag will be unmodified.
360         *
361         *  @param key  The key to look for in the table.
362         *  @param tag  A tag object to fill with the value if found.
363         *
364         *  @return true if key is found in table,
365         *          false if key is not found in table.
366         */
367        bool get_exact_entry(const string & key, string & tag) const;
368
369        /** Check if a key exists in the Btree.
370         *
371         *  This is just like get_exact_entry() except it doesn't read the tag
372         *  value so is more efficient if you only want to check that the key
373         *  exists.
374         *
375         *  @param key  The key to look for in the table.
376         *
377         *  @return true if key is found in table,
378         *          false if key is not found in table.
379         */
380        bool key_exists(const string &key) const;
381
382        /** Find a key in the Btree and read its tag.
383         *
384         *  If the key is found the tag is copied to tag.  If the key is not
385         *  found tag is left unchanged.
386         *
387         *  The result is true iff the specified key is found in the Btree.
388         *
389         *  e.g.
390         *
391         *    string t;
392         *    btree.find_tag("TODAY", &t); // get today's date
393         */
394        bool find_tag(const string &key, string * tag) const;
395
396        /** Read the tag value for the key pointed to by cursor C_.
397         *
398         *  @param keep_compressed  Don't uncompress the tag - e.g. useful
399         *                          if it's just being opaquely copied.
400         *
401         *  @return     true if current_tag holds compressed data (always
402         *              false if keep_compressed was false).
403         */
404        bool read_tag(Cursor_ * C_, string *tag, bool keep_compressed) const;
405
406        /** Add a key/tag pair to the table, replacing any existing pair with
407         *  the same key.
408         *
409         *  If an error occurs during the operation, this will be signalled
410         *  by a return value of false.  All modifications since the
411         *  previous commit() will be lost.
412         *
413         *  If key is empty, then the null item is replaced.  If key.length()
414         *  exceeds the limit on key size, false is returned.
415         *
416         *  e.g.    ok = btree.add("TODAY", "Mon 9 Oct 2000");
417         *
418         *  @param key   The key to store in the table.
419         *  @param tag   The tag to store in the table.
420         *  @param already_compressed   true if tag is already compressed,
421         *              for example because it is being opaquely copied
422         *              (default: false).
423         *
424         *  @return true if the operation completed successfully, false
425         *          otherwise.
426         */
427        bool add(const string &key, string tag, bool already_compressed = false);
428
429        /** Delete an entry from the table.
430         *
431         *  The entry will be removed from the table, if it exists.  If
432         *  it does not exist, no action will be taken.  The item with
433         *  an empty key can't be removed, and false is returned.
434         *
435         *  If an error occurs during the operation, this will be signalled
436         *  by a return value of false.  All modifications since the
437         *  previous commit() will be lost.
438         *
439         *  e.g.    ok = btree.del("TODAY")
440         *
441         *  @param key   The key to remove from the table.
442         *
443         *  @return true if the operation completed successfully, false
444         *          otherwise.
445         */
446        bool del(const string &key);
447
448        /// Erase this table from disk.
449        void erase();
450
451        /** Set the block size.
452         *
453         *  It's only safe to do this before the table is created.
454         */
455        void set_block_size(unsigned int block_size_);
456
457        /** Get the block size.
458         */
459        unsigned int get_block_size() const { return block_size; }
460
461        /** Create a new empty btree structure on disk and open it at the
462         *  initial revision.
463         *
464         *  The table must be writable - it doesn't make sense to create
465         *  a table that is read-only!
466         *
467         *  The block size must be less than 64K, where K = 1024. It is unwise
468         *  to use a small block size (less than 1024 perhaps), so we enforce a
469         *  minimum block size of 2K.
470         *
471         *  Example:
472         *
473         *    Btree btree("X-");
474         *    btree.create_and_open(8192);
475         *    // Files will be X-DB, X-baseA (and X-baseB).
476         *
477         *  @param blocksize     - Size of blocks to use.
478         *
479         *  @exception Xapian::DatabaseCreateError if the table can't be
480         *      created.
481         *  @exception Xapian::InvalidArgumentError if the requested blocksize
482         *      is unsuitable.
483         */
484        void create_and_open(unsigned int blocksize);
485
486        void set_full_compaction(bool parity);
487
488        /** Get the latest revision number stored in this table.
489         *
490         *  This gives the higher of the revision numbers held in the base
491         *  files of the B-tree, or just the revision number if there's only
492         *  one base file.
493         *
494         *  It is possible that there are other, older, revisions of this
495         *  table available, and indeed that the revision currently open
496         *  is one of these older revisions.
497         */
498        flint_revision_number_t get_latest_revision_number() const {
499            return latest_revision_number;
500        }
501
502        /** Get the revision number at which this table
503         *  is currently open.
504         *
505         *  It is possible that there are other, more recent or older
506         *  revisions available.
507         *
508         *  @return the current revision number.
509         */
510        flint_revision_number_t get_open_revision_number() const {
511            return revision_number;
512        }
513
514        /** Return a count of the number of entries in the table.
515         *
516         *  The count does not include the ever-present item with null key.
517         *
518         *  @return The number of entries in the table.
519         */
520        flint_tablesize_t get_entry_count() const {
521            return item_count;
522        }
523
524        /** Get a cursor for reading from the table.
525         *
526         *  The cursor is owned by the caller - it is the caller's
527         *  responsibility to ensure that it is deleted.
528         */
529        FlintCursor * cursor_get() const;
530
531        /** Determine whether the object contains uncommitted modifications.
532         *
533         *  @return true if there have been modifications since the last
534         *          the last call to commit().
535         */
536        bool is_modified() const { return Btree_modified; }
537
538        /** Set the maximum item size given the block capacity.
539         *
540         *  At least this many items of maximum size must fit into a block.
541         *  The default is BLOCK_CAPACITY (which is currently 4).
542         */
543        void set_max_item_size(size_t block_capacity) {
544            if (block_capacity > 4) block_capacity = 4;
545            max_item_size = (block_size - 11 /*DIR_START*/ - block_capacity * D2)
546                / block_capacity;
547        }
548
549    protected:
550
551        /** Perform the opening operation to read.
552         *
553         *  Return true iff the open succeeded.
554         */
555        bool do_open_to_read(bool revision_supplied, flint_revision_number_t revision_);
556
557        /** Perform the opening operation to write.
558         *
559         *  Return true iff the open succeeded.
560         */
561        bool do_open_to_write(bool revision_supplied,
562                              flint_revision_number_t revision_,
563                              bool create_db = false);
564        bool basic_open(bool revision_supplied, flint_revision_number_t revision);
565
566        bool find(Cursor_ *) const;
567        int delete_kt();
568        void read_block(uint4 n, byte *p) const;
569        void write_block(uint4 n, const byte *p) const;
570        XAPIAN_NORETURN(void set_overwritten() const);
571        void block_to_cursor(Cursor_ *C_, int j, uint4 n) const;
572        void alter();
573        void compact(byte *p);
574        void enter_key(int j, Key_ prevkey, Key_ newkey);
575        int mid_point(byte *p);
576        void add_item_to_block(byte *p, Item_wr_ kt, int c);
577        void add_item(Item_wr_ kt, int j);
578        void delete_item(int j, bool repeatedly);
579        int add_kt(bool found);
580        void read_root();
581        void split_root(uint4 split_n);
582        void form_key(const string & key) const;
583
584        char other_base_letter() const {
585           return (base_letter == 'A') ? 'B' : 'A';
586        }
587
588        /** revision number of the opened B-tree. */
589        flint_revision_number_t revision_number;
590
591        /** keeps a count of the number of items in the B-tree. */
592        uint4 item_count;
593
594        /** block size of the B tree in bytes */
595        unsigned int block_size;
596
597        /** Revision number of the other base, or zero if there is only one
598         *  base file.
599         */
600        mutable flint_revision_number_t latest_revision_number;
601
602        /** set to true if baseA and baseB both exist as valid bases.
603         *
604         *  The unused base is deleted as soon as a write to the Btree takes
605         *  place. */
606        mutable bool both_bases;
607
608        /** the value 'A' or 'B' of the current base */
609        char base_letter;
610
611        /** true if the root block is faked (not written to disk).
612         * false otherwise.  This is true when the btree hasn't been
613         * modified yet.
614         */
615        bool faked_root_block;
616
617        /** true iff the data has been written in a single write in
618         * sequential order.
619         */
620        bool sequential;
621
622        /// corresponding file handle
623        int handle;
624
625        /// number of levels, counting from 0
626        int level;
627
628        /// the root block of the B-tree
629        uint4 root;
630
631        /// buffer of size block_size for making up key-tag items
632        mutable Item_wr_ kt;
633
634        /// buffer of size block_size for reforming blocks
635        byte * buffer;
636
637        /// For writing back as file baseA or baseB.
638        FlintTable_base base;
639
640        /// The path name of the B tree.
641        string name;
642
643        /** count of the number of successive instances of purely
644         * sequential addition, starting at SEQ_START_POINT (neg) and
645         * going up to zero. */
646        int seq_count;
647
648        /** the last block to be changed by an addition */
649        uint4 changed_n;
650
651        /** directory offset corresponding to last block to be changed
652         *  by an addition */
653        int changed_c;
654
655        /// maximum size of an item (key-tag pair)
656        size_t max_item_size;
657
658        /// Set to true the first time the B-tree is modified.
659        mutable bool Btree_modified;
660
661        /// set to true when full compaction is to be achieved
662        bool full_compaction;
663
664        /// Set to true when the database is opened to write.
665        bool writable;
666
667        /* B-tree navigation functions */
668        bool prev(Cursor_ *C_, int j) const {
669            if (sequential) return prev_for_sequential(C_, j);
670            return prev_default(C_, j);
671        }
672
673        bool next(Cursor_ *C_, int j) const {
674            if (sequential) return next_for_sequential(C_, j);
675            return next_default(C_, j);
676        }
677
678        /* Default implementations. */
679        bool prev_default(Cursor_ *C_, int j) const;
680        bool next_default(Cursor_ *C_, int j) const;
681
682        /* Implementations for sequential mode. */
683        bool prev_for_sequential(Cursor_ *C_, int dummy) const;
684        bool next_for_sequential(Cursor_ *C_, int dummy) const;
685
686        static int find_in_block(const byte * p, Key_ key, bool leaf, int c);
687
688        /** block_given_by(p, c) finds the item at block address p, directory
689         *  offset c, and returns its tag value as an integer.
690         */
691        static uint4 block_given_by(const byte * p, int c);
692
693        mutable Cursor_ C[BTREE_CURSOR_LEVELS];
694
695        /** Buffer used when splitting a block.
696         *
697         *  This buffer holds the split off part of the block.  It's only used
698         *  when updating (in FlintTable::add_item().
699         */
700        byte * split_p;
701
702        /** DONT_COMPRESS or Z_DEFAULT_STRATEGY, Z_FILTERED, Z_HUFFMAN_ONLY,
703         *  Z_RLE. */
704        int compress_strategy;
705
706        /// If true, don't create the table until it's needed.
707        bool lazy;
708
709        /* Debugging methods */
710//      void report_block_full(int m, int n, const byte * p);
711};
712
713#endif /* OM_HGUARD_FLINT_TABLE_H */
Note: See TracBrowser for help on using the browser.