root / tags / 1.0.8 / xapian-core / backends / flint / flint_btreebase.cc

Revision 10751, 12.7 kB (checked in by olly, 6 months ago)

backends/flint/flint_btreebase.cc: Add explicit '#include <cstring>'
for GCC 4.3.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
Line 
1/* flint_btreebase.cc: Btree base file implementation
2 *
3 * Copyright 1999,2000,2001 BrightStation PLC
4 * Copyright 2002,2003,2004,2006 Olly Betts
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2 of the
9 * License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
19 * USA
20 */
21
22#include <config.h>
23
24#include "safeerrno.h"
25#ifdef __WIN32__
26# include "msvc_posix_wrapper.h"
27#endif
28
29#include "flint_btreebase.h"
30#include "flint_io.h"
31#include "flint_utils.h"
32#include "utils.h"
33#include <xapian/error.h>
34#include "omassert.h"
35
36#include <limits.h>
37
38#include <algorithm>
39#include <cstring>
40
41using namespace std;
42
43/** A tiny class used to close a filehandle safely in the presence
44 *  of exceptions.
45 */
46class fdcloser {
47    public:
48        fdcloser(int fd_) : fd(fd_) {}
49        ~fdcloser() {
50            if (fd >= 0) {
51                (void)close(fd);
52            }
53        }
54    private:
55        int fd;
56};
57
58/************ Base file parameters ************/
59
60/** This is the current description of the base file format:
61 *
62 * Numbers are (unless mentioned otherwise) stored in the variable
63 * length format used by pack_uint() - that is 7 bits at a time in
64 * a byte, starting with lower-order bits, and setting the high bit
65 * on all bytes before the last one.
66 *
67 * The format consists of a sequence of numbers in this order:
68 *
69 * REVISION
70 * FORMAT       will be = CURR_FORMAT for the current format.  If this value
71 *              higher then it is a different format which we
72 *              doesn't yet understand, so we bomb out.  If it's lower,
73 *              then it depends if we have backwards-compatibility code
74 *              implemented (we don't for format versions < 5).
75 * BLOCK_SIZE
76 * ROOT
77 * LEVEL
78 * BIT_MAP_SIZE
79 * ITEM_COUNT
80 * LAST_BLOCK
81 * HAVE_FAKEROOT
82 * REVISION2    A second copy of the revision number, for consistency checks.
83 * BITMAP       The bitmap.  This will be BIT_MAP_SIZE raw bytes.
84 * REVISION3    A third copy of the revision number, for consistency checks.
85 */
86#define CURR_FORMAT 5U
87
88FlintTable_base::FlintTable_base()
89        : revision(0),
90          block_size(0),
91          root(0),
92          level(0),
93          bit_map_size(0),
94          item_count(0),
95          last_block(0),
96          have_fakeroot(false),
97          sequential(false),
98          bit_map_low(0),
99          bit_map0(0),
100          bit_map(0)
101{
102}
103
104FlintTable_base::FlintTable_base(const FlintTable_base &other)
105        : revision(other.revision),
106          block_size(other.block_size),
107          root(other.root),
108          level(other.level),
109          bit_map_size(other.bit_map_size),
110          item_count(other.item_count),
111          last_block(other.last_block),
112          have_fakeroot(other.have_fakeroot),
113          sequential(other.sequential),
114          bit_map_low(other.bit_map_low),
115          bit_map0(0),
116          bit_map(0)
117{
118    try {
119        bit_map0 = new byte[bit_map_size];
120        bit_map = new byte[bit_map_size];
121
122        memcpy(bit_map0, other.bit_map0, bit_map_size);
123        memcpy(bit_map, other.bit_map, bit_map_size);
124    } catch (...) {
125        delete [] bit_map0;
126        delete [] bit_map;
127    }
128}
129
130void
131FlintTable_base::swap(FlintTable_base &other)
132{
133    std::swap(revision, other.revision);
134    std::swap(block_size, other.block_size);
135    std::swap(root, other.root);
136    std::swap(level, other.level);
137    std::swap(bit_map_size, other.bit_map_size);
138    std::swap(item_count, other.item_count);
139    std::swap(last_block, other.last_block);
140    std::swap(have_fakeroot, other.have_fakeroot);
141    std::swap(sequential, other.sequential);
142    std::swap(bit_map_low, other.bit_map_low);
143    std::swap(bit_map0, other.bit_map0);
144    std::swap(bit_map, other.bit_map);
145}
146
147FlintTable_base::~FlintTable_base()
148{
149    delete [] bit_map;
150    bit_map = 0;
151    delete [] bit_map0;
152    bit_map0 = 0;
153}
154
155bool
156FlintTable_base::do_unpack_uint(const char **start, const char *end,
157                           uint4 *dest, string &err_msg,
158                           const string &basename,
159                           const char *varname)
160{
161    bool result = unpack_uint(start, end, dest);
162    if (!result) {
163        err_msg += "Unable to read " + string(varname) + " from " +
164                    basename + "\n";
165    }
166    return result;
167}
168
169#define DO_UNPACK_UINT_ERRCHECK(start, end, var) \
170do { \
171    if (!do_unpack_uint(start, end, &var, err_msg, basename, #var)) { \
172        return false; \
173    } \
174} while(0)
175
176/* How much of the base file to read at the first go (in bytes).
177 * This must be big enough that the base file without bitmap
178 * will fit in to this size with no problem.  Other than that
179 * it's fairly arbitrary, but shouldn't be big enough to cause
180 * serious memory problems!
181 */
182#define REASONABLE_BASE_SIZE 1024
183
184bool
185FlintTable_base::read(const string & name, char ch, string &err_msg)
186{
187    string basename = name + "base" + ch;
188#ifdef __WIN32__
189    int h = msvc_posix_open(basename.c_str(), O_RDONLY | O_BINARY);
190#else
191    int h = open(basename.c_str(), O_RDONLY | O_BINARY);
192#endif
193
194    if (h == -1) {
195        err_msg += "Couldn't open " + basename + ": " + strerror(errno) + "\n";
196        return false;
197    }
198    fdcloser closefd(h);
199
200    char buf[REASONABLE_BASE_SIZE];
201
202    const char *start = buf;
203    const char *end = buf + flint_io_read(h, buf, REASONABLE_BASE_SIZE, 0);
204
205    DO_UNPACK_UINT_ERRCHECK(&start, end, revision);
206    uint4 format;
207    DO_UNPACK_UINT_ERRCHECK(&start, end, format);
208    if (format != CURR_FORMAT) {
209        err_msg += "Bad base file format " + om_tostring(format) + " in " +
210                    basename + "\n";
211        return false;
212    }
213    DO_UNPACK_UINT_ERRCHECK(&start, end, block_size);
214    DO_UNPACK_UINT_ERRCHECK(&start, end, root);
215    DO_UNPACK_UINT_ERRCHECK(&start, end, level);
216    DO_UNPACK_UINT_ERRCHECK(&start, end, bit_map_size);
217    DO_UNPACK_UINT_ERRCHECK(&start, end, item_count);
218    DO_UNPACK_UINT_ERRCHECK(&start, end, last_block);
219    uint4 have_fakeroot_;
220    DO_UNPACK_UINT_ERRCHECK(&start, end, have_fakeroot_);
221    have_fakeroot = have_fakeroot_;
222
223    uint4 sequential_;
224    DO_UNPACK_UINT_ERRCHECK(&start, end, sequential_);
225    sequential = sequential_;
226
227    if (have_fakeroot && !sequential) {
228        sequential = true; // FIXME : work out why we need this...
229        /*
230        err_msg += "Corrupt base file, `" + basename + "':\n"
231                "sequential must be set whenever have_fakeroot is set.\n" +
232                "sequential=" + (sequential?"true":"false") +
233                ", have_fakeroot=" + (have_fakeroot?"true":"false") + "\n";
234        return false;
235        */
236    }
237
238    uint4 revision2;
239    DO_UNPACK_UINT_ERRCHECK(&start, end, revision2);
240    if (revision != revision2) {
241        err_msg += "Revision number mismatch in " +
242                basename + ": " +
243                om_tostring(revision) + " vs " + om_tostring(revision2) + "\n";
244        return false;
245    }
246
247    /* It's ok to delete a zero pointer */
248    delete [] bit_map0;
249    bit_map0 = 0;
250    delete [] bit_map;
251    bit_map = 0;
252
253    bit_map0 = new byte[bit_map_size];
254    bit_map = new byte[bit_map_size];
255
256    size_t n = end - start;
257    if (n < bit_map_size) {
258        memcpy(bit_map0, start, n);
259        (void)flint_io_read(h, reinterpret_cast<char *>(bit_map0) + n,
260                            bit_map_size - n, bit_map_size - n);
261        n = 0;
262    } else {
263        memcpy(bit_map0, start, bit_map_size);
264        n -= bit_map_size;
265        if (n) memmove(buf, start + bit_map_size, n);
266    }
267    memcpy(bit_map, bit_map0, bit_map_size);
268
269    start = buf;
270    end = buf + n;
271    end += flint_io_read(h, buf + n, REASONABLE_BASE_SIZE - n, 0);
272
273    uint4 revision3;
274    if (!unpack_uint(&start, end, &revision3)) {
275        err_msg += "Couldn't read revision3 from base file " +
276        basename + "\n";
277        return false;
278    }
279
280    if (revision != revision3) {
281        err_msg += "Revision number mismatch in " +
282                basename + ": " +
283                om_tostring(revision) + " vs " + om_tostring(revision3) + "\n";
284        return false;
285    }
286
287    if (start != end) {
288        err_msg += "Junk at end of base file " + basename + "\n";
289        return false;
290    }
291
292    return true;
293}
294
295void
296FlintTable_base::write_to_file(const string &filename)
297{
298    calculate_last_block();
299
300    string buf;
301    buf += pack_uint(revision);
302    buf += pack_uint(CURR_FORMAT);
303    buf += pack_uint(block_size);
304    buf += pack_uint(static_cast<uint4>(root));
305    buf += pack_uint(static_cast<uint4>(level));
306    buf += pack_uint(static_cast<uint4>(bit_map_size));
307    buf += pack_uint(static_cast<uint4>(item_count));
308    buf += pack_uint(static_cast<uint4>(last_block));
309    buf += pack_uint(have_fakeroot);
310    buf += pack_uint(sequential);
311    buf += pack_uint(revision);
312    if (bit_map_size > 0) {
313        buf.append(reinterpret_cast<const char *>(bit_map), bit_map_size);
314    }
315    buf += pack_uint(revision);  // REVISION2
316
317#ifdef __WIN32__
318    int h = msvc_posix_open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY);
319#else
320    int h = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
321#endif
322    if (h < 0) {
323        string message = string("Couldn't open base ")
324                + filename + " to write: " + strerror(errno);
325        throw Xapian::DatabaseOpeningError(message);
326    }
327    fdcloser closefd(h);
328
329    flint_io_write(h, buf.data(), buf.size());
330    flint_io_sync(h);
331}
332
333/*
334   block_free_at_start(B, n) is true iff (if and only if) block n was free at
335   the start of the transaction on the B-tree.
336*/
337
338bool
339FlintTable_base::block_free_at_start(uint4 n) const
340{
341    size_t i = n / CHAR_BIT;
342    int bit = 0x1 << n % CHAR_BIT;
343    return (bit_map0[i] & bit) == 0;
344}
345
346/* free_block(B, n) causes block n to be marked free in the bit map.
347   B->bit_map_low is the lowest byte in the bit map known to have a free bit
348   set. Searching starts from there when looking for a free block.
349*/
350
351void
352FlintTable_base::free_block(uint4 n)
353{
354    uint4 i = n / CHAR_BIT;
355    int bit = 0x1 << n % CHAR_BIT;
356    bit_map[i] &= ~ bit;
357
358    if (bit_map_low > i)
359        if ((bit_map0[i] & bit) == 0) /* free at start */
360            bit_map_low = i;
361}
362
363/* extend(B) increases the size of the two bit maps in an obvious way.
364   The bitmap file grows and shrinks as the DB file grows and shrinks in
365   internal usage. But the DB file itself does not reduce in size, no matter
366   how many blocks are freed.
367*/
368
369#define BIT_MAP_INC 1000
370    /* increase the bit map by this number of bytes if it overflows */
371
372void
373FlintTable_base::extend_bit_map()
374{
375    int n = bit_map_size + BIT_MAP_INC;
376    byte *new_bit_map0 = 0;
377    byte *new_bit_map = 0;
378
379    try {
380        new_bit_map0 = new byte[n];
381        new_bit_map = new byte[n];
382
383        memcpy(new_bit_map0, bit_map0, bit_map_size);
384        memset(new_bit_map0 + bit_map_size, 0, n - bit_map_size);
385       
386        memcpy(new_bit_map, bit_map, bit_map_size);
387        memset(new_bit_map + bit_map_size, 0, n - bit_map_size);
388    } catch (...) {
389        delete [] new_bit_map0;
390        delete [] new_bit_map;
391        throw;
392    }
393    delete [] bit_map0;
394    bit_map0 = new_bit_map0;
395    delete [] bit_map;
396    bit_map = new_bit_map;
397    bit_map_size = n;
398}
399
400/* next_free_block(B) returns the number of the next available free block in
401   the bitmap, marking it as 'in use' before returning. More precisely, we get
402   a block that is both free now (in bit_map) and was free at the beginning of
403   the transaction on the B-tree (in bit_map0).
404
405   Starting at bit_map_low we go up byte at a time until we find a byte with a
406   free (zero) bit, and then go up that byte bit at a time. If the bit map has
407   no free bits it is extended so that it will have.
408*/
409
410uint4
411FlintTable_base::next_free_block()
412{
413    uint4 i;
414    int x;
415    for (i = bit_map_low;; i++) {
416        if (i >= bit_map_size) {
417            extend_bit_map();
418        }
419        x = bit_map0[i] | bit_map[i];
420        if (x != UCHAR_MAX) break;
421    }
422    uint4 n = i * CHAR_BIT;
423    int d = 0x1;
424    while ((x & d) != 0) { d <<= 1; n++; }
425    bit_map[i] |= d;   /* set as 'in use' */
426    bit_map_low = i;
427    if (n > last_block) {
428        last_block = n;
429    }
430    return n;
431}
432
433bool
434FlintTable_base::block_free_now(uint4 n)
435{
436    uint4 i = n / CHAR_BIT;
437    int bit = 0x1 << n % CHAR_BIT;
438    return (bit_map[i] & bit) == 0;
439}
440
441void
442FlintTable_base::calculate_last_block()
443{
444    if (bit_map_size == 0) {
445        last_block = 0;
446        return;
447    }
448    int i = bit_map_size - 1;
449    while (bit_map[i] == 0 && i > 0) {
450        i--;
451    }
452    bit_map_size = i + 1;
453
454    int x = bit_map[i];
455
456    /* Check for when there are no blocks */
457    if (x == 0) {
458        last_block = 0;
459        return;
460    }
461    uint4 n = (i + 1) * CHAR_BIT - 1;
462    int d = 0x1 << (CHAR_BIT - 1);
463    while ((x & d) == 0) { d >>= 1; n--; }
464
465    last_block = n;
466}
467
468bool
469FlintTable_base::is_empty() const
470{
471    for (uint4 i = 0; i < bit_map_size; i++) {
472        if (bit_map[i] != 0) {
473            return false;
474        }
475    }
476    return true;
477}
478
479void
480FlintTable_base::clear_bit_map()
481{
482    memset(bit_map, 0, bit_map_size);
483}
484
485// We've commited, so "bitmap at start" needs to be reset to the current bitmap.
486void
487FlintTable_base::commit()
488{
489    memcpy(bit_map0, bit_map, bit_map_size);
490    bit_map_low = 0;
491}
Note: See TracBrowser for help on using the browser.