root / tags / 1.0.8 / xapian-core / backends / flint / flint_spelling.h

Revision 9689, 4.4 kB (checked in by olly, 14 months ago)

api/maptermlist.h,api/termlist.cc,backends/alltermslist.cc,
backends/flint/flint_spelling.cc,backends/flint/flint_spelling.h,
common/alltermslist.h,common/termlist.h,common/vectortermlist.h:
Provide a default implementation of accumulate_stats() in the
virtual base class TermIterator::Internal instead of repeating it
in each subclass which doesn't get used for generating an ESet, and
don't call abort() in the default implementation - an Assert(false)
is sufficient, and more consistent with how we handle other similar
cases.

Line 
1/** @file flint_spelling.h
2 * @brief Spelling correction data for a flint database.
3 */
4/* Copyright (C) 2007 Olly Betts
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
19 */
20
21#ifndef XAPIAN_INCLUDED_FLINT_SPELLING_H
22#define XAPIAN_INCLUDED_FLINT_SPELLING_H
23
24#include <xapian/types.h>
25
26#include "flint_table.h"
27#include "termlist.h"
28
29#include <map>
30#include <set>
31#include <string>
32#include <string.h> // For memcpy() and memcmp().
33
/** A fixed-size, 4 byte key wrapped so it can be copied and compared by value.
 *
 *  The conversion to std::string yields all four bytes when the first byte
 *  is 'M', and only the first three bytes otherwise.
 */
struct fragment {
    /// Raw key bytes.
    char data[4];

    /// Default constructor: zero-fills the bytes so that comparing or
    /// converting a fragment which hasn't been assigned to yet never reads
    /// indeterminate values.
    fragment() { memset(data, 0, sizeof(data)); }

    /// Allow implicit conversion from a 4 byte buffer.
    ///
    /// The buffer is only read, so it is taken as const; callers passing a
    /// non-const char buffer continue to work unchanged.
    fragment(const char data_[4]) { memcpy(data, data_, sizeof(data)); }

    /// Unchecked access to byte @a i (valid values are 0 to 3).
    char & operator[] (unsigned i) { return data[i]; }

    /// Unchecked read-only access to byte @a i (valid values are 0 to 3).
    const char & operator[] (unsigned i) const { return data[i]; }

    /// Convert to a string: 4 bytes if data[0] is 'M', otherwise 3 bytes.
    ///
    /// std::string is fully qualified so this header doesn't rely on a
    /// using-declaration being in effect wherever it is included.
    operator std::string () const {
        return std::string(data, data[0] == 'M' ? 4 : 3);
    }
};
50
51inline bool operator<(const fragment &a, const fragment &b) {
52    return memcmp(a.data, b.data, 4) < 0;
53}
54
55class FlintSpellingTable : public FlintTable {
56    void add_fragment(fragment frag, const string & word);
57    void remove_fragment(fragment frag, const string & word);
58
59    std::map<std::string, Xapian::termcount> wordfreq_changes;
60    std::map<fragment, std::set<std::string> > termlist_deltas;
61
62  public:
63    /** Create a new FlintSpellingTable object.
64     *
65     *  This method does not create or open the table on disk - you
66     *  must call the create() or open() methods respectively!
67     *
68     *  @param dbdir            The directory the flint database is stored in.
69     *  @param readonly         true if we're opening read-only, else false.
70     */
71    FlintSpellingTable(std::string dbdir, bool readonly)
72        : FlintTable(dbdir + "/spelling.", readonly, Z_DEFAULT_STRATEGY, true) { }
73
74    // Merge in batched-up changes.
75    void merge_changes();
76
77    void add_word(const std::string & word, Xapian::termcount freqinc);
78    void remove_word(const std::string & word, Xapian::termcount freqdec);
79
80    TermList * open_termlist(const std::string & word);
81
82    Xapian::doccount get_word_frequency(const string & word) const;
83
84    /** Override methods of FlintTable.
85     *
86     *  NB: these aren't virtual, but we always call them on the subclass in
87     *  cases where it matters).
88     *  @{
89     */
90
91    bool is_modified() const {
92        return !wordfreq_changes.empty() || FlintTable::is_modified();
93    }
94
95    void create_and_open(unsigned int blocksize) {
96        // The spelling table is created lazily, but erase it in case we're
97        // overwriting an existing database and it already exists.
98        FlintTable::erase();
99        FlintTable::set_block_size(blocksize);
100    }
101
102    void commit(flint_revision_number_t revision) {
103        merge_changes();
104        FlintTable::commit(revision);
105    }
106
107    void cancel() {
108        // Discard batched-up changes.
109        wordfreq_changes.clear();
110        termlist_deltas.clear();
111
112        FlintTable::cancel();
113    }
114
115    // @}
116};
117
118/** The list of words containing a particular trigram. */
119class FlintSpellingTermList : public TermList {
120    /// The encoded data.
121    std::string data;
122
123    /// Position in the data.
124    unsigned p;
125
126    /// The current term.
127    std::string current_term;
128
129    /// Copying is not allowed.
130    FlintSpellingTermList(const FlintSpellingTermList &);
131
132    /// Assignment is not allowed.
133    void operator=(const FlintSpellingTermList &);
134
135  public:
136    /// Constructor.
137    FlintSpellingTermList(const std::string & data_)
138        : data(data_), p(0) { }
139
140    /// Destructor.
141    ~FlintSpellingTermList();
142
143    Xapian::termcount get_approx_size() const;
144
145    std::string get_termname() const;
146
147    Xapian::termcount get_wdf() const;
148
149    Xapian::doccount get_termfreq() const;
150
151    Xapian::termcount get_collection_freq() const;
152
153    TermList *next();
154
155    bool at_end() const;
156
157    Xapian::termcount positionlist_count() const;
158
159    Xapian::PositionIterator positionlist_begin() const;
160};
161
162#endif // XAPIAN_INCLUDED_FLINT_SPELLING_H
Note: See TracBrowser for help on using the browser.