root / tags / 1.0.8 / xapian-core / backends / flint / flint_synonym.cc

Revision 9275, 6.0 kB (checked in by olly, 16 months ago)

backends/flint/flint_spellingwordslist.cc,
backends/flint/flint_synonym.cc: Use FlintCursor::find_entry_ge().
backends/flint/flint_synonym.h: Use FlintCursor::find_entry_lt().
backends/flint/flint_alltermslist.h: Use FlintCursor::find_entry_lt()
and skip any keys before "\x00\xff" to allow for extra metadata
keys.

Line 
1/** @file flint_synonym.cc
2 * @brief Synonym data for a flint database.
3 */
4/* Copyright (C) 2004,2005,2006,2007 Olly Betts
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
19 */
20
21#include <config.h>
22
23#include <xapian/error.h>
24
25#include "flint_cursor.h"
26#include "flint_synonym.h"
27#include "flint_utils.h"
28#include "stringutils.h"
29#include "vectortermlist.h"
30
31#include <set>
32#include <string>
33#include <vector>
34
35using namespace std;
36
// Each synonym's length byte is XORed with this value before being stored.
// Lengths 1 to 26 then map onto the lower case ASCII letters 'a' to 'z',
// which are likely to be common in the synonyms themselves, so zlib should
// be able to do a better job of compressing the tag values.
#define MAGIC_XOR_VALUE 96
41
42void
43FlintSynonymTable::merge_changes()
44{
45    if (last_term.empty()) return;
46
47    if (last_synonyms.empty()) {
48        del(last_term);
49    } else {
50        string tag;
51
52        set<string>::const_iterator i;
53        for (i = last_synonyms.begin(); i != last_synonyms.end(); ++i) {
54            const string & synonym = *i;
55            tag += (byte)(synonym.size() ^ MAGIC_XOR_VALUE);
56            tag += synonym;
57        }
58
59        add(last_term, tag);
60        last_synonyms.clear();
61    }
62    last_term.resize(0);
63}
64
65void
66FlintSynonymTable::add_synonym(const string & term, const string & synonym)
67{
68    if (last_term != term) {
69        merge_changes();
70        last_term = term;
71
72        string tag;
73        if (get_exact_entry(term, tag)) {
74            const char * p = tag.data();
75            const char * end = p + tag.size();
76            while (p != end) {
77                size_t len;
78                if (p == end ||
79                    (len = (byte)*p ^ MAGIC_XOR_VALUE) >= size_t(end - p))
80                    throw Xapian::DatabaseCorruptError("Bad synonym data");
81                ++p;
82                last_synonyms.insert(string(p, len));
83                p += len;
84            }
85        }
86    }
87
88    last_synonyms.insert(synonym);
89}
90
91void
92FlintSynonymTable::remove_synonym(const string & term, const string & synonym)
93{
94    if (last_term != term) {
95        merge_changes();
96        last_term = term;
97
98        string tag;
99        if (get_exact_entry(term, tag)) {
100            const char * p = tag.data();
101            const char * end = p + tag.size();
102            while (p != end) {
103                size_t len;
104                if (p == end ||
105                    (len = (byte)*p ^ MAGIC_XOR_VALUE) >= size_t(end - p))
106                    throw Xapian::DatabaseCorruptError("Bad synonym data");
107                ++p;
108                last_synonyms.insert(string(p, len));
109                p += len;
110            }
111        }
112    }
113
114    last_synonyms.erase(synonym);
115}
116
117void
118FlintSynonymTable::clear_synonyms(const string & term)
119{
120    // We don't actually ever need to merge_changes() here, but it's quite
121    // likely that someone might clear_synonyms() and then add_synonym() for
122    // the same term.  The alternative we could otherwise optimise for (modify
123    // synonyms for a term, then clear those for another, then modify those for
124    // the first term again) seems much less likely.
125    if (last_term == term) {
126        last_synonyms.clear();
127    } else {
128        merge_changes();
129        last_term = term;
130    }
131}
132
133TermList *
134FlintSynonymTable::open_termlist(const string & term)
135{
136    vector<string> synonyms;
137
138    if (last_term == term) {
139        if (last_synonyms.empty()) return NULL;
140
141        synonyms.reserve(last_synonyms.size());
142        set<string>::const_iterator i;
143        for (i = last_synonyms.begin(); i != last_synonyms.end(); ++i) {
144            synonyms.push_back(*i);
145        }
146    } else {
147        string tag;
148        if (!get_exact_entry(term, tag)) return NULL;
149
150        const char * p = tag.data();
151        const char * end = p + tag.size();
152        while (p != end) {
153            size_t len;
154            if (p == end ||
155                (len = (byte)*p ^ MAGIC_XOR_VALUE) >= size_t(end - p))
156                throw Xapian::DatabaseCorruptError("Bad synonym data");
157            ++p;
158            synonyms.push_back(string(p, len));
159            p += len;
160        }
161    }
162
163    return new VectorTermList(synonyms.begin(), synonyms.end());
164}
165
166///////////////////////////////////////////////////////////////////////////
167
168FlintSynonymTermList::~FlintSynonymTermList()
169{
170    DEBUGCALL(DB, void, "~FlintSynonymTermList", "");
171    delete cursor;
172}
173
174Xapian::termcount
175FlintSynonymTermList::get_approx_size() const
176{
177    DEBUGCALL(DB, Xapian::termcount, "FlintSynonymTermList::get_approx_size", "");
178    RETURN(size);
179}
180
181string
182FlintSynonymTermList::get_termname() const
183{
184    DEBUGCALL(DB, string, "FlintSynonymTermList::get_termname", "");
185    Assert(cursor);
186    Assert(!cursor->current_key.empty());
187    Assert(!at_end());
188    RETURN(cursor->current_key);
189}
190
191Xapian::doccount
192FlintSynonymTermList::get_termfreq() const
193{
194    throw Xapian::InvalidOperationError("FlintSynonymTermList::get_termfreq() not meaningful");
195}
196
197Xapian::termcount
198FlintSynonymTermList::get_collection_freq() const
199{
200    throw Xapian::InvalidOperationError("FlintSynonymTermList::get_collection_freq() not meaningful");
201}
202
203TermList *
204FlintSynonymTermList::next()
205{
206    DEBUGCALL(DB, TermList *, "FlintSynonymTermList::next", "");
207    Assert(!at_end());
208
209    cursor->next();
210    if (!cursor->after_end() && !startswith(cursor->current_key, prefix)) {
211        // We've reached the end of the end of the prefixed terms.
212        cursor->to_end();
213    }
214
215    RETURN(NULL);
216}
217
218TermList *
219FlintSynonymTermList::skip_to(const string &tname)
220{
221    DEBUGCALL(DB, TermList *, "FlintSynonymTermList::skip_to", tname);
222    Assert(!at_end());
223
224    if (!cursor->find_entry_ge(tname)) {
225        // The exact term we asked for isn't there, so check if the next
226        // term after it also has the right prefix.
227        if (!cursor->after_end() && !startswith(cursor->current_key, prefix)) {
228            // We've reached the end of the prefixed terms.
229            cursor->to_end();
230        }
231    }
232    RETURN(NULL);
233}
234
235bool
236FlintSynonymTermList::at_end() const
237{
238    DEBUGCALL(DB, bool, "FlintSynonymTermList::at_end", "");
239    RETURN(cursor->after_end());
240}
Note: See TracBrowser for help on using the browser.