root / tags / 1.0.8 / xapian-core / backends / flint / flint_database.h
| Revision 9281, 10.8 kB (checked in by olly, 16 months ago) | |
|---|---|
|
|
| Line | |
|---|---|
| 1 | /* flint_database.h: C++ class definition for flint database |
| 2 | * |
| 3 | * Copyright 1999,2000,2001 BrightStation PLC |
| 4 | * Copyright 2002 Ananova Ltd |
| 5 | * Copyright 2002,2003,2004,2005,2006,2007 Olly Betts |
| 6 | * |
| 7 | * This program is free software; you can redistribute it and/or |
| 8 | * modify it under the terms of the GNU General Public License as |
| 9 | * published by the Free Software Foundation; either version 2 of the |
| 10 | * License, or (at your option) any later version. |
| 11 | * |
| 12 | * This program is distributed in the hope that it will be useful, |
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 15 | * GNU General Public License for more details. |
| 16 | * |
| 17 | * You should have received a copy of the GNU General Public License |
| 18 | * along with this program; if not, write to the Free Software |
| 19 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 |
| 20 | * USA |
| 21 | */ |
| 22 | |
| 23 | #ifndef OM_HGUARD_FLINT_DATABASE_H |
| 24 | #define OM_HGUARD_FLINT_DATABASE_H |
| 25 | |
| 26 | #include "database.h" |
| 27 | #include "flint_positionlist.h" |
| 28 | #include "flint_postlist.h" |
| 29 | #include "flint_record.h" |
| 30 | #include "flint_spelling.h" |
| 31 | #include "flint_synonym.h" |
| 32 | #include "flint_termlisttable.h" |
| 33 | #include "flint_values.h" |
| 34 | #include "flint_version.h" |
| 35 | #include "flint_lock.h" |
| 36 | |
| 37 | #include "flint_types.h" |
| 38 | |
| 39 | #include <map> |
| 40 | |
| 41 | class FlintTermList; |
| 42 | class FlintAllDocsPostList; |
| 43 | /** Default value of the action argument of FlintDatabase's constructor (presumably selects read-only opening - TODO confirm). */ |
| 44 | const int XAPIAN_DB_READONLY = 0; |
| 45 | |
| 46 | /** A backend designed for efficient indexing and retrieval, using |
| 47 | * compressed posting lists and a btree storage scheme. |
| 48 | */ |
| 49 | class FlintDatabase : public Xapian::Database::Internal { |
| 50 | friend class FlintWritableDatabase; |
| 51 | friend class FlintTermList; |
| 52 | friend class FlintPostList; |
| 53 | friend class FlintAllTermsList; |
| 54 | friend class FlintAllDocsPostList; |
| 55 | private: |
| 56 | /** Directory to store databases in. |
| 57 | */ |
| 58 | std::string db_dir; |
| 59 | |
| 60 | /** Whether the database is readonly. |
| 61 | */ |
| 62 | bool readonly; |
| 63 | |
| 64 | /** The file describing the Flint database. |
| 65 | * This file has information about the format of the database |
| 66 | * which can't easily be stored in any of the individual tables. |
| 67 | */ |
| 68 | FlintVersion version_file; |
| 69 | |
| 70 | /** Table storing posting lists. |
| 71 | * |
| 72 | * Whenever an update is performed, this table is the first to be |
| 73 | * updated: therefore, its most recent revision number is the most |
| 74 | * recent anywhere in the database. |
| 75 | */ |
| 76 | mutable FlintPostListTable postlist_table; |
| 77 | |
| 78 | /** Table storing position lists. |
| 79 | */ |
| 80 | FlintPositionListTable position_table; |
| 81 | |
| 82 | /** Table storing term lists. |
| 83 | */ |
| 84 | FlintTermListTable termlist_table; |
| 85 | |
| 86 | /** Table storing values. |
| 87 | */ |
| 88 | FlintValueTable value_table; |
| 89 | |
| 90 | /** Table storing synonym data. |
| 91 | */ |
| 92 | mutable FlintSynonymTable synonym_table; |
| 93 | |
| 94 | /** Table storing spelling correction data. |
| 95 | */ |
| 96 | mutable FlintSpellingTable spelling_table; |
| 97 | |
| 98 | /** Table storing records. |
| 99 | * |
| 100 | * Whenever an update is performed, this table is the last to be |
| 101 | * updated: therefore, its most recent revision number is the most |
| 102 | * recent consistent revision available. If this table's most |
| 103 | * recent revision number is not available for all tables, there |
| 104 | * is no consistent revision available, and the database is corrupt. |
| 105 | */ |
| 106 | FlintRecordTable record_table; |
| 107 | |
| 108 | /// Lock object. |
| 109 | FlintLock lock; |
| 110 | |
| 111 | /** Total length of all documents including unflushed modifications. */ |
| 112 | mutable flint_totlen_t total_length; |
| 113 | |
| 114 | /** Highest document ID ever allocated by this database. */ |
| 115 | mutable Xapian::docid lastdocid; |
| 116 | |
| 117 | /// Read lastdocid and total_length from the postlist table. |
| 118 | void read_metainfo(); |
| 119 | |
| 120 | /** Return true if a database exists at the path specified for this |
| 121 | * database. |
| 122 | */ |
| 123 | bool database_exists(); |
| 124 | |
| 125 | /** Create new tables, and open them. |
| 126 | * Any existing tables will be removed first. |
| 127 | */ |
| 128 | void create_and_open_tables(unsigned int blocksize); |
| 129 | |
| 130 | /** Open all tables at the most recent consistent revision. |
| 131 | * |
| 132 | * @exception Xapian::DatabaseCorruptError is thrown if there is no |
| 133 | * consistent revision available. |
| 134 | */ |
| 135 | void open_tables_consistent(); |
| 136 | |
| 137 | /** Get a write lock on the database, or throw an |
| 138 | * Xapian::DatabaseLockError on failure. |
| 139 | */ |
| 140 | void get_database_write_lock(); |
| 141 | |
| 142 | /** Open tables at specified revision number. |
| 143 | * |
| 144 | * @exception Xapian::InvalidArgumentError is thrown if the specified |
| 145 | * revision is not available. |
| 146 | */ |
| 147 | void open_tables(flint_revision_number_t revision); |
| 148 | |
| 149 | /** Get the revision number which the tables are currently |
| 150 | * opened at. |
| 151 | * |
| 152 | * @return the current revision number. |
| 153 | */ |
| 154 | flint_revision_number_t get_revision_number() const; |
| 155 | |
| 156 | /** Get the next revision number which should be used in the |
| 157 | * tables. |
| 158 | * |
| 159 | * @return the next revision number. |
| 160 | */ |
| 161 | flint_revision_number_t get_next_revision_number() const; |
| 162 | |
| 163 | /** Set the revision number in the tables. |
| 164 | * |
| 165 | * This updates the disk tables so that the currently open revision |
| 166 | * becomes the specified revision number. |
| 167 | * |
| 168 | * @param new_revision The new revision number to store. This must |
| 169 | * be greater than the latest revision number, or undefined |
| 170 | * behaviour will result. NOTE(review): get_latest_revision_number() |
| 171 | * is not declared in this header; see get_revision_number() - confirm. |
| 172 | */ |
| 173 | void set_revision_number(flint_revision_number_t new_revision); |
| 174 | |
| 175 | /** Re-open tables to recover from an overwritten condition, |
| 176 | * or just to get the most up-to-date version. |
| 177 | */ |
| 178 | void reopen(); |
| 179 | |
| 180 | /** Apply any outstanding changes to the tables. |
| 181 | * |
| 182 | * If an error occurs during this operation, this will be signalled |
| 183 | * by an exception being thrown. In this case the contents of the |
| 184 | * tables on disk will be left in an unmodified state (though possibly |
| 185 | * with increased revision numbers), and the outstanding changes will |
| 186 | * be lost. |
| 187 | */ |
| 188 | void apply(); |
| 189 | |
| 190 | /** Cancel any outstanding changes to the tables. |
| 191 | */ |
| 192 | void cancel(); |
| 193 | |
| 194 | public: |
| 195 | /** Create and open a flint database. |
| 196 | * |
| 197 | * @exception Xapian::DatabaseCorruptError is thrown if there is no |
| 198 | * consistent revision available. |
| 199 | * |
| 200 | * @exception Xapian::DatabaseOpeningError thrown if database can't |
| 201 | * be opened. |
| 202 | * |
| 203 | * @exception Xapian::DatabaseVersionError thrown if database is in an |
| 204 | * unsupported format. This implies that the database was |
| 205 | * created by an older or newer version of Xapian. |
| 206 | * |
| 207 | * @param db_dir_ directory holding flint tables |
| 208 | * @param action How to open the database; defaults to XAPIAN_DB_READONLY. |
| 209 | * @param block_size Block size, in bytes, to use when creating |
| 210 | * tables. This is only important, and has the |
| 211 | * correct value, when the database is being |
| 212 | * created. Defaults to 0. |
| 213 | */ |
| 214 | FlintDatabase(const string &db_dir_, int action = XAPIAN_DB_READONLY, |
| 215 | unsigned int block_size = 0u); |
| 216 | |
| 217 | ~FlintDatabase(); |
| 218 | |
| 219 | /** Virtual methods of Database::Internal. */ |
| 220 | //@{ |
| 221 | Xapian::doccount get_doccount() const; |
| 222 | Xapian::docid get_lastdocid() const; |
| 223 | Xapian::doclength get_avlength() const; |
| 224 | Xapian::doclength get_doclength(Xapian::docid did) const; |
| 225 | Xapian::doccount get_termfreq(const string & tname) const; |
| 226 | Xapian::termcount get_collection_freq(const string & tname) const; |
| 227 | bool term_exists(const string & tname) const; |
| 228 | bool has_positions() const; |
| 229 | |
| 230 | LeafPostList * open_post_list(const string & tname) const; |
| 231 | Xapian::Document::Internal * open_document(Xapian::docid did, bool lazy = false) const; |
| 232 | |
| 233 | PositionList * open_position_list(Xapian::docid did, const string & term) const; |
| 234 | TermList * open_term_list(Xapian::docid did) const; |
| 235 | TermList * open_allterms(const string & prefix) const; |
| 236 | |
| 237 | TermList * open_spelling_termlist(const string & word) const; |
| 238 | TermList * open_spelling_wordlist() const; |
| 239 | Xapian::doccount get_spelling_frequency(const string & word) const; |
| 240 | |
| 241 | TermList * open_synonym_termlist(const string & term) const; |
| 242 | TermList * open_synonym_keylist(const string & prefix) const; |
| 243 | |
| 244 | string get_metadata(const string & key) const; |
| 245 | //@} |
| 246 | }; |
| 247 | |
| 248 | /** A writable flint database: a FlintDatabase plus buffers of changes which haven't been flushed yet. |
| 249 | */ |
| 250 | class FlintWritableDatabase : public FlintDatabase { |
| 251 | /** Unflushed changes to term frequencies and collection frequencies (presumably keyed by term - TODO confirm). */ |
| 252 | mutable map<string, pair<Xapian::termcount_diff, Xapian::termcount_diff> > |
| 253 | freq_deltas; |
| 254 | |
| 255 | /** Document lengths of new and modified documents which haven't been flushed yet. */ |
| 256 | mutable map<Xapian::docid, Xapian::termcount> doclens; |
| 257 | |
| 258 | /// Modifications to posting lists (NOTE(review): outer key presumably the term; the char/termcount pair is undocumented here). |
| 259 | mutable map<string, map<Xapian::docid, |
| 260 | pair<char, Xapian::termcount> > > mod_plists; |
| 261 | |
| 262 | /** The number of documents added, deleted, or replaced since the last |
| 263 | * flush. |
| 264 | */ |
| 265 | mutable Xapian::doccount change_count; |
| 266 | |
| 267 | /// If change_count reaches this threshold we automatically flush. |
| 268 | Xapian::doccount flush_threshold; |
| 269 | |
| 270 | /// Flush any unflushed postlist changes, but don't commit them. |
| 271 | void flush_postlist_changes() const; |
| 272 | |
| 273 | //@{ |
| 274 | /** Implementation of virtual methods: see Database::Internal for |
| 275 | * details. These sit in the class's default (private) section. |
| 276 | */ |
| 277 | void flush(); |
| 278 | |
| 279 | /** Cancel pending modifications to the database. */ |
| 280 | void cancel(); |
| 281 | |
| 282 | Xapian::docid add_document(const Xapian::Document & document); |
| 283 | Xapian::docid add_document_(Xapian::docid did, const Xapian::Document & document); |
| 284 | // Stop the default implementation of delete_document(term) and |
| 285 | // replace_document(term) from being hidden. This isn't really |
| 286 | // a problem as we only try to call them through the base class |
| 287 | // (where they aren't hidden) but some compilers generate a warning |
| 288 | // about the hiding. |
| 289 | #if (!defined __GNUC__ && !defined _MSC_VER) || __GNUC__ > 2 |
| 290 | using Xapian::Database::Internal::delete_document; |
| 291 | using Xapian::Database::Internal::replace_document; |
| 292 | #endif |
| 293 | void delete_document(Xapian::docid did); |
| 294 | void replace_document(Xapian::docid did, const Xapian::Document & document); |
| 295 | //@} |
| 296 | |
| 297 | public: |
| 298 | /** Create and open a writable flint database. |
| 299 | * |
| 300 | * @exception Xapian::DatabaseOpeningError thrown if database can't |
| 301 | * be opened. |
| 302 | * |
| 303 | * @exception Xapian::DatabaseVersionError thrown if database is in an |
| 304 | * unsupported format. This implies that the database was |
| 305 | * created by an older or newer version of Xapian. |
| 306 | * @param dir directory holding flint tables |
| 307 | * @param action,block_size Not documented here; presumably as for the FlintDatabase constructor - TODO confirm. |
| 308 | */ |
| 309 | FlintWritableDatabase(const string &dir, int action, int block_size); |
| 310 | |
| 311 | ~FlintWritableDatabase(); |
| 312 | |
| 313 | /** Virtual methods of Database::Internal. */ |
| 314 | //@{ |
| 315 | Xapian::doclength get_doclength(Xapian::docid did) const; |
| 316 | Xapian::doccount get_termfreq(const string & tname) const; |
| 317 | Xapian::termcount get_collection_freq(const string & tname) const; |
| 318 | bool term_exists(const string & tname) const; |
| 319 | |
| 320 | LeafPostList * open_post_list(const string & tname) const; |
| 321 | TermList * open_allterms(const string & prefix) const; |
| 322 | |
| 323 | void add_spelling(const string & word, Xapian::termcount freqinc) const; |
| 324 | void remove_spelling(const string & word, Xapian::termcount freqdec) const; |
| 325 | TermList * open_spelling_wordlist() const; |
| 326 | |
| 327 | TermList * open_synonym_keylist(const string & prefix) const; |
| 328 | void add_synonym(const string & word, const string & synonym) const; |
| 329 | void remove_synonym(const string & word, const string & synonym) const; |
| 330 | void clear_synonyms(const string & word) const; |
| 331 | |
| 332 | void set_metadata(const string & key, const string & value); |
| 333 | //@} |
| 334 | }; |
| 335 | |
| 336 | #endif /* OM_HGUARD_FLINT_DATABASE_H */ |
Note: See TracBrowser
for help on using the browser.
