root / tags / 1.0.8 / xapian-core / net / serialise.cc

Revision 9654, 11.9 kB (checked in by olly, 14 months ago)

common/,docs/remote_protocol.html,matcher/,net/remoteserver.cc,
net/serialise.cc: If we're doing a match with only one database
which is remote then just return the unserialised MSet from the
remote match. This requires that we include
internal->percent_factor in the MSet serialisation, which requires
a minor remote protocol version bump.

  • Property svn:eol-style set to native
Line 
1/* @file serialise.cc
2 * @brief functions to convert Xapian objects to strings and back
3 */
4/* Copyright (C) 2006,2007 Olly Betts
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
19 */
20
21#include <config.h>
22
23#include <xapian/document.h>
24#include <xapian/error.h>
25#include <xapian/positioniterator.h>
26#include <xapian/termiterator.h>
27#include <xapian/valueiterator.h>
28
29#include "omassert.h"
30#include "omenquireinternal.h"
31#include "serialise.h"
32#include "serialise-double.h"
33#include "stats.h"
34#include "utils.h"
35
36#include <string>
37#include <string.h>
38
39using namespace std;
40
/** Encode an unsigned value as a compact variable-length byte string.
 *
 *  A value below 255 is stored as one byte holding the value itself.
 *  Anything larger is stored as a 0xff marker byte followed by (len - 255)
 *  split into 7-bit groups, least significant group first; only the final
 *  group has its top bit set.
 *
 *  @param len  The value to encode.
 *  @return     The encoded bytes.
 */
string
encode_length(size_t len)
{
    if (len < 255) {
        // Small values fit in the single leading byte.
        return string(1, static_cast<char>(static_cast<unsigned char>(len)));
    }

    string encoded(1, '\xff');
    size_t rest = len - 255;
    // Emit every group except the last with the top bit clear.
    while (rest > 0x7f) {
        encoded += static_cast<char>(static_cast<unsigned char>(rest & 0x7f));
        rest >>= 7;
    }
    // The last group carries the terminating top bit.
    encoded += static_cast<char>(static_cast<unsigned char>(rest | 0x80));
    return encoded;
}
62
63size_t
64decode_length(const char ** p, const char *end, bool check_remaining)
65{
66    if (*p == end) {
67        throw Xapian::NetworkError("Bad encoded length: no data");
68    }
69
70    size_t len = static_cast<unsigned char>(*(*p)++);
71    if (len == 0xff) {
72        len = 0;
73        unsigned char ch;
74        int shift = 0;
75        do {
76            if (*p == end || shift > 28)
77                throw Xapian::NetworkError("Bad encoded length: insufficient data");
78            ch = *(*p)++;
79            len |= size_t(ch & 0x7f) << shift;
80            shift += 7;
81        } while ((ch & 0x80) == 0);
82        len += 255;
83    }
84    if (check_remaining && len > size_t(end - *p)) {
85        throw Xapian::NetworkError("Bad encoded length: length greater than data");
86    }
87    return len;
88}
89
90string
91serialise_error(const Xapian::Error &e)
92{
93    string result;
94    result += encode_length(strlen(e.get_type()));
95    result += e.get_type();
96    result += encode_length(e.get_context().length());
97    result += e.get_context();
98    result += encode_length(e.get_msg().length());
99    result += e.get_msg();
100    // The "error string" goes last so we don't need to store its length.
101    const char * err = e.get_error_string();
102    if (err) result += err;
103    return result;
104}
105
106void
107unserialise_error(const string &serialised_error, const string &prefix,
108                  const string &new_context)
109{
110    // Use c_str() so last string is nul-terminated.
111    const char * p = serialised_error.c_str();
112    const char * end = p + serialised_error.size();
113    size_t len;
114    len = decode_length(&p, end, true);
115    if (len == 7 && memcmp(p, "UNKNOWN", 7) == 0) {
116        throw Xapian::InternalError("UNKNOWN");
117    }
118    string type(p, len);
119    p += len;
120
121    len = decode_length(&p, end, true);
122    string context(p, len);
123    p += len;
124
125    len = decode_length(&p, end, true);
126    string msg(prefix);
127    msg.append(p, len);
128    p += len;
129
130    const char * error_string = (p == end) ? NULL : p;
131
132    if (!context.empty() && !new_context.empty()) {
133        msg += "; context was: ";
134        msg += context;
135        context = new_context;
136    }
137
138#include <xapian/errordispatch.h>
139
140    msg = "Unknown remote exception type " + type + ": " + msg;
141    throw Xapian::InternalError(msg, context);
142}
143
144string serialise_stats(const Stats &stats)
145{
146    string result;
147
148    result += encode_length(stats.collection_size);
149    result += encode_length(stats.rset_size);
150    result += serialise_double(stats.average_length);
151
152    map<string, Xapian::doccount>::const_iterator i;
153
154    result += encode_length(stats.termfreq.size());
155    for (i = stats.termfreq.begin(); i != stats.termfreq.end(); ++i) {
156        result += encode_length(i->first.size());
157        result += i->first;
158        result += encode_length(i->second);
159    }
160
161    for (i = stats.reltermfreq.begin(); i != stats.reltermfreq.end(); ++i) {
162        result += encode_length(i->first.size());
163        result += i->first;
164        result += encode_length(i->second);
165    }
166
167    return result;
168}
169
170Stats
171unserialise_stats(const string &s)
172{
173    const char * p = s.c_str();
174    const char * p_end = p + s.size();
175
176    Stats stat;
177
178    stat.collection_size = decode_length(&p, p_end, false);
179    stat.rset_size = decode_length(&p, p_end, false);
180    stat.average_length = unserialise_double(&p, p_end);
181
182    size_t n = decode_length(&p, p_end, false);
183    while (n--) {
184        size_t len = decode_length(&p, p_end, true);
185        string term(p, len);
186        p += len;
187        stat.termfreq.insert(make_pair(term, decode_length(&p, p_end, false)));
188    }
189
190    while (p != p_end) {
191        size_t len = decode_length(&p, p_end, true);
192        string term(p, len);
193        p += len;
194        stat.reltermfreq.insert(make_pair(term, decode_length(&p, p_end, false)));
195    }
196
197    return stat;
198}
199
200string
201serialise_mset_pre_30_5(const Xapian::MSet &mset)
202{
203    string result;
204
205    result += encode_length(mset.get_firstitem());
206    result += encode_length(mset.get_matches_lower_bound());
207    result += encode_length(mset.get_matches_estimated());
208    result += encode_length(mset.get_matches_upper_bound());
209    result += serialise_double(mset.get_max_possible());
210    result += serialise_double(mset.get_max_attained());
211    result += encode_length(mset.size());
212    for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); ++i) {
213        result += serialise_double(i.get_weight());
214        result += encode_length(*i);
215        result += encode_length(i.get_collapse_key().size());
216        result += i.get_collapse_key();
217        result += encode_length(i.get_collapse_count());
218    }
219
220    const map<string, Xapian::MSet::Internal::TermFreqAndWeight> &termfreqandwts
221        = mset.internal->termfreqandwts;
222
223    map<string, Xapian::MSet::Internal::TermFreqAndWeight>::const_iterator j;
224    for (j = termfreqandwts.begin(); j != termfreqandwts.end(); ++j) {
225        result += encode_length(j->first.size());
226        result += j->first;
227        result += encode_length(j->second.termfreq);
228        result += serialise_double(j->second.termweight);
229    }
230
231    return result;
232}
233
234string
235serialise_mset(const Xapian::MSet &mset)
236{
237    string result;
238
239    result += encode_length(mset.get_firstitem());
240    result += encode_length(mset.get_matches_lower_bound());
241    result += encode_length(mset.get_matches_estimated());
242    result += encode_length(mset.get_matches_upper_bound());
243    result += serialise_double(mset.get_max_possible());
244    result += serialise_double(mset.get_max_attained());
245
246    result += serialise_double(mset.internal->percent_factor);
247
248    result += encode_length(mset.size());
249    for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); ++i) {
250        result += serialise_double(i.get_weight());
251        result += encode_length(*i);
252        result += encode_length(i.get_collapse_key().size());
253        result += i.get_collapse_key();
254        result += encode_length(i.get_collapse_count());
255    }
256
257    const map<string, Xapian::MSet::Internal::TermFreqAndWeight> &termfreqandwts
258        = mset.internal->termfreqandwts;
259
260    map<string, Xapian::MSet::Internal::TermFreqAndWeight>::const_iterator j;
261    for (j = termfreqandwts.begin(); j != termfreqandwts.end(); ++j) {
262        result += encode_length(j->first.size());
263        result += j->first;
264        result += encode_length(j->second.termfreq);
265        result += serialise_double(j->second.termweight);
266    }
267
268    return result;
269}
270
271Xapian::MSet
272unserialise_mset(const string &s)
273{
274    const char * p = s.data();
275    const char * p_end = p + s.size();
276
277    Xapian::doccount firstitem = decode_length(&p, p_end, false);
278    Xapian::doccount matches_lower_bound = decode_length(&p, p_end, false);
279    Xapian::doccount matches_estimated = decode_length(&p, p_end, false);
280    Xapian::doccount matches_upper_bound = decode_length(&p, p_end, false);
281    Xapian::weight max_possible = unserialise_double(&p, p_end);
282    Xapian::weight max_attained = unserialise_double(&p, p_end);
283
284    double percent_factor = unserialise_double(&p, p_end);
285
286    vector<Xapian::Internal::MSetItem> items;
287    size_t msize = decode_length(&p, p_end, false);
288    while (msize-- > 0) {
289        Xapian::weight wt = unserialise_double(&p, p_end);
290        Xapian::docid did = decode_length(&p, p_end, false);
291        size_t len = decode_length(&p, p_end, true);
292        string key(p, len);
293        p += len;
294        items.push_back(Xapian::Internal::MSetItem(wt, did, key,
295                                                   decode_length(&p, p_end, false)));
296    }
297
298    map<string, Xapian::MSet::Internal::TermFreqAndWeight> terminfo;
299    while (p != p_end) {
300        Xapian::MSet::Internal::TermFreqAndWeight tfaw;
301        size_t len = decode_length(&p, p_end, true);
302        string term(p, len);
303        p += len;
304        tfaw.termfreq = decode_length(&p, p_end, false);
305        tfaw.termweight = unserialise_double(&p, p_end);
306        terminfo.insert(make_pair(term, tfaw));
307    }
308
309    return Xapian::MSet(new Xapian::MSet::Internal(
310                                       firstitem,
311                                       matches_upper_bound,
312                                       matches_lower_bound,
313                                       matches_estimated,
314                                       max_possible, max_attained,
315                                       items, terminfo, percent_factor));
316}
317
318string
319serialise_rset(const Xapian::RSet &rset)
320{
321    const set<Xapian::docid> & items = rset.internal->get_items();
322    string result;
323    set<Xapian::docid>::const_iterator i;
324    Xapian::docid lastdid = 0;
325    for (i = items.begin(); i != items.end(); ++i) {
326        Xapian::docid did = *i;
327        result += encode_length(did - lastdid - 1);
328        lastdid = did;
329    }
330    return result;
331}
332
333Xapian::RSet
334unserialise_rset(const string &s)
335{
336    Xapian::RSet rset;
337
338    const char * p = s.data();
339    const char * p_end = p + s.size();
340
341    Xapian::docid did = 0;
342    while (p != p_end) {
343        did += decode_length(&p, p_end, false) + 1;
344        rset.add_document(did);
345    }
346
347    return rset;
348}
349
350string
351serialise_document(const Xapian::Document &doc)
352{
353    string result;
354
355    size_t n = doc.values_count();
356    result += encode_length(doc.values_count());
357    Xapian::ValueIterator value;
358    for (value = doc.values_begin(); value != doc.values_end(); ++value) {
359        result += encode_length(value.get_valueno());
360        result += encode_length((*value).size());
361        result += *value;
362        --n;
363    }
364    Assert(n == 0);
365
366    result += encode_length(doc.termlist_count());
367    Xapian::TermIterator term;
368    n = doc.termlist_count();
369    for (term = doc.termlist_begin(); term != doc.termlist_end(); ++term) {
370        result += encode_length((*term).size());
371        result += *term;
372        result += encode_length(term.get_wdf());
373
374        result += encode_length(term.positionlist_count());
375        Xapian::PositionIterator pos;
376        Xapian::termpos oldpos = 0;
377        size_t x = term.positionlist_count();
378        for (pos = term.positionlist_begin(); pos != term.positionlist_end(); ++pos) {
379            Xapian::termpos diff = *pos - oldpos;
380            string delta = encode_length(diff);
381            result += delta;
382            oldpos = *pos;
383            --x;
384        }
385        Assert(x == 0);
386        --n;
387    }
388    Assert(n == 0);
389
390    result += doc.get_data();
391    return result;
392}
393
394Xapian::Document
395unserialise_document(const string &s)
396{
397    Xapian::Document doc;
398    const char * p = s.data();
399    const char * p_end = p + s.size();
400
401    size_t n_values = decode_length(&p, p_end, false);
402    while (n_values--) {
403        Xapian::valueno valno = decode_length(&p, p_end, false);
404        size_t len = decode_length(&p, p_end, true);
405        doc.add_value(valno, string(p, len));
406        p += len;
407    }
408
409    size_t n_terms = decode_length(&p, p_end, false);
410    while (n_terms--) {
411        size_t len = decode_length(&p, p_end, true);
412        string term(p, len);
413        p += len;
414
415        // Set all the wdf using add_term, then pass wdf_inc 0 to add_posting.
416        Xapian::termcount wdf = decode_length(&p, p_end, false);
417        doc.add_term(term, wdf);
418
419        size_t n_pos = decode_length(&p, p_end, false);
420        Xapian::termpos pos = 0;
421        while (n_pos--) {
422            pos += decode_length(&p, p_end, false);
423            doc.add_posting(term, pos, 0);
424        }
425    }
426
427    doc.set_data(string(p, p_end - p));
428    return doc;
429}
Note: See TracBrowser for help on using the browser.