| 1 | |
|---|
| 2 | |
|---|
| 3 | |
|---|
| 4 | |
|---|
| 5 | |
|---|
| 6 | |
|---|
| 7 | |
|---|
| 8 | |
|---|
| 9 | |
|---|
| 10 | |
|---|
| 11 | |
|---|
| 12 | |
|---|
| 13 | |
|---|
| 14 | |
|---|
| 15 | |
|---|
| 16 | |
|---|
| 17 | |
|---|
| 18 | |
|---|
| 19 | |
|---|
| 20 | |
|---|
| 21 | #include <config.h> |
|---|
| 22 | |
|---|
| 23 | #include <xapian/document.h> |
|---|
| 24 | #include <xapian/error.h> |
|---|
| 25 | #include <xapian/positioniterator.h> |
|---|
| 26 | #include <xapian/termiterator.h> |
|---|
| 27 | #include <xapian/valueiterator.h> |
|---|
| 28 | |
|---|
| 29 | #include "omassert.h" |
|---|
| 30 | #include "omenquireinternal.h" |
|---|
| 31 | #include "serialise.h" |
|---|
| 32 | #include "serialise-double.h" |
|---|
| 33 | #include "stats.h" |
|---|
| 34 | #include "utils.h" |
|---|
| 35 | |
|---|
| 36 | #include <string> |
|---|
| 37 | #include <string.h> |
|---|
| 38 | |
|---|
| 39 | using namespace std; |
|---|
| 40 | |
|---|
/** Encode an unsigned value as a variable-length byte string.
 *
 *  Values below 255 are written as a single byte holding the value itself.
 *  Anything larger is written as a 0xff marker byte followed by
 *  (len - 255) split into 7-bit groups, least significant group first;
 *  the last group has its top bit set to mark the end of the encoding.
 *
 *  @param len  The value to encode.
 *  @return     The encoded bytes.
 */
string
encode_length(size_t len)
{
    if (len < 255) {
        // Short form: one byte is the whole encoding.
        return string(1, static_cast<char>(static_cast<unsigned char>(len)));
    }

    string encoded(1, '\xff');
    size_t rest = len - 255;

    // Every group except the last is emitted with its top bit clear.
    while ((rest >> 7) != 0) {
        encoded += static_cast<char>(static_cast<unsigned char>(rest & 0x7f));
        rest >>= 7;
    }

    // The last group carries the terminator bit.
    encoded += static_cast<char>(static_cast<unsigned char>(rest | 0x80));
    return encoded;
}
|---|
| 62 | |
|---|
| 63 | size_t |
|---|
| 64 | decode_length(const char ** p, const char *end, bool check_remaining) |
|---|
| 65 | { |
|---|
| 66 | if (*p == end) { |
|---|
| 67 | throw Xapian::NetworkError("Bad encoded length: no data"); |
|---|
| 68 | } |
|---|
| 69 | |
|---|
| 70 | size_t len = static_cast<unsigned char>(*(*p)++); |
|---|
| 71 | if (len == 0xff) { |
|---|
| 72 | len = 0; |
|---|
| 73 | unsigned char ch; |
|---|
| 74 | int shift = 0; |
|---|
| 75 | do { |
|---|
| 76 | if (*p == end || shift > 28) |
|---|
| 77 | throw Xapian::NetworkError("Bad encoded length: insufficient data"); |
|---|
| 78 | ch = *(*p)++; |
|---|
| 79 | len |= size_t(ch & 0x7f) << shift; |
|---|
| 80 | shift += 7; |
|---|
| 81 | } while ((ch & 0x80) == 0); |
|---|
| 82 | len += 255; |
|---|
| 83 | } |
|---|
| 84 | if (check_remaining && len > size_t(end - *p)) { |
|---|
| 85 | throw Xapian::NetworkError("Bad encoded length: length greater than data"); |
|---|
| 86 | } |
|---|
| 87 | return len; |
|---|
| 88 | } |
|---|
| 89 | |
|---|
| 90 | string |
|---|
| 91 | serialise_error(const Xapian::Error &e) |
|---|
| 92 | { |
|---|
| 93 | string result; |
|---|
| 94 | result += encode_length(strlen(e.get_type())); |
|---|
| 95 | result += e.get_type(); |
|---|
| 96 | result += encode_length(e.get_context().length()); |
|---|
| 97 | result += e.get_context(); |
|---|
| 98 | result += encode_length(e.get_msg().length()); |
|---|
| 99 | result += e.get_msg(); |
|---|
| 100 | |
|---|
| 101 | const char * err = e.get_error_string(); |
|---|
| 102 | if (err) result += err; |
|---|
| 103 | return result; |
|---|
| 104 | } |
|---|
| 105 | |
|---|
| 106 | void |
|---|
| 107 | unserialise_error(const string &serialised_error, const string &prefix, |
|---|
| 108 | const string &new_context) |
|---|
| 109 | { |
|---|
| 110 | |
|---|
| 111 | const char * p = serialised_error.c_str(); |
|---|
| 112 | const char * end = p + serialised_error.size(); |
|---|
| 113 | size_t len; |
|---|
| 114 | len = decode_length(&p, end, true); |
|---|
| 115 | if (len == 7 && memcmp(p, "UNKNOWN", 7) == 0) { |
|---|
| 116 | throw Xapian::InternalError("UNKNOWN"); |
|---|
| 117 | } |
|---|
| 118 | string type(p, len); |
|---|
| 119 | p += len; |
|---|
| 120 | |
|---|
| 121 | len = decode_length(&p, end, true); |
|---|
| 122 | string context(p, len); |
|---|
| 123 | p += len; |
|---|
| 124 | |
|---|
| 125 | len = decode_length(&p, end, true); |
|---|
| 126 | string msg(prefix); |
|---|
| 127 | msg.append(p, len); |
|---|
| 128 | p += len; |
|---|
| 129 | |
|---|
| 130 | const char * error_string = (p == end) ? NULL : p; |
|---|
| 131 | |
|---|
| 132 | if (!context.empty() && !new_context.empty()) { |
|---|
| 133 | msg += "; context was: "; |
|---|
| 134 | msg += context; |
|---|
| 135 | context = new_context; |
|---|
| 136 | } |
|---|
| 137 | |
|---|
| 138 | #include <xapian/errordispatch.h> |
|---|
| 139 | |
|---|
| 140 | msg = "Unknown remote exception type " + type + ": " + msg; |
|---|
| 141 | throw Xapian::InternalError(msg, context); |
|---|
| 142 | } |
|---|
| 143 | |
|---|
| 144 | string serialise_stats(const Stats &stats) |
|---|
| 145 | { |
|---|
| 146 | string result; |
|---|
| 147 | |
|---|
| 148 | result += encode_length(stats.collection_size); |
|---|
| 149 | result += encode_length(stats.rset_size); |
|---|
| 150 | result += serialise_double(stats.average_length); |
|---|
| 151 | |
|---|
| 152 | map<string, Xapian::doccount>::const_iterator i; |
|---|
| 153 | |
|---|
| 154 | result += encode_length(stats.termfreq.size()); |
|---|
| 155 | for (i = stats.termfreq.begin(); i != stats.termfreq.end(); ++i) { |
|---|
| 156 | result += encode_length(i->first.size()); |
|---|
| 157 | result += i->first; |
|---|
| 158 | result += encode_length(i->second); |
|---|
| 159 | } |
|---|
| 160 | |
|---|
| 161 | for (i = stats.reltermfreq.begin(); i != stats.reltermfreq.end(); ++i) { |
|---|
| 162 | result += encode_length(i->first.size()); |
|---|
| 163 | result += i->first; |
|---|
| 164 | result += encode_length(i->second); |
|---|
| 165 | } |
|---|
| 166 | |
|---|
| 167 | return result; |
|---|
| 168 | } |
|---|
| 169 | |
|---|
| 170 | Stats |
|---|
| 171 | unserialise_stats(const string &s) |
|---|
| 172 | { |
|---|
| 173 | const char * p = s.c_str(); |
|---|
| 174 | const char * p_end = p + s.size(); |
|---|
| 175 | |
|---|
| 176 | Stats stat; |
|---|
| 177 | |
|---|
| 178 | stat.collection_size = decode_length(&p, p_end, false); |
|---|
| 179 | stat.rset_size = decode_length(&p, p_end, false); |
|---|
| 180 | stat.average_length = unserialise_double(&p, p_end); |
|---|
| 181 | |
|---|
| 182 | size_t n = decode_length(&p, p_end, false); |
|---|
| 183 | while (n--) { |
|---|
| 184 | size_t len = decode_length(&p, p_end, true); |
|---|
| 185 | string term(p, len); |
|---|
| 186 | p += len; |
|---|
| 187 | stat.termfreq.insert(make_pair(term, decode_length(&p, p_end, false))); |
|---|
| 188 | } |
|---|
| 189 | |
|---|
| 190 | while (p != p_end) { |
|---|
| 191 | size_t len = decode_length(&p, p_end, true); |
|---|
| 192 | string term(p, len); |
|---|
| 193 | p += len; |
|---|
| 194 | stat.reltermfreq.insert(make_pair(term, decode_length(&p, p_end, false))); |
|---|
| 195 | } |
|---|
| 196 | |
|---|
| 197 | return stat; |
|---|
| 198 | } |
|---|
| 199 | |
|---|
| 200 | string |
|---|
| 201 | serialise_mset_pre_30_5(const Xapian::MSet &mset) |
|---|
| 202 | { |
|---|
| 203 | string result; |
|---|
| 204 | |
|---|
| 205 | result += encode_length(mset.get_firstitem()); |
|---|
| 206 | result += encode_length(mset.get_matches_lower_bound()); |
|---|
| 207 | result += encode_length(mset.get_matches_estimated()); |
|---|
| 208 | result += encode_length(mset.get_matches_upper_bound()); |
|---|
| 209 | result += serialise_double(mset.get_max_possible()); |
|---|
| 210 | result += serialise_double(mset.get_max_attained()); |
|---|
| 211 | result += encode_length(mset.size()); |
|---|
| 212 | for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); ++i) { |
|---|
| 213 | result += serialise_double(i.get_weight()); |
|---|
| 214 | result += encode_length(*i); |
|---|
| 215 | result += encode_length(i.get_collapse_key().size()); |
|---|
| 216 | result += i.get_collapse_key(); |
|---|
| 217 | result += encode_length(i.get_collapse_count()); |
|---|
| 218 | } |
|---|
| 219 | |
|---|
| 220 | const map<string, Xapian::MSet::Internal::TermFreqAndWeight> &termfreqandwts |
|---|
| 221 | = mset.internal->termfreqandwts; |
|---|
| 222 | |
|---|
| 223 | map<string, Xapian::MSet::Internal::TermFreqAndWeight>::const_iterator j; |
|---|
| 224 | for (j = termfreqandwts.begin(); j != termfreqandwts.end(); ++j) { |
|---|
| 225 | result += encode_length(j->first.size()); |
|---|
| 226 | result += j->first; |
|---|
| 227 | result += encode_length(j->second.termfreq); |
|---|
| 228 | result += serialise_double(j->second.termweight); |
|---|
| 229 | } |
|---|
| 230 | |
|---|
| 231 | return result; |
|---|
| 232 | } |
|---|
| 233 | |
|---|
| 234 | string |
|---|
| 235 | serialise_mset(const Xapian::MSet &mset) |
|---|
| 236 | { |
|---|
| 237 | string result; |
|---|
| 238 | |
|---|
| 239 | result += encode_length(mset.get_firstitem()); |
|---|
| 240 | result += encode_length(mset.get_matches_lower_bound()); |
|---|
| 241 | result += encode_length(mset.get_matches_estimated()); |
|---|
| 242 | result += encode_length(mset.get_matches_upper_bound()); |
|---|
| 243 | result += serialise_double(mset.get_max_possible()); |
|---|
| 244 | result += serialise_double(mset.get_max_attained()); |
|---|
| 245 | |
|---|
| 246 | result += serialise_double(mset.internal->percent_factor); |
|---|
| 247 | |
|---|
| 248 | result += encode_length(mset.size()); |
|---|
| 249 | for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); ++i) { |
|---|
| 250 | result += serialise_double(i.get_weight()); |
|---|
| 251 | result += encode_length(*i); |
|---|
| 252 | result += encode_length(i.get_collapse_key().size()); |
|---|
| 253 | result += i.get_collapse_key(); |
|---|
| 254 | result += encode_length(i.get_collapse_count()); |
|---|
| 255 | } |
|---|
| 256 | |
|---|
| 257 | const map<string, Xapian::MSet::Internal::TermFreqAndWeight> &termfreqandwts |
|---|
| 258 | = mset.internal->termfreqandwts; |
|---|
| 259 | |
|---|
| 260 | map<string, Xapian::MSet::Internal::TermFreqAndWeight>::const_iterator j; |
|---|
| 261 | for (j = termfreqandwts.begin(); j != termfreqandwts.end(); ++j) { |
|---|
| 262 | result += encode_length(j->first.size()); |
|---|
| 263 | result += j->first; |
|---|
| 264 | result += encode_length(j->second.termfreq); |
|---|
| 265 | result += serialise_double(j->second.termweight); |
|---|
| 266 | } |
|---|
| 267 | |
|---|
| 268 | return result; |
|---|
| 269 | } |
|---|
| 270 | |
|---|
| 271 | Xapian::MSet |
|---|
| 272 | unserialise_mset(const string &s) |
|---|
| 273 | { |
|---|
| 274 | const char * p = s.data(); |
|---|
| 275 | const char * p_end = p + s.size(); |
|---|
| 276 | |
|---|
| 277 | Xapian::doccount firstitem = decode_length(&p, p_end, false); |
|---|
| 278 | Xapian::doccount matches_lower_bound = decode_length(&p, p_end, false); |
|---|
| 279 | Xapian::doccount matches_estimated = decode_length(&p, p_end, false); |
|---|
| 280 | Xapian::doccount matches_upper_bound = decode_length(&p, p_end, false); |
|---|
| 281 | Xapian::weight max_possible = unserialise_double(&p, p_end); |
|---|
| 282 | Xapian::weight max_attained = unserialise_double(&p, p_end); |
|---|
| 283 | |
|---|
| 284 | double percent_factor = unserialise_double(&p, p_end); |
|---|
| 285 | |
|---|
| 286 | vector<Xapian::Internal::MSetItem> items; |
|---|
| 287 | size_t msize = decode_length(&p, p_end, false); |
|---|
| 288 | while (msize-- > 0) { |
|---|
| 289 | Xapian::weight wt = unserialise_double(&p, p_end); |
|---|
| 290 | Xapian::docid did = decode_length(&p, p_end, false); |
|---|
| 291 | size_t len = decode_length(&p, p_end, true); |
|---|
| 292 | string key(p, len); |
|---|
| 293 | p += len; |
|---|
| 294 | items.push_back(Xapian::Internal::MSetItem(wt, did, key, |
|---|
| 295 | decode_length(&p, p_end, false))); |
|---|
| 296 | } |
|---|
| 297 | |
|---|
| 298 | map<string, Xapian::MSet::Internal::TermFreqAndWeight> terminfo; |
|---|
| 299 | while (p != p_end) { |
|---|
| 300 | Xapian::MSet::Internal::TermFreqAndWeight tfaw; |
|---|
| 301 | size_t len = decode_length(&p, p_end, true); |
|---|
| 302 | string term(p, len); |
|---|
| 303 | p += len; |
|---|
| 304 | tfaw.termfreq = decode_length(&p, p_end, false); |
|---|
| 305 | tfaw.termweight = unserialise_double(&p, p_end); |
|---|
| 306 | terminfo.insert(make_pair(term, tfaw)); |
|---|
| 307 | } |
|---|
| 308 | |
|---|
| 309 | return Xapian::MSet(new Xapian::MSet::Internal( |
|---|
| 310 | firstitem, |
|---|
| 311 | matches_upper_bound, |
|---|
| 312 | matches_lower_bound, |
|---|
| 313 | matches_estimated, |
|---|
| 314 | max_possible, max_attained, |
|---|
| 315 | items, terminfo, percent_factor)); |
|---|
| 316 | } |
|---|
| 317 | |
|---|
| 318 | string |
|---|
| 319 | serialise_rset(const Xapian::RSet &rset) |
|---|
| 320 | { |
|---|
| 321 | const set<Xapian::docid> & items = rset.internal->get_items(); |
|---|
| 322 | string result; |
|---|
| 323 | set<Xapian::docid>::const_iterator i; |
|---|
| 324 | Xapian::docid lastdid = 0; |
|---|
| 325 | for (i = items.begin(); i != items.end(); ++i) { |
|---|
| 326 | Xapian::docid did = *i; |
|---|
| 327 | result += encode_length(did - lastdid - 1); |
|---|
| 328 | lastdid = did; |
|---|
| 329 | } |
|---|
| 330 | return result; |
|---|
| 331 | } |
|---|
| 332 | |
|---|
| 333 | Xapian::RSet |
|---|
| 334 | unserialise_rset(const string &s) |
|---|
| 335 | { |
|---|
| 336 | Xapian::RSet rset; |
|---|
| 337 | |
|---|
| 338 | const char * p = s.data(); |
|---|
| 339 | const char * p_end = p + s.size(); |
|---|
| 340 | |
|---|
| 341 | Xapian::docid did = 0; |
|---|
| 342 | while (p != p_end) { |
|---|
| 343 | did += decode_length(&p, p_end, false) + 1; |
|---|
| 344 | rset.add_document(did); |
|---|
| 345 | } |
|---|
| 346 | |
|---|
| 347 | return rset; |
|---|
| 348 | } |
|---|
| 349 | |
|---|
| 350 | string |
|---|
| 351 | serialise_document(const Xapian::Document &doc) |
|---|
| 352 | { |
|---|
| 353 | string result; |
|---|
| 354 | |
|---|
| 355 | size_t n = doc.values_count(); |
|---|
| 356 | result += encode_length(doc.values_count()); |
|---|
| 357 | Xapian::ValueIterator value; |
|---|
| 358 | for (value = doc.values_begin(); value != doc.values_end(); ++value) { |
|---|
| 359 | result += encode_length(value.get_valueno()); |
|---|
| 360 | result += encode_length((*value).size()); |
|---|
| 361 | result += *value; |
|---|
| 362 | --n; |
|---|
| 363 | } |
|---|
| 364 | Assert(n == 0); |
|---|
| 365 | |
|---|
| 366 | result += encode_length(doc.termlist_count()); |
|---|
| 367 | Xapian::TermIterator term; |
|---|
| 368 | n = doc.termlist_count(); |
|---|
| 369 | for (term = doc.termlist_begin(); term != doc.termlist_end(); ++term) { |
|---|
| 370 | result += encode_length((*term).size()); |
|---|
| 371 | result += *term; |
|---|
| 372 | result += encode_length(term.get_wdf()); |
|---|
| 373 | |
|---|
| 374 | result += encode_length(term.positionlist_count()); |
|---|
| 375 | Xapian::PositionIterator pos; |
|---|
| 376 | Xapian::termpos oldpos = 0; |
|---|
| 377 | size_t x = term.positionlist_count(); |
|---|
| 378 | for (pos = term.positionlist_begin(); pos != term.positionlist_end(); ++pos) { |
|---|
| 379 | Xapian::termpos diff = *pos - oldpos; |
|---|
| 380 | string delta = encode_length(diff); |
|---|
| 381 | result += delta; |
|---|
| 382 | oldpos = *pos; |
|---|
| 383 | --x; |
|---|
| 384 | } |
|---|
| 385 | Assert(x == 0); |
|---|
| 386 | --n; |
|---|
| 387 | } |
|---|
| 388 | Assert(n == 0); |
|---|
| 389 | |
|---|
| 390 | result += doc.get_data(); |
|---|
| 391 | return result; |
|---|
| 392 | } |
|---|
| 393 | |
|---|
| 394 | Xapian::Document |
|---|
| 395 | unserialise_document(const string &s) |
|---|
| 396 | { |
|---|
| 397 | Xapian::Document doc; |
|---|
| 398 | const char * p = s.data(); |
|---|
| 399 | const char * p_end = p + s.size(); |
|---|
| 400 | |
|---|
| 401 | size_t n_values = decode_length(&p, p_end, false); |
|---|
| 402 | while (n_values--) { |
|---|
| 403 | Xapian::valueno valno = decode_length(&p, p_end, false); |
|---|
| 404 | size_t len = decode_length(&p, p_end, true); |
|---|
| 405 | doc.add_value(valno, string(p, len)); |
|---|
| 406 | p += len; |
|---|
| 407 | } |
|---|
| 408 | |
|---|
| 409 | size_t n_terms = decode_length(&p, p_end, false); |
|---|
| 410 | while (n_terms--) { |
|---|
| 411 | size_t len = decode_length(&p, p_end, true); |
|---|
| 412 | string term(p, len); |
|---|
| 413 | p += len; |
|---|
| 414 | |
|---|
| 415 | |
|---|
| 416 | Xapian::termcount wdf = decode_length(&p, p_end, false); |
|---|
| 417 | doc.add_term(term, wdf); |
|---|
| 418 | |
|---|
| 419 | size_t n_pos = decode_length(&p, p_end, false); |
|---|
| 420 | Xapian::termpos pos = 0; |
|---|
| 421 | while (n_pos--) { |
|---|
| 422 | pos += decode_length(&p, p_end, false); |
|---|
| 423 | doc.add_posting(term, pos, 0); |
|---|
| 424 | } |
|---|
| 425 | } |
|---|
| 426 | |
|---|
| 427 | doc.set_data(string(p, p_end - p)); |
|---|
| 428 | return doc; |
|---|
| 429 | } |
|---|