Ticket #326: avoid_string_operations.patch
File avoid_string_operations.patch, 20.0 KB (added by , 16 years ago) |
---|
-
chert_postlist.h
83 83 Xapian::docid did, bool adding, 84 84 PostlistChunkReader ** from, PostlistChunkWriter **to); 85 85 86 #define KEYBUF_MAX_LEN CHERT_BTREE_MAX_KEY_LEN 87 mutable char keybuf[KEYBUF_MAX_LEN]; 88 mutable size_t keybuf_curlen; 89 86 90 /// Compose a key from a termname and docid. 87 static string make_key(const string & term, Xapian::docid did) { 88 string key = make_key(term); 89 key += pack_uint_preserving_sort(did); 90 return key; 91 void make_key(const string & term, Xapian::docid did) const { 92 keybuf_curlen = 0; 93 make_key(term); 94 keybuf_curlen += append_packed_uint_preserving_sort( 95 keybuf + keybuf_curlen, KEYBUF_MAX_LEN - keybuf_curlen, did); 91 96 } 92 97 93 98 /// Compose a key from a termname. 94 static string make_key(const string & term){99 void make_key(const string & term) const { 95 100 // Special case for doclen lists. 96 if (term.empty()) return string("\x00\xe0", 2); 101 if (term.empty()) { 102 keybuf[0] = '\x00'; 103 keybuf[1] = '\xe0'; 104 keybuf_curlen = 2; 105 return; 106 } 97 107 98 return pack_string_preserving_sort(term);108 keybuf_curlen = append_string_preserving_sort(keybuf, KEYBUF_MAX_LEN, term); 99 109 } 100 110 101 111 bool term_exists(const string & term) const { 102 return key_exists(make_key(term)); 112 make_key(term); 113 return key_exists(keybuf, keybuf_curlen); 103 114 } 104 115 105 116 /** Returns number of docs indexed by @a term. … … 129 140 */ 130 141 Xapian::Internal::RefCntPtr<const ChertDatabase> this_db; 131 142 143 ChertPostListTable * pltable; 144 132 145 /// The termname for this postlist. 133 146 string tname; 134 147 -
chert_table.h
227 227 memmove(p + I2 + K1, key_.data(), key_len); 228 228 set_component_of(1); 229 229 } 230 void form_key(const char * key_, size_t keylen) { 231 if (keylen > CHERT_BTREE_MAX_KEY_LEN) { 232 // We check term length when a term is added to a document but 233 // chert doubles zero bytes, so this can still happen for terms 234 // which contain one or more zero bytes. 235 std::string msg("Key too long: length was "); 236 msg += om_tostring(keylen); 237 msg += " bytes, maximum length of a key is " 238 STRINGIZE(CHERT_BTREE_MAX_KEY_LEN) " bytes"; 239 throw Xapian::InvalidArgumentError(msg); 240 } 241 242 set_key_len(keylen + K1 + C2); 243 memmove(p + I2 + K1, key_, keylen); 244 set_component_of(1); 245 } 230 246 // FIXME passing cd here is icky 231 247 void set_tag(int cd, const char *start, int len, bool compressed) { 232 248 memmove(p + cd, start, len); … … 396 412 * false if key is not found in table. 397 413 */ 398 414 bool get_exact_entry(const std::string & key, std::string & tag) const; 415 bool get_exact_entry(const char * key, size_t keylen, string & tag) const; 399 416 400 417 /** Check if a key exists in the Btree. 401 418 * … … 409 426 * false if key is not found in table. 410 427 */ 411 428 bool key_exists(const std::string &key) const; 429 bool key_exists(const char * key, size_t keylen) const; 412 430 413 431 /** Read the tag value for the key pointed to by cursor C_. 414 432 * … … 597 615 void read_root(); 598 616 void split_root(uint4 split_n); 599 617 void form_key(const std::string & key) const; 618 void form_key(const char * key, size_t keylen) const; 600 619 601 620 char other_base_letter() const { 602 621 return (base_letter == 'A') ? 'B' : 'A'; -
chert_positionlist.cc
70 70 did << ", " << term); 71 71 72 72 string data; 73 if (!get_exact_entry(pack_uint_preserving_sort(did) + term, data)) { 73 string key; 74 // 5 bytes is usually enough for a packed uint 75 key.reserve(5 + term.size()); 76 append_packed_uint_preserving_sort(key, did); 77 key.append(term); 78 if (!get_exact_entry(key, data)) { 74 79 // There's no positional information for this term. 75 80 return 0; 76 81 } … … 106 111 positions.clear(); 107 112 108 113 string data; 109 if (!table->get_exact_entry(pack_uint_preserving_sort(did) + tname, data)) { 114 string key; 115 // 5 bytes is usually enough for a packed uint 116 key.reserve(5 + tname.size()); 117 append_packed_uint_preserving_sort(key, did); 118 key.append(tname); 119 if (!table->get_exact_entry(key, data)) { 110 120 // There's no positional information for this term. 111 121 current_pos = positions.begin(); 112 122 return false; -
chert_positionlist.h
34 34 35 35 class ChertPositionListTable : public ChertTable { 36 36 static string make_key(Xapian::docid did, const string & tname) { 37 return pack_uint_preserving_sort(did) + tname; 37 string result; 38 // 5 bytes is usually enough for a packed uint 39 result.reserve(5 + tname.size()); 40 append_packed_uint_preserving_sort(result, did); 41 result.append(tname); 42 return result; 38 43 } 39 44 40 45 public: -
chert_utils.h
26 26 #include "omassert.h" 27 27 28 28 #include <xapian/types.h> 29 #include <xapian/error.h> 29 30 30 31 #include <string> 31 32 … … 224 225 * of 256 bytes on the length of the integer. However, this is unlikely to 225 226 * ever be a problem. 226 227 * 228 * @param result A string to append the representation of the integer to. 227 229 * @param value The integer to represent. 228 *229 * @result A string containing the representation of the integer.230 230 */ 231 231 template<class T> 232 string 233 pack_uint_preserving_sort(T value)232 void 233 append_packed_uint_preserving_sort(string & result, T value) 234 234 { 235 235 // Check unsigned 236 236 STATIC_ASSERT_UNSIGNED_TYPE(T); 237 STATIC_ASSERT(sizeof(T) >= 4); 237 238 238 string result; 239 string::size_type start = result.size(); 240 if (value <= 0xffff) { 241 if (value < 0xff) { 242 // 1 byte 243 char buf[2]; 244 buf[0] = char(1); 245 buf[1] = char(value & 0xff); 246 result.append(buf, 2); 247 return; 248 } else { 249 // 2 bytes 250 char buf[3]; 251 buf[0] = char(2); 252 buf[1] = char((value >> 8) & 0xff); 253 buf[2] = char(value & 0xff); 254 result.append(buf, 3); 255 return; 256 } 257 } 258 if (value <= 0xffffff) { 259 // 3 bytes 260 char buf[4]; 261 buf[0] = char(3); 262 buf[1] = char((value >> 16) & 0xff); 263 buf[2] = char((value >> 8) & 0xff); 264 buf[3] = char(value & 0xff); 265 result.append(buf, 4); 266 return; 267 } 268 269 if (value <= 0xffffffff) { 270 // 4 bytes. 271 char buf[5]; 272 buf[0] = char(4); 273 buf[1] = char((value >> 24) & 0xff); 274 buf[2] = char((value >> 16) & 0xff); 275 buf[3] = char((value >> 8) & 0xff); 276 buf[4] = char(value & 0xff); 277 result.append(buf, 5); 278 return; 279 } 280 281 if (sizeof(T) > 4) { 282 if (result.capacity() < start + 9) { 283 result.reserve(start + 9); 284 } 285 // More than 4 bytes. Do first 4, then loop. 
286 result.append(string::size_type(1u), char((value >> 24) & 0xff)); 287 result.append(string::size_type(1u), char((value >> 16) & 0xff)); 288 result.append(string::size_type(1u), char((value >> 8) & 0xff)); 289 result.append(string::size_type(1u), char(value & 0xff)); 290 value = value >> 16; 291 value = value >> 16; 292 while (value != 0) { 293 om_byte part = static_cast<om_byte>(value & 0xff); 294 value = value >> 8; 295 result.insert(start, 1u, char(part)); 296 } 297 result.insert(start, 1u, char(result.size() - start)); 298 } 299 } 300 301 template<class T> 302 size_t 303 append_packed_uint_preserving_sort(char * buf, size_t buflen, T value) 304 { 305 // Check unsigned 306 STATIC_ASSERT_UNSIGNED_TYPE(T); 307 STATIC_ASSERT(sizeof(T) >= 4); 308 309 if (value <= 0xffff) { 310 if (value < 0xff) { 311 if (buflen < 2) 312 throw Xapian::InvalidArgumentError("Buffer not long enough to hold 2 byte varint."); 313 buf[0] = char(1); 314 buf[1] = char(value & 0xff); 315 return 2; 316 } else { 317 if (buflen < 3) 318 throw Xapian::InvalidArgumentError("Buffer not long enough to hold 3 byte varint."); 319 buf[0] = char(2); 320 buf[1] = char((value >> 8) & 0xff); 321 buf[2] = char(value & 0xff); 322 return 3; 323 } 324 } 325 if (value <= 0xffffff) { 326 if (buflen < 4) 327 throw Xapian::InvalidArgumentError("Buffer not long enough to hold 4 byte varint."); 328 buf[0] = char(3); 329 buf[1] = char((value >> 16) & 0xff); 330 buf[2] = char((value >> 8) & 0xff); 331 buf[3] = char(value & 0xff); 332 return 4; 333 } 334 335 if (value <= 0xffffffff) { 336 if (buflen < 5) 337 throw Xapian::InvalidArgumentError("Buffer not long enough to hold 5 byte varint."); 338 buf[0] = char(4); 339 buf[1] = char((value >> 24) & 0xff); 340 buf[2] = char((value >> 16) & 0xff); 341 buf[3] = char((value >> 8) & 0xff); 342 buf[4] = char(value & 0xff); 343 return 5; 344 } 345 346 // More than 4 bytes. Do first 4, then loop. 
347 if (buflen < 6) 348 throw Xapian::InvalidArgumentError("Buffer not long enough to hold varint."); 349 buf[1] = char((value >> 24) & 0xff); 350 buf[2] = char((value >> 16) & 0xff); 351 buf[3] = char((value >> 8) & 0xff); 352 buf[4] = char(value & 0xff); 353 value = value >> 16; 354 value = value >> 16; 355 size_t bytes = 4; 239 356 while (value != 0) { 357 ++bytes; 358 if (buflen < bytes + 1) 359 throw Xapian::InvalidArgumentError("Buffer not long enough to hold varint."); 240 360 om_byte part = static_cast<om_byte>(value & 0xff); 241 361 value = value >> 8; 242 result.insert(string::size_type(0), 1u, char(part));362 buf[bytes] = char(part); 243 363 } 244 result.insert(string::size_type(0), 1u, char(result.size())); 245 return result; 364 buf[0] = char(bytes); return bytes + 1; 246 365 } 247 366 248 367 /** Unpack a unsigned integer, store in sort preserving order. … … 343 462 return value + '\0'; // Note - next byte mustn't be '\xff'... 344 463 } 345 464 465 inline size_t 466 append_string_preserving_sort(char * buf, size_t buflen, const string & value) 467 { 468 string::size_type i, j; 469 Assert(buflen >= 2); 470 char * pos = buf; 471 // bufend is the end of the part of buf which the string goes in. 
472 const char * bufend = buf + buflen - 2; 473 j = value.size(); 474 for (i = 0; i != j; ++i) { 475 if (pos == bufend) 476 throw Xapian::InvalidArgumentError("Buffer not long enough to hold string."); 477 *pos = value[i]; 478 ++pos; 479 if (value[i] == 0) { 480 if (pos == bufend) 481 throw Xapian::InvalidArgumentError("Buffer not long enough to hold string."); 482 *pos = '\xff'; 483 ++pos; 484 } 485 } 486 pos[0] = '\0'; 487 pos[1] = '\0'; 488 return (pos - buf) + 2; 489 } 490 346 491 inline bool 347 492 unpack_string_preserving_sort(const char ** src, 348 493 const char * src_end, … … 401 546 inline string 402 547 chert_docid_to_key(Xapian::docid did) 403 548 { 404 return pack_uint_preserving_sort(did); 549 string result; 550 append_packed_uint_preserving_sort(result, did); 551 return result; 405 552 } 406 553 407 554 #endif /* OM_HGUARD_CHERT_UTILS_H */ -
chert_cursor.h
202 202 * otherwise. 203 203 */ 204 204 bool find_entry(const string &key); 205 bool find_entry(const char * key, size_t keylen) { 206 return find_entry(string(key, keylen)); 207 } 205 208 206 209 /// Position the cursor on the highest entry with key < @a key. 207 210 void find_entry_lt(const string &key) { -
chert_postlist.cc
34 34 Xapian::doccount 35 35 ChertPostListTable::get_termfreq(const string & term) const 36 36 { 37 string key =make_key(term);37 make_key(term); 38 38 string tag; 39 if (!get_exact_entry(key , tag)) return 0;39 if (!get_exact_entry(keybuf, keybuf_curlen, tag)) return 0; 40 40 41 41 Xapian::doccount termfreq; 42 42 const char * p = tag.data(); … … 47 47 Xapian::termcount 48 48 ChertPostListTable::get_collection_freq(const string & term) const 49 49 { 50 string key =make_key(term);50 make_key(term); 51 51 string tag; 52 if (!get_exact_entry(key , tag)) return 0;52 if (!get_exact_entry(keybuf, keybuf_curlen, tag)) return 0; 53 53 54 54 Xapian::termcount collfreq; 55 55 const char * p = tag.data(); … … 91 91 bool is_last_chunk_); 92 92 93 93 /// Append an entry to this chunk. 94 void append(Chert Table * table, Xapian::docid did,94 void append(ChertPostListTable * table, Xapian::docid did, 95 95 Xapian::termcount wdf); 96 96 97 97 /// Append a block of raw entries to this chunk. … … 110 110 * with a different key to the original one, if for example the first 111 111 * entry has changed. 
112 112 */ 113 void flush(Chert Table *table);113 void flush(ChertPostListTable *table); 114 114 115 115 private: 116 116 string orig_key; … … 314 314 } 315 315 316 316 void 317 PostlistChunkWriter::append(ChertTable * table, Xapian::docid did, 317 PostlistChunkWriter::append(ChertPostListTable * table, 318 Xapian::docid did, 318 319 Xapian::termcount wdf) 319 320 { 320 321 if (!started) { … … 331 332 is_first_chunk = false; 332 333 first_did = did; 333 334 chunk.resize(0); 334 orig_key = ChertPostListTable::make_key(tname, first_did); 335 table->make_key(tname, first_did); 336 orig_key = string(table->keybuf, table->keybuf_curlen); 335 337 } else { 336 338 chunk.append(pack_uint(did - current_did - 1)); 337 339 } … … 379 381 } 380 382 381 383 void 382 PostlistChunkWriter::flush(Chert Table *table)384 PostlistChunkWriter::flush(ChertPostListTable *table) 383 385 { 384 386 DEBUGCALL(DB, void, "PostlistChunkWriter::flush", table); 385 387 … … 560 562 * and we just have to write this one back to disk. 561 563 */ 562 564 LOGLINE(DB, "PostlistChunkWriter::flush(): rewriting the first chunk, which still has items in it"); 563 string key = ChertPostListTable::make_key(tname);564 bool ok = table->get_exact_entry( key, tag);565 table->make_key(tname); 566 bool ok = table->get_exact_entry(table->keybuf, table->keybuf_curlen, tag); 565 567 (void)ok; 566 568 Assert(ok); 567 569 Assert(!tag.empty()); … … 578 580 579 581 tag += make_start_of_chunk(is_last_chunk, first_did, current_did); 580 582 tag += chunk; 581 table->add( key, tag);583 table->add(string(table->keybuf, table->keybuf_curlen), tag); 582 584 return; 583 585 } 584 586 … … 609 611 * Create a new tag with the correct key, and replace 610 612 * the old one. 
611 613 */ 612 new_key = ChertPostListTable::make_key(tname, first_did); 614 table->make_key(tname, first_did); 615 new_key = string(table->keybuf, table->keybuf_curlen); 613 616 table->del(orig_key); 614 617 } else { 615 618 new_key = orig_key; … … 661 664 const string & tname_, 662 665 bool keep_reference) 663 666 : this_db(keep_reference ? this_db_ : NULL), 667 pltable(&(this_db_->postlist_table)), 664 668 tname(tname_), 665 669 have_started(false), 666 670 cursor(this_db_->postlist_table.cursor_get()), … … 668 672 { 669 673 DEBUGCALL(DB, void, "ChertPostList::ChertPostList", 670 674 this_db_.get() << ", " << tname_ << ", " << keep_reference); 671 string key = ChertPostListTable::make_key(tname);672 int found = cursor->find_entry( key);675 pltable->make_key(tname); 676 int found = cursor->find_entry(pltable->keybuf, pltable->keybuf_curlen); 673 677 if (!found) { 674 678 LOGLINE(DB, "postlist for term not found"); 675 679 number_of_entries = 0; … … 823 827 { 824 828 DEBUGCALL(DB, void, 825 829 "ChertPostList::move_to_chunk_containing", desired_did); 826 (void)cursor->find_entry(ChertPostListTable::make_key(tname, desired_did)); 830 pltable->make_key(tname, desired_did); 831 (void)cursor->find_entry(pltable->keybuf, pltable->keybuf_curlen); 827 832 Assert(!cursor->after_end()); 828 833 829 834 const char * keypos = cursor->current_key.data(); … … 965 970 { 966 971 DEBUGCALL(DB, Xapian::docid, "ChertPostListTable::get_chunk", tname << ", " << did << ", " << adding << ", [from], [to]"); 967 972 // Get chunk containing entry 968 string key =make_key(tname, did);973 make_key(tname, did); 969 974 970 975 // Find the right chunk 971 976 AutoPtr<ChertCursor> cursor(cursor_get()); 972 977 973 (void)cursor->find_entry(key );978 (void)cursor->find_entry(keybuf, keybuf_curlen); 974 979 Assert(!cursor->after_end()); 975 980 976 981 const char * keypos = cursor->current_key.data(); … … 1050 1055 LOGVALUE(DB, doclens.size()); 1051 1056 if (!doclens.empty()) { 1052 1057 // 
Ensure there's a first chunk. 1053 string current_key =make_key(string());1054 if (!key_exists( current_key)) {1058 make_key(string()); 1059 if (!key_exists(keybuf, keybuf_curlen)) { 1055 1060 LOGLINE(DB, "Adding dummy first chunk"); 1056 1061 string newtag = make_start_of_first_chunk(0, 0, 0); 1057 1062 newtag += make_start_of_chunk(true, 0, 0); 1058 add( current_key, newtag);1063 add(string(keybuf, keybuf_curlen), newtag); 1059 1064 } 1060 1065 1061 1066 map<Xapian::docid, Xapian::termcount>::const_iterator j; … … 1116 1121 map<string, pair<Xapian::termcount_diff, Xapian::termcount_diff> >::const_iterator deltas = freq_deltas.find(tname); 1117 1122 Assert(deltas != freq_deltas.end()); 1118 1123 1119 string current_key =make_key(tname);1124 make_key(tname); 1120 1125 string tag; 1121 (void)get_exact_entry( current_key, tag);1126 (void)get_exact_entry(keybuf, keybuf_curlen, tag); 1122 1127 1123 1128 // Read start of first chunk to get termfreq and collfreq. 1124 1129 const char *pos = tag.data(); … … 1145 1150 // posting list. 1146 1151 if (islast) { 1147 1152 // Only one entry for this posting list. 1148 del( current_key);1153 del(string(keybuf, keybuf_curlen)); 1149 1154 continue; 1150 1155 } 1151 1156 AutoPtr<ChertCursor> cursor(cursor_get()); 1152 bool found = cursor->find_entry( current_key);1157 bool found = cursor->find_entry(string(keybuf, keybuf_curlen)); 1153 1158 Assert(found); 1154 1159 if (!found) continue; // Reduce damage! 
1155 1160 while (cursor->del()) { … … 1165 1170 string newhdr = make_start_of_first_chunk(termfreq, collfreq, firstdid); 1166 1171 newhdr += make_start_of_chunk(islast, firstdid, lastdid); 1167 1172 if (pos == end) { 1168 add( current_key, newhdr);1173 add(string(keybuf, keybuf_curlen), newhdr); 1169 1174 } else { 1170 1175 Assert((size_t)(pos - tag.data()) <= tag.size()); 1171 1176 tag.replace(0, pos - tag.data(), newhdr); 1172 add( current_key, tag);1177 add(string(keybuf, keybuf_curlen), tag); 1173 1178 } 1174 1179 } 1175 1180 map<Xapian::docid, pair<char, Xapian::termcount> >::const_iterator j; -
chert_table.cc
1013 1013 LOGCALL_VOID(DB, "ChertTable::form_key", key); 1014 1014 kt.form_key(key); 1015 1015 } 1016 void ChertTable::form_key(const char * key, size_t keylen) const 1017 { 1018 LOGCALL_VOID(DB, "ChertTable::form_key", string(key, keylen)); 1019 kt.form_key(key, keylen); 1020 } 1016 1021 1017 1022 /* ChertTable::add(key, tag) adds the key/tag item to the 1018 1023 B-tree, replacing any existing item with the same key. … … 1232 1237 } 1233 1238 1234 1239 bool 1240 ChertTable::get_exact_entry(const char * key, size_t keylen, string & tag) const 1241 { 1242 LOGCALL(DB, bool, "ChertTable::get_exact_entry", string(key, keylen) << ", [&tag]"); 1243 Assert(keylen != 0); 1244 1245 if (handle < 0) RETURN(false); 1246 1247 // An oversized key can't exist, so attempting to search for it should fail. 1248 if (keylen > CHERT_BTREE_MAX_KEY_LEN) RETURN(false); 1249 1250 form_key(key, keylen); 1251 if (!find(C)) RETURN(false); 1252 1253 (void)read_tag(C, &tag, false); 1254 RETURN(true); 1255 } 1256 1257 bool 1235 1258 ChertTable::key_exists(const string &key) const 1236 1259 { 1237 1260 LOGCALL(DB, bool, "ChertTable::key_exists", key); … … 1245 1268 } 1246 1269 1247 1270 bool 1271 ChertTable::key_exists(const char * key, size_t keylen) const 1272 { 1273 LOGCALL(DB, bool, "ChertTable::key_exists", string(key, keylen)); 1274 Assert(keylen != 0); 1275 1276 // An oversized key can't exist, so attempting to search for it should fail. 1277 if (keylen > CHERT_BTREE_MAX_KEY_LEN) RETURN(false); 1278 1279 form_key(key, keylen); 1280 RETURN(find(C)); 1281 } 1282 1283 bool 1248 1284 ChertTable::read_tag(Cursor * C_, string *tag, bool keep_compressed) const 1249 1285 { 1250 1286 LOGCALL(DB, bool, "ChertTable::read_tag", "C_, tag, " << keep_compressed); -
chert_values.h
37 37 { 38 38 std::string key("\0\xd8", 2); 39 39 key += pack_uint(slot); 40 key += pack_uint_preserving_sort(did);40 append_packed_uint_preserving_sort(key, did); 41 41 return key; 42 42 } 43 43