Ticket #326: small_doclen_chunks.patch
File small_doclen_chunks.patch, 4.8 KB (added 16 years ago)
--- chert_postlist.h
+++ chert_postlist.h
@@ -81,7 +81,8 @@
 
 Xapian::docid get_chunk(const string &tname,
 Xapian::docid did, bool adding,
-PostlistChunkReader ** from, PostlistChunkWriter **to);
+PostlistChunkReader ** from, PostlistChunkWriter **to,
+unsigned int chunksize);
 
 /// Compose a key from a termname and docid.
 static string make_key(const string & term, Xapian::docid did) {
--- chert_postlist.cc
+++ chert_postlist.cc
@@ -76,6 +76,7 @@
 // maximise how well blocks are used. Or performance.
 // Or indexing speed. Or something...
 const unsigned int CHUNKSIZE = 2000;
+const unsigned int DOCLEN_CHUNKSIZE = 70;
 
 /** PostlistChunkWriter is a wrapper which acts roughly as an
 * output iterator on a postlist chunk, taking care of the
@@ -88,7 +89,8 @@
 PostlistChunkWriter(const string &orig_key_,
 bool is_first_chunk_,
 const string &tname_,
-bool is_last_chunk_);
+bool is_last_chunk_,
+unsigned int chunksize_);
 
 /// Append an entry to this chunk.
 void append(ChertTable * table, Xapian::docid did,
@@ -118,6 +120,7 @@
 bool is_first_chunk;
 bool is_last_chunk;
 bool started;
+unsigned int chunksize;
 
 Xapian::docid first_did;
 Xapian::docid current_did;
@@ -302,15 +305,17 @@
 PostlistChunkWriter::PostlistChunkWriter(const string &orig_key_,
 bool is_first_chunk_,
 const string &tname_,
-bool is_last_chunk_)
+bool is_last_chunk_,
+unsigned int chunksize_)
 : orig_key(orig_key_),
 tname(tname_), is_first_chunk(is_first_chunk_),
 is_last_chunk(is_last_chunk_),
-started(false)
+started(false),
+chunksize(chunksize_)
 {
 DEBUGCALL(DB, void, "PostlistChunkWriter::PostlistChunkWriter",
 orig_key_ << ", " << is_first_chunk_ << ", " << tname_ << ", " <<
-is_last_chunk_ );
+is_last_chunk_ << ", " << chunksize_);
 }
 
 void
@@ -323,7 +328,7 @@
 } else {
 Assert(did > current_did);
 // Start a new chunk if this one has grown to the threshold.
-if (chunk.size() >= CHUNKSIZE) {
+if (chunk.size() >= chunksize) {
 bool save_is_last_chunk = is_last_chunk;
 is_last_chunk = false;
 flush(table);
@@ -961,9 +966,10 @@
 Xapian::docid
 ChertPostListTable::get_chunk(const string &tname,
 Xapian::docid did, bool adding,
-PostlistChunkReader ** from, PostlistChunkWriter **to)
+PostlistChunkReader ** from, PostlistChunkWriter **to,
+unsigned int chunksize)
 {
-DEBUGCALL(DB, Xapian::docid, "ChertPostListTable::get_chunk", tname << ", " << did << ", " << adding << ", [from], [to] ");
+DEBUGCALL(DB, Xapian::docid, "ChertPostListTable::get_chunk", tname << ", " << did << ", " << adding << ", [from], [to], " << chunksize);
 // Get chunk containing entry
 string key = make_key(tname, did);
 
@@ -982,7 +988,7 @@
 throw Xapian::DatabaseCorruptError("Attempted to delete or modify an entry in a non-existent posting list for " + tname);
 
 *from = NULL;
-*to = new PostlistChunkWriter(string(), true, tname, true );
+*to = new PostlistChunkWriter(string(), true, tname, true, chunksize);
 RETURN(Xapian::docid(-1));
 }
 
@@ -1007,7 +1013,7 @@
 Xapian::docid last_did_in_chunk;
 last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk, &is_last_chunk);
 *to = new PostlistChunkWriter(cursor->current_key, is_first_chunk, tname,
-is_last_chunk );
+is_last_chunk, chunksize);
 if (did > last_did_in_chunk) {
 // This is the shortcut. Not very pretty, but I'll leave refactoring
 // until I've a clearer picture of everything which needs to be done.
@@ -1065,7 +1071,7 @@
 Xapian::docid max_did;
 PostlistChunkReader *from;
 PostlistChunkWriter *to;
-max_did = get_chunk(string(), j->first, true, &from, &to );
+max_did = get_chunk(string(), j->first, true, &from, &to, DOCLEN_CHUNKSIZE);
 LOGVALUE(DB, max_did);
 for ( ; j != doclens.end(); ++j) {
 Xapian::docid did = j->first;
@@ -1085,7 +1091,7 @@
 delete from;
 to->flush(this);
 delete to;
-max_did = get_chunk(string(), did, false, &from, &to );
+max_did = get_chunk(string(), did, false, &from, &to, DOCLEN_CHUNKSIZE);
 goto next_doclen_chunk;
 }
 
@@ -1180,7 +1186,7 @@
 PostlistChunkReader *from;
 PostlistChunkWriter *to;
 max_did = get_chunk(tname, j->first, j->second.first == 'A',
-&from, &to );
+&from, &to, CHUNKSIZE);
 for ( ; j != i->second.end(); ++j) {
 Xapian::docid did = j->first;
 
@@ -1202,7 +1208,7 @@
 delete from;
 to->flush(this);
 delete to;
-max_did = get_chunk(tname, did, false, &from, &to );
+max_did = get_chunk(tname, did, false, &from, &to, CHUNKSIZE);
 goto next_chunk;
 }
 