| | 1256 | /** Compare the positionlists for a term iterator and a termlist. |
| | 1257 | * |
| | 1258 | * @return true if they're equal, false otherwise. |
| | 1259 | */ |
| | 1260 | static bool positionlists_equal(Xapian::TermIterator & termiter, |
| | 1261 | FlintTermList & termlist) |
| | 1262 | { |
| | 1263 | int new_count = termiter.positionlist_count(); |
| | 1264 | int old_count = termlist.positionlist_count(); |
| | 1265 | if (old_count != new_count) |
| | 1266 | return false; |
| | 1267 | |
| | 1268 | PositionIterator it = termiter.positionlist_begin(); |
| | 1269 | PositionIterator it_end = termiter.positionlist_end(); |
| | 1270 | PositionIterator old = termlist.positionlist_begin(); |
| | 1271 | return equal(it, it_end, old); |
| | 1272 | } |
| | 1273 | |
| 1321 | | // Remove did from tname's postlist |
| 1322 | | map<string, map<docid, pair<char, termcount> > >::iterator j; |
| 1323 | | j = mod_plists.find(tname); |
| 1324 | | if (j == mod_plists.end()) { |
| 1325 | | map<docid, pair<char, termcount> > m; |
| 1326 | | j = mod_plists.insert(make_pair(tname, m)).first; |
| 1327 | | } |
| | 1340 | if (cmp < 0) { |
| | 1341 | // Term old_tname has been deleted. |
| | 1342 | const string& tname = old_tname; |
| | 1343 | termcount old_wdf = termlist.get_wdf(); |
| | 1344 | new_doclen -= old_wdf; |
| 1329 | | map<docid, pair<char, termcount> >::iterator k; |
| 1330 | | k = j->second.find(did); |
| 1331 | | if (k == j->second.end()) { |
| 1332 | | j->second.insert(make_pair(did, make_pair('D', 0u))); |
| 1333 | | } else { |
| 1334 | | // Modifying a document we added/modified since the last flush. |
| 1335 | | k->second = make_pair('D', 0u); |
| 1336 | | } |
| | 1346 | map<string, pair<termcount_diff, termcount_diff> >::iterator i; |
| | 1347 | i = freq_deltas.find(tname); |
| | 1348 | if (i == freq_deltas.end()) { |
| | 1349 | freq_deltas.insert(make_pair(tname, make_pair(-1, -termcount_diff(old_wdf)))); |
| | 1350 | } else { |
| | 1351 | --i->second.first; |
| | 1352 | i->second.second -= old_wdf; |
| | 1353 | } |
| 1343 | | termlist.next(); |
| 1344 | | } |
| | 1357 | // Remove did from tname's postlist |
| | 1358 | map<string, map<docid, pair<char, termcount> > >::iterator j; |
| | 1359 | j = mod_plists.find(tname); |
| | 1360 | if (j == mod_plists.end()) { |
| | 1361 | map<docid, pair<char, termcount> > m; |
| | 1362 | j = mod_plists.insert(make_pair(tname, m)).first; |
| | 1363 | } |
| 1346 | | total_length -= termlist.get_doclength(); |
| | 1365 | map<docid, pair<char, termcount> >::iterator k; |
| | 1366 | k = j->second.find(did); |
| | 1367 | if (k == j->second.end()) { |
| | 1368 | j->second.insert(make_pair(did, make_pair('D', 0u))); |
| | 1369 | } else { |
| | 1370 | // Modifying a document we added/modified since the last flush. |
| | 1371 | k->second = make_pair('D', 0u); |
| | 1372 | } |
| | 1373 | } else if (cmp != 0) { |
| | 1374 | // Term new_tname has been added. |
| | 1375 | const string& tname = new_tname; |
| | 1376 | termcount new_wdf = term.get_wdf(); |
| | 1377 | new_doclen += new_wdf; |
| 1348 | | flint_doclen_t new_doclen = 0; |
| 1349 | | for (term = document.termlist_begin(); |
| 1350 | | term != document.termlist_end(); ++term) { |
| 1351 | | // Calculate the new document length |
| 1352 | | termcount wdf = term.get_wdf(); |
| 1353 | | new_doclen += wdf; |
| | 1379 | if (tname.size() > MAX_SAFE_TERM_LENGTH) |
| | 1380 | throw Xapian::InvalidArgumentError("Term too long (> "STRINGIZE(MAX_SAFE_TERM_LENGTH)"): " + tname); |
| | 1381 | map<string, pair<termcount_diff, termcount_diff> >::iterator i; |
| | 1382 | i = freq_deltas.find(tname); |
| | 1383 | if (i == freq_deltas.end()) { |
| | 1384 | freq_deltas.insert(make_pair(tname, make_pair(1, termcount_diff(new_wdf)))); |
| | 1385 | } else { |
| | 1386 | ++i->second.first; |
| | 1387 | i->second.second += new_wdf; |
| | 1388 | } |
| 1355 | | string tname = *term; |
| 1356 | | if (tname.size() > MAX_SAFE_TERM_LENGTH) |
| 1357 | | throw Xapian::InvalidArgumentError("Term too long (> "STRINGIZE(MAX_SAFE_TERM_LENGTH)"): " + tname); |
| 1358 | | map<string, pair<termcount_diff, termcount_diff> >::iterator i; |
| 1359 | | i = freq_deltas.find(tname); |
| 1360 | | if (i == freq_deltas.end()) { |
| 1361 | | freq_deltas.insert(make_pair(tname, make_pair(1, termcount_diff(wdf)))); |
| 1362 | | } else { |
| 1363 | | ++i->second.first; |
| 1364 | | i->second.second += wdf; |
| 1365 | | } |
| | 1390 | // Add did to tname's postlist |
| | 1391 | map<string, map<docid, pair<char, termcount> > >::iterator j; |
| | 1392 | j = mod_plists.find(tname); |
| | 1393 | if (j == mod_plists.end()) { |
| | 1394 | map<docid, pair<char, termcount> > m; |
| | 1395 | j = mod_plists.insert(make_pair(tname, m)).first; |
| | 1396 | } |
| | 1397 | map<docid, pair<char, termcount> >::iterator k; |
| | 1398 | k = j->second.find(did); |
| | 1399 | if (k != j->second.end()) { |
| | 1400 | Assert(k->second.first == 'D'); |
| | 1401 | k->second.first = 'M'; |
| | 1402 | k->second.second = new_wdf; |
| | 1403 | } else { |
| | 1404 | j->second.insert(make_pair(did, make_pair('A', new_wdf))); |
| | 1405 | } |
| 1367 | | // Add did to tname's postlist |
| 1368 | | map<string, map<docid, pair<char, termcount> > >::iterator j; |
| 1369 | | j = mod_plists.find(tname); |
| 1370 | | if (j == mod_plists.end()) { |
| 1371 | | map<docid, pair<char, termcount> > m; |
| 1372 | | j = mod_plists.insert(make_pair(tname, m)).first; |
| | 1407 | PositionIterator it = term.positionlist_begin(); |
| | 1408 | PositionIterator it_end = term.positionlist_end(); |
| | 1409 | if (it != it_end) { |
| | 1410 | position_table.set_positionlist(did, tname, it, it_end); |
| | 1411 | } else { |
| | 1412 | position_table.delete_positionlist(did, tname); |
| | 1413 | } |
| | 1414 | } else if (cmp == 0) { |
| | 1415 | // Term already exists: look for wdf and positionlist changes. |
| | 1416 | termcount old_wdf = termlist.get_wdf(); |
| | 1417 | termcount new_wdf = term.get_wdf(); |
| | 1418 | if (old_wdf != new_wdf) { |
| | 1419 | new_doclen += new_wdf - old_wdf; |
| | 1420 | |
| | 1421 | map<string, pair<termcount_diff, termcount_diff> >::iterator i; |
| | 1422 | i = freq_deltas.find(new_tname); |
| | 1423 | if (i == freq_deltas.end()) { |
| | 1424 | freq_deltas.insert(make_pair(new_tname, make_pair(0, termcount_diff(new_wdf - old_wdf)))); |
| | 1425 | } else { |
| | 1426 | i->second.second += new_wdf - old_wdf; |
| | 1427 | } |
| | 1428 | } |
| | 1429 | |
| | 1430 | if (!positionlists_equal(term, termlist)) { |
| | 1431 | PositionIterator it = term.positionlist_begin(); |
| | 1432 | PositionIterator it_end = term.positionlist_end(); |
| | 1433 | if (it != it_end) { |
| | 1434 | position_table.set_positionlist(did, new_tname, it, it_end); |
| | 1435 | } else { |
| | 1436 | position_table.delete_positionlist(did, new_tname); |
| | 1437 | } |
| | 1438 | } |