Index: backends/flint/flint_database.cc
===================================================================
--- backends/flint/flint_database.cc	(revision 13796)
+++ backends/flint/flint_database.cc	(working copy)
@@ -6,6 +6,7 @@
  * Copyright 2002,2003,2004,2005,2006,2007,2008,2009 Olly Betts
  * Copyright 2006,2008 Lemur Consulting Ltd
  * Copyright 2009 Richard Boulton
+ * Copyright 2009 Kan-Ru Chen
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
@@ -1252,6 +1253,24 @@
     }
 }
 
+/** Compare the positionlists for a term iterator and a termlist.
+ *
+ *  Counts are checked first so equal() compares lists of the same length.
+ *  @return true if they're equal, false otherwise.
+ */
+static bool positionlists_equal(Xapian::TermIterator & termiter,
+				FlintTermList & termlist)
+{
+    const Xapian::termcount new_count = termiter.positionlist_count();
+    const Xapian::termcount old_count = termlist.positionlist_count();
+    if (old_count != new_count)
+	return false;
+    PositionIterator it = termiter.positionlist_begin();
+    PositionIterator it_end = termiter.positionlist_end();
+    PositionIterator old = termlist.positionlist_begin();
+    return equal(it, it_end, old);
+}
+
 void
 FlintWritableDatabase::replace_document(Xapian::docid did,
 					const Xapian::Document & document)
@@ -1293,101 +1312,143 @@
 	}
   
 	if (!modifying || document.internal->terms_modified()) {
-	    // FIXME - in the case where there is overlap between the new
-	    // termlist and the old termlist, it would be better to compare the
-	    // two lists, and make the minimum set of modifications required.
-	    // This would lead to smaller changesets for replication, and
-	    // probably be faster overall.
-
-	    // First, add entries to remove the postings in the underlying record.
 	    Xapian::Internal::RefCntPtr<const FlintWritableDatabase> ptrtothis(this);
 	    FlintTermList termlist(ptrtothis, did);
 	    Xapian::TermIterator term = document.termlist_begin();
+	    Xapian::TermIterator term_end = document.termlist_end();
+	    flint_doclen_t new_doclen = termlist.get_doclength();
+	    string old_tname, new_tname;
+ 
+	    total_length -= new_doclen;
 
 	    termlist.next();
-	    while (!termlist.at_end()) {
-		string tname = termlist.get_termname();
-		termcount wdf = termlist.get_wdf();
+	    while (!termlist.at_end() || term != term_end) {
+		int cmp;
+		if (!termlist.at_end() && term != term_end) {
+		    old_tname = termlist.get_termname();
+		    new_tname = *term;
+		    cmp = old_tname.compare(new_tname);
 
-		map<string, pair<termcount_diff, termcount_diff> >::iterator i;
-		i = freq_deltas.find(tname);
-		if (i == freq_deltas.end()) {
-		    freq_deltas.insert(make_pair(tname, make_pair(-1, -termcount_diff(wdf))));
+		} else if (termlist.at_end()) {
+		    cmp = 1;
+		    new_tname = *term;
 		} else {
-		    --i->second.first;
-		    i->second.second -= wdf;
+		    cmp = -1;
+		    old_tname = termlist.get_termname();
 		}
 
-		// Remove did from tname's postlist
-		map<string, map<docid, pair<char, termcount> > >::iterator j;
-		j = mod_plists.find(tname);
-		if (j == mod_plists.end()) {
-		    map<docid, pair<char, termcount> > m;
-		    j = mod_plists.insert(make_pair(tname, m)).first;
-		}
+		if (cmp < 0) {
+		    // Term old_tname has been deleted.
+		    const string& tname = old_tname;
+		    termcount old_wdf = termlist.get_wdf();
+		    new_doclen -= old_wdf;
 
-		map<docid, pair<char, termcount> >::iterator k;
-		k = j->second.find(did);
-		if (k == j->second.end()) {
-		    j->second.insert(make_pair(did, make_pair('D', 0u)));
-		} else {
-		    // Modifying a document we added/modified since the last flush.
-		    k->second = make_pair('D', 0u);
-		}
+		    map<string, pair<termcount_diff, termcount_diff> >::iterator i;
+		    i = freq_deltas.find(tname);
+		    if (i == freq_deltas.end()) {
+			freq_deltas.insert(make_pair(tname, make_pair(-1, -termcount_diff(old_wdf))));
+		    } else {
+			--i->second.first;
+			i->second.second -= old_wdf;
+		    }
 
-		term.skip_to(tname);
-		if (term == document.termlist_end() || *term != tname) {
 		    position_table.delete_positionlist(did, tname);
-		}
 
-		termlist.next();
-	    }
+		    // Remove did from tname's postlist
+		    map<string, map<docid, pair<char, termcount> > >::iterator j;
+		    j = mod_plists.find(tname);
+		    if (j == mod_plists.end()) {
+			map<docid, pair<char, termcount> > m;
+			j = mod_plists.insert(make_pair(tname, m)).first;
+		    }
 
-	    total_length -= termlist.get_doclength();
+		    map<docid, pair<char, termcount> >::iterator k;
+		    k = j->second.find(did);
+		    if (k == j->second.end()) {
+			j->second.insert(make_pair(did, make_pair('D', 0u)));
+		    } else {
+			// Modifying a document we added/modified since the last flush.
+			k->second = make_pair('D', 0u);
+		    }
+		} else if (cmp != 0) {
+		    // Term new_tname has been added.
+		    const string& tname = new_tname;
+		    termcount new_wdf = term.get_wdf();
+		    new_doclen += new_wdf;
 
-	    flint_doclen_t new_doclen = 0;
-	    for (term = document.termlist_begin();
-		 term != document.termlist_end(); ++term) {
-		// Calculate the new document length
-		termcount wdf = term.get_wdf();
-		new_doclen += wdf;
+		    if (tname.size() > MAX_SAFE_TERM_LENGTH)
+			throw Xapian::InvalidArgumentError("Term too long (> "STRINGIZE(MAX_SAFE_TERM_LENGTH)"): " + tname);
+		    map<string, pair<termcount_diff, termcount_diff> >::iterator i;
+		    i = freq_deltas.find(tname);
+		    if (i == freq_deltas.end()) {
+			freq_deltas.insert(make_pair(tname, make_pair(1, termcount_diff(new_wdf))));
+		    } else {
+			++i->second.first;
+			i->second.second += new_wdf;
+		    }
 
-		string tname = *term;
-		if (tname.size() > MAX_SAFE_TERM_LENGTH)
-		    throw Xapian::InvalidArgumentError("Term too long (> "STRINGIZE(MAX_SAFE_TERM_LENGTH)"): " + tname);
-		map<string, pair<termcount_diff, termcount_diff> >::iterator i;
-		i = freq_deltas.find(tname);
-		if (i == freq_deltas.end()) {
-		    freq_deltas.insert(make_pair(tname, make_pair(1, termcount_diff(wdf))));
-		} else {
-		    ++i->second.first;
-		    i->second.second += wdf;
-		}
+		    // Add did to tname's postlist
+		    map<string, map<docid, pair<char, termcount> > >::iterator j;
+		    j = mod_plists.find(tname);
+		    if (j == mod_plists.end()) {
+			map<docid, pair<char, termcount> > m;
+			j = mod_plists.insert(make_pair(tname, m)).first;
+		    }
+		    map<docid, pair<char, termcount> >::iterator k;
+		    k = j->second.find(did);
+		    if (k != j->second.end()) {
+			Assert(k->second.first == 'D');
+			k->second.first = 'M';
+			k->second.second = new_wdf;
+		    } else {
+			j->second.insert(make_pair(did, make_pair('A', new_wdf)));
+		    }
 
-		// Add did to tname's postlist
-		map<string, map<docid, pair<char, termcount> > >::iterator j;
-		j = mod_plists.find(tname);
-		if (j == mod_plists.end()) {
-		    map<docid, pair<char, termcount> > m;
-		    j = mod_plists.insert(make_pair(tname, m)).first;
+		    PositionIterator it = term.positionlist_begin();
+		    PositionIterator it_end = term.positionlist_end();
+		    if (it != it_end) {
+			position_table.set_positionlist(did, tname, it, it_end);
+		    } else {
+			position_table.delete_positionlist(did, tname);
+		    }
+		} else if (cmp == 0) {
+		    // Term already exists: look for wdf and positionlist changes.
+		    termcount old_wdf = termlist.get_wdf();
+		    termcount new_wdf = term.get_wdf();
+		    if (old_wdf != new_wdf) {
+			new_doclen += new_wdf - old_wdf;
+
+			map<string, pair<termcount_diff, termcount_diff> >::iterator i;
+			i = freq_deltas.find(new_tname);
+			if (i == freq_deltas.end()) {
+			    freq_deltas.insert(make_pair(new_tname, make_pair(0, termcount_diff(new_wdf - old_wdf))));
+			} else {
+			    i->second.second += new_wdf - old_wdf;
+			}
+		    }
+
+		    if (!positionlists_equal(term, termlist)) {
+			PositionIterator it = term.positionlist_begin();
+			PositionIterator it_end = term.positionlist_end();
+			if (it != it_end) {
+			    position_table.set_positionlist(did, new_tname, it, it_end);
+			} else {
+			    position_table.delete_positionlist(did, new_tname);
+			}
+		    }
 		}
-		map<docid, pair<char, termcount> >::iterator k;
-		k = j->second.find(did);
-		if (k != j->second.end()) {
-		    Assert(k->second.first == 'D');
-		    k->second.first = 'M';
-		    k->second.second = wdf;
-		} else {
-		    j->second.insert(make_pair(did, make_pair('A', wdf)));
-		}
 
-		PositionIterator it = term.positionlist_begin();
-		PositionIterator it_end = term.positionlist_end();
-		if (it != it_end) {
-		    position_table.set_positionlist(did, tname, it, it_end);
-		} else {
-		    position_table.delete_positionlist(did, tname);
-		}
+		if (termlist.at_end())
+		    ++term;
+		else if (term == term_end)
+		    termlist.next();
+		else {
+		    if (cmp >= 0)
+			++term;
+		    if (cmp <= 0)
+			termlist.next();
+
+ 		}
 	    }
 	    LOGLINE(DB, "Calculated doclen for replacement document " << did << " as " << new_doclen);
 
