Index: xapian-compact.cc
===================================================================
--- xapian-compact.cc	(revision 9054)
+++ xapian-compact.cc	(working copy)
@@ -231,6 +231,200 @@
     }
 }
 
+
+/// Sizes (in KB) gathered while compacting a table, used for the report.
+struct CompactionResult {
+    /// Sometimes stat can fail for benign reasons (e.g. >= 2GB file on certain
+    /// systems).  This keeps track of whether this has happened.
+    bool bad_stat;
+
+    /// Size of the input DB file(s), in units of 1024 bytes.
+    off_t in_size;
+
+    /// Size of the output DB file, in units of 1024 bytes.
+    off_t out_size;
+
+    /// Start with no stat failure recorded and both sizes zero.
+    CompactionResult() : bad_stat(false), in_size(0), out_size(0) {}
+
+    /// Set out_size by calling stat on dest + "DB" (skipped if bad_stat is
+    /// already set).  A stat failure other than ENOENT sets bad_stat.
+    void read_out_size(const string & dest)
+    {
+	if (bad_stat) return;
+	struct stat sb;
+	if (stat(dest + "DB", &sb) == 0) {
+	    out_size = sb.st_size / 1024;
+	} else {
+	    bad_stat = (errno != ENOENT);
+	}
+    }
+
+    /// Percentage by which out_size is below in_size (negative if it grew).
+    /// Only meaningful when in_size is non-zero, since it is the divisor.
+    double percent_decrease() const
+    {
+	return 100 * double(in_size - out_size) / in_size;
+    }
+};
+
+/// Per-table settings; the `tables` array holds one of these per table.
+struct table_list {
+    /// The "base name" of the table.
+    const char * name;
+    /// zlib compression strategy to use on tags (or DONT_COMPRESS).
+    int compress_strategy;
+    bool lazy;	///< Create tables after position lazily.
+};
+
+static const table_list tables[] = {	// One entry per table.
+    // name		compress_strategy	lazy	(fields of table_list)
+    { "postlist",	DONT_COMPRESS,		false },
+    { "record",		Z_DEFAULT_STRATEGY,	false },
+    { "termlist",	Z_DEFAULT_STRATEGY,	false },
+    { "position",	DONT_COMPRESS,		true },
+    { "value",		DONT_COMPRESS,		true },
+    { "spelling",	Z_DEFAULT_STRATEGY,	true },
+    { "synonyms",	Z_DEFAULT_STRATEGY,	true }
+};
+
+/// Merge the postlist tables of all the sources into @a out.
+///
+/// If @a multipass is set and there are more than 3 inputs, groups of them
+/// are first merged into temporary tables under @a destdir, whose files are
+/// unlinked once merged.  Returns the summed size of the source DB files in
+/// in_size, with bad_stat set if any stat failed other than with ENOENT.
+static CompactionResult
+compact_postlists(FlintTable * out, const vector<string> & sources,
+		  const table_list * table,
+		  const vector<Xapian::docid> & offset,
+		  bool multipass, const string & destdir,
+		  size_t block_size, Xapian::docid tot_off)
+{
+    CompactionResult result;
+    vector<string> tmp;
+    tmp.reserve(sources.size());
+    for (size_t n = 0; n < sources.size(); ++n) {
+	tmp.push_back(sources[n] + table->name + '.');
+	struct stat sb;
+	if (stat(tmp.back() + "DB", &sb) == 0) {
+	    result.in_size += sb.st_size / 1024;
+	} else if (errno != ENOENT) {
+	    // Only ever set the flag - a later ENOENT mustn't clear it.
+	    result.bad_stat = true;
+	}
+    }
+    vector<Xapian::docid> off(offset);
+    unsigned int c = 0;
+    while (multipass && tmp.size() > 3) {
+	vector<string> tmpout;
+	tmpout.reserve(tmp.size() / 2);
+	// Zero offsets for the next pass (presumably this pass applies them).
+	vector<Xapian::docid> newoff(tmp.size() / 2);
+	for (unsigned int i = 0, j; i < tmp.size(); i = j) {
+	    // Merge in pairs, but a lone final table joins the last group.
+	    j = i + 2;
+	    if (j == tmp.size() - 1) ++j;
+	    string dest = destdir;
+	    char buf[64];
+	    sprintf(buf, "/tmp%u_%u.", c, i / 2);
+	    dest += buf;
+
+	    // Don't compress temporary tables, even if the
+	    // final table would be.
+	    FlintTable tmptab(dest, false);
+	    tmptab.create_and_open(block_size);
+
+	    merge_postlists(&tmptab, off.begin() + i, tmp.begin() + i, tmp.begin() + j, 0);
+	    if (c > 0) {
+		for (unsigned int k = i; k < j; ++k) {
+		    unlink((tmp[k] + "DB").c_str());
+		    unlink((tmp[k] + "baseA").c_str());
+		    unlink((tmp[k] + "baseB").c_str());
+		}
+	    }
+	    tmpout.push_back(dest);
+	    tmptab.commit(1);
+	}
+	swap(tmp, tmpout);
+	swap(off, newoff);
+	++c;
+    }
+    merge_postlists(out, off.begin(), tmp.begin(), tmp.end(), tot_off);
+    if (c > 0) {
+	for (size_t k = 0; k < tmp.size(); ++k) {
+	    unlink((tmp[k] + "DB").c_str());
+	    unlink((tmp[k] + "baseA").c_str());
+	    unlink((tmp[k] + "baseB").c_str());
+	}
+    }
+    return result;
+}
+
+/// Copy the entries of @a table from each source into @a out.
+///
+/// For sources with a non-zero docid offset, the packed docid at the start of
+/// each key is rewritten with the offset added.  Returns the summed size of
+/// the source DB files in in_size, with bad_stat set if any stat failed
+/// other than with ENOENT.  Throws Xapian::DatabaseCorruptError on a bad key.
+static CompactionResult
+compact_table(FlintTable * out, const vector<string> & sources,
+	      const table_list * table,
+	      const vector<Xapian::docid> & offset)
+{
+    CompactionResult result;
+    // Position, Record, Termlist, Value (NOTE(review): spelling, synonyms?)
+    const bool is_position_table = strcmp(table->name, "position") == 0;
+    const string bad_key_msg = string("Bad ") + table->name + " key";
+    for (size_t i = 0; i < sources.size(); ++i) {
+	const Xapian::docid off = offset[i];
+	string src = sources[i] + table->name + '.';
+
+	struct stat sb;
+	if (stat(src + "DB", &sb) == 0) {
+	    if (sb.st_size == 0) continue;
+	    result.in_size += sb.st_size / 1024;
+	} else if (errno != ENOENT) {
+	    // Only ever set the flag - a later ENOENT mustn't clear it.
+	    result.bad_stat = true;
+	}
+
+	FlintTable in(src, true, table->compress_strategy, table->lazy);
+	in.open();
+	if (in.get_entry_count() == 0) continue;
+
+	FlintCursor cur(&in);
+	cur.find_entry("");
+
+	string key;
+	while (cur.next()) {
+	    // Adjust the key if this isn't the first database.
+	    if (off) {
+		Xapian::docid did;
+		const char * d = cur.current_key.data();
+		const char * e = d + cur.current_key.size();
+		if (!unpack_uint_preserving_sort(&d, e, &did))
+		    throw Xapian::DatabaseCorruptError(bad_key_msg);
+		did += off;
+		key = pack_uint_preserving_sort(did);
+		if (is_position_table) {
+		    // Copy over the termname too.
+		    size_t tnameidx = d - cur.current_key.data();
+		    key += cur.current_key.substr(tnameidx);
+		} else if (d != e) {
+		    // For the other tables the docid must be the whole key.
+		    throw Xapian::DatabaseCorruptError(bad_key_msg);
+		}
+	    } else {
+		key = cur.current_key;
+	    }
+	    bool compressed = cur.read_tag(true);
+	    out->add(key, cur.current_tag, compressed);
+	}
+    }
+    return result;
+}
+
 int
 main(int argc, char **argv)
 {
@@ -360,25 +554,6 @@
 	    }
 	}
 
-	struct table_list {
-	    // The "base name" of the table.
-	    const char * name;
-	    // zlib compression strategy to use on tags.
-	    int compress_strategy;
-	    // Create tables after position lazily.
-	    bool lazy;
-	};
-
-	static const table_list tables[] = {
-	    // name		compress_strategy	lazy
-	    { "postlist",	DONT_COMPRESS,		false },
-	    { "record",		Z_DEFAULT_STRATEGY,	false },
-	    { "termlist",	Z_DEFAULT_STRATEGY,	false },
-	    { "position",	DONT_COMPRESS,		true },
-	    { "value",		DONT_COMPRESS,		true },
-	    { "spelling",	Z_DEFAULT_STRATEGY,	true },
-	    { "synonyms",	Z_DEFAULT_STRATEGY,	true }
-	};
 	const table_list * tables_end = tables +
 	    (sizeof(tables) / sizeof(tables[0]));
 
@@ -405,159 +580,35 @@
 	    out.set_full_compaction(compaction != STANDARD);
 	    if (compaction == FULLER) out.set_max_item_size(1);
 
-	    // Sometimes stat can fail for benign reasons (e.g. >= 2GB file
-	    // on certain systems).
-	    bool bad_stat = false;
+	    CompactionResult result;
 
-	    off_t in_size = 0;
-
 	    if (strcmp(t->name, "postlist") == 0) {
-		vector<string> tmp;
-		tmp.reserve(sources.size());
-		for (vector<string>::const_iterator src = sources.begin();
-		     src != sources.end(); ++src) {
-		    string s(*src);
-		    s += t->name;
-		    s += '.';
-		    tmp.push_back(s);
-
-		    struct stat sb;
-		    if (stat(s + "DB", &sb) == 0) {
-			in_size += sb.st_size / 1024;
-		    } else {
-			bad_stat = (errno != ENOENT);
-		    }
-		}
-		vector<Xapian::docid> off(offset);
-		unsigned int c = 0;
-		while (multipass && tmp.size() > 3) {
-		    vector<string> tmpout;
-		    tmpout.reserve(tmp.size() / 2);
-		    vector<Xapian::docid> newoff;
-		    newoff.resize(tmp.size() / 2);
-		    for (unsigned int i = 0, j; i < tmp.size(); i = j) {
-			j = i + 2;
-			if (j == tmp.size() - 1) ++j;
-
-			string dest = destdir;
-			char buf[64];
-			sprintf(buf, "/tmp%u_%u.", c, i / 2);
-			dest += buf;
-
-			// Don't compress temporary tables, even if the
-			// final table would be.
-			FlintTable tmptab(dest, false);
-			tmptab.create_and_open(block_size);
-
-			merge_postlists(&tmptab, off.begin() + i, tmp.begin() + i, tmp.begin() + j, 0);
-			if (c > 0) {
-			    for (unsigned int k = i; k < j; ++k) {
-				unlink((tmp[k] + "DB").c_str());
-				unlink((tmp[k] + "baseA").c_str());
-				unlink((tmp[k] + "baseB").c_str());
-			    }
-			}
-			tmpout.push_back(dest);
-			tmptab.commit(1);
-		    }
-		    swap(tmp, tmpout);
-		    swap(off, newoff);
-		    ++c;
-		}
-		merge_postlists(&out, off.begin(), tmp.begin(), tmp.end(), tot_off);
-		if (c > 0) {
-		    for (size_t k = 0; k < tmp.size(); ++k) {
-			unlink((tmp[k] + "DB").c_str());
-			unlink((tmp[k] + "baseA").c_str());
-			unlink((tmp[k] + "baseB").c_str());
-		    }
-		}
+		result = compact_postlists(&out, sources, t, offset, multipass, destdir, block_size, tot_off);
 	    } else {
-		// Position, Record, Termlist, Value
-		bool is_position_table = strcmp(t->name, "position") == 0;
-		for (size_t i = 0; i < sources.size(); ++i) {
-		    Xapian::docid off = offset[i];
-		    string src(sources[i]);
-		    src += t->name;
-		    src += '.';
-
-		    struct stat sb;
-		    if (stat(src + "DB", &sb) == 0) {
-			if (sb.st_size == 0) continue;
-			in_size += sb.st_size / 1024;
-		    } else {
-			bad_stat = (errno != ENOENT);
-		    }
-
-		    FlintTable in(src, true, t->compress_strategy, t->lazy);
-		    in.open();
-		    if (in.get_entry_count() == 0) continue;
-
-		    FlintCursor cur(&in);
-		    cur.find_entry("");
-
-		    string key;
-		    while (cur.next()) {
-			// Adjust the key if this isn't the first database.
-			if (off) {
-			    Xapian::docid did;
-			    const char * d = cur.current_key.data();
-			    const char * e = d + cur.current_key.size();
-			    if (!unpack_uint_preserving_sort(&d, e, &did)) {
-				string msg = "Bad ";
-				msg += t->name;
-				msg += " key";
-				throw Xapian::DatabaseCorruptError(msg);
-			    }
-			    did += off;
-			    key = pack_uint_preserving_sort(did);
-			    if (is_position_table) {
-				// Copy over the termname too.
-				size_t tnameidx = d - cur.current_key.data();
-				key += cur.current_key.substr(tnameidx);
-			    } else if (d != e) {
-				string msg = "Bad ";
-				msg += t->name;
-				msg += " key";
-				throw Xapian::DatabaseCorruptError(msg);
-			    }
-			} else {
-			    key = cur.current_key;
-			}
-			bool compressed = cur.read_tag(true);
-			out.add(key, cur.current_tag, compressed);
-		    }
-		}
+		result = compact_table(&out, sources, t, offset);
 	    }
 
 	    // And commit as revision 1.
 	    out.commit(1);
 
 	    cout << '\r' << t->name << ": ";
-	    off_t out_size = 0;
-	    if (!bad_stat) {
-		struct stat sb;
-		if (stat(dest + "DB", &sb) == 0) {
-		    out_size = sb.st_size / 1024;
-		} else {
-		    bad_stat = (errno != ENOENT);
-		}
-	    }
-	    if (bad_stat) {
+	    result.read_out_size(dest);
+	    
+	    if (result.bad_stat) {
 		cout << "Done (couldn't stat all the DB files)";
 	    } else {
-		if (out_size == in_size) {
+		if (result.out_size == result.in_size) {
 		    cout << "Size unchanged (";
-		} else if (out_size < in_size) {
+		} else if (result.out_size < result.in_size) {
 		    cout << "Reduced by "
-			 << 100 * double(in_size - out_size) / in_size << "% "
-			 << in_size - out_size << "K (" << in_size << "K -> ";
+			 << result.percent_decrease() << "% "
+			 << result.in_size - result.out_size << "K (" << result.in_size << "K -> ";
 		} else {
 		    cout << "INCREASED by "
-			 << 100 * double(out_size - in_size) / in_size << "% "
-			 << out_size - in_size << "K (" << in_size << "K -> ";
+			 << -result.percent_decrease() << "% "
+			 << result.out_size - result.in_size << "K (" << result.in_size << "K -> ";
 		}
-		cout << out_size << "K)";
+		cout << result.out_size << "K)";
 	    }
 	    cout << endl;
 	}
