Ticket #671: readahead-improved.patch

File readahead-improved.patch, 13.6 KB (added by Olly Betts, 10 years ago)

improved patch

  • xapian-core/api/query.cc

    diff --git a/xapian-core/api/query.cc b/xapian-core/api/query.cc
    index 9e3c9ab..5a2cebb 100644
    a b Query::get_terms_begin() const  
    144144    return TermIterator(new VectorTermList(v.begin(), v.end()));
    145145}
    146146
     /// Return an iterator over the distinct term names in this query, in
     /// ascending string order.
     ///
     /// A default-constructed TermIterator is returned when the query has no
     /// internal representation.  Otherwise the terms are collected, sorted by
     /// term name, de-duplicated, and copied into a VectorTermList which the
     /// returned TermIterator wraps.
     147const TermIterator
     148Query::get_unique_terms_begin() const
     149{
     150    if (!internal.get())
     151        return TermIterator();
     152
            // gather_terms() is handed the vector through a void*; presumably it
            // appends (position, term) pairs to it - confirm against the
            // Query::Internal implementation.
     153    vector<pair<Xapian::termpos, string> > terms;
     154    internal->gather_terms(static_cast<void*>(&terms));
            // Order by term name only (the position is ignored) so that equal
            // terms become adjacent for the de-duplication pass below.
     155    sort(terms.begin(), terms.end(), [](
     156                const pair<Xapian::termpos, string>& a,
     157                const pair<Xapian::termpos, string>& b) {
     158        return a.second < b.second;
     159    });
     160
     161    vector<string> v;
     162    vector<pair<Xapian::termpos, string> >::const_iterator i;
            // old_term points at the most recently kept term inside `terms`;
            // `terms` isn't modified during the loop, so the pointer stays valid.
     163    const string * old_term = NULL;
     164    for (i = terms.begin(); i != terms.end(); ++i) {
     165        // Remove duplicate term names.
     166        if (old_term && *old_term == i->second)
     167            continue;
     168
     169        v.push_back(i->second);
     170        old_term = &(i->second);
     171    }
     172    return TermIterator(new VectorTermList(v.begin(), v.end()));
     173}
     174
    147175Xapian::termcount
    148176Query::get_length() const
    149177{
  • xapian-core/backends/chert/chert_database.cc

    diff --git a/xapian-core/backends/chert/chert_database.cc b/xapian-core/backends/chert/chert_database.cc
    index a269ce9..5bec204 100644
    a b ChertDatabase::set_revision_number(chert_revision_number_t new_revision)  
    472472    }
    473473}
    474474
     /// Readahead hint for document did: forwards to the record table's
     /// readahead_for_record().
     475void
     476ChertDatabase::request_document(Xapian::docid did) const
     477{
     478    record_table.readahead_for_record(did);
     479}
     480
     /// Issue a readahead hint on the postlist table for every unique term in
     /// query.
     ///
     /// The bool returned by readahead_key() is ignored.
     481void
     482ChertDatabase::readahead_for_query(const Xapian::Query &query)
     483{
     484    Xapian::TermIterator t;
            // A default-constructed TermIterator is used as the end marker.
     485    for (t = query.get_unique_terms_begin(); t != Xapian::TermIterator(); ++t) {
     486        const string & term = *t;
     487        postlist_table.readahead_key(ChertPostListTable::make_key(term));
     488    }
     489}
     490
    475491bool
    476492ChertDatabase::reopen()
    477493{
  • xapian-core/backends/chert/chert_database.h

    diff --git a/xapian-core/backends/chert/chert_database.h b/xapian-core/backends/chert/chert_database.h
    index b7375d1..e405f05 100644
    a b class ChertDatabase : public Xapian::Database::Internal {  
    298298                                    Xapian::ReplicationInfo * info);
    299299        string get_revision_info() const;
    300300        string get_uuid() const;
     301
     /// Readahead hint for document did (forwarded to the record table).
     302        void request_document(Xapian::docid /*did*/) const;
                /// Readahead hint for the postlist table entry of each unique
                /// term in query.
     303        void readahead_for_query(const Xapian::Query &query);
    301304        //@}
    302305
    303306        XAPIAN_NORETURN(void throw_termlist_table_close_exception() const);
  • xapian-core/backends/chert/chert_record.cc

    diff --git a/xapian-core/backends/chert/chert_record.cc b/xapian-core/backends/chert/chert_record.cc
    index 55ba264..e8a1578 100644
    a b ChertRecordTable::delete_record(Xapian::docid did)  
    7979    if (!del(make_key(did)))
    8080        throw Xapian::DocNotFoundError("Can't delete non-existent document #" + str(did));
    8181}
     82
     /// Issue a readahead hint for the key built from document id did.
     ///
     /// The bool returned by readahead_key() is discarded.
     83void
     84ChertRecordTable::readahead_for_record(Xapian::docid did) const
     85{
     86    readahead_key(make_key(did));
     87}
  • xapian-core/backends/chert/chert_record.h

    diff --git a/xapian-core/backends/chert/chert_record.h b/xapian-core/backends/chert/chert_record.h
    index 0f6607d..8887031 100644
    a b class ChertRecordTable : public ChertTable {  
    6868        /** Delete a record from the table.
    6969         */
    7070        void delete_record(Xapian::docid did);
     71
     /** Issue a readahead hint for the record of document did.
      */
     72        void readahead_for_record(Xapian::docid did) const;
    7173};
    7274
    7375#endif /* OM_HGUARD_CHERT_RECORD_H */
  • xapian-core/backends/chert/chert_table.cc

    diff --git a/xapian-core/backends/chert/chert_table.cc b/xapian-core/backends/chert/chert_table.cc
    index 7eaaa35..3ef21dd 100644
    a b ChertTable::del(const string &key)  
    11371137}
    11381138
    /// Issue a readahead hint for the block which the top-level block of the
    /// B-tree points to for key.
    ///
    /// key must be non-empty (asserted).
    ///
    /// Returns false, without issuing a hint, if the table has no open handle
    /// (handle < 0) or level is 0.  Returns true otherwise, including when the
    /// hint is skipped because the block matches last_readahead.
    11391139bool
     1140ChertTable::readahead_key(const string &key) const
     1141{
     1142    LOGCALL(DB, bool, "ChertTable::readahead_key", key);
     1143    Assert(!key.empty());
     1144
     1145    // Two cases:
     1146    //
     1147    // handle = -1:  Lazy table which isn't yet open
     1148    //
     1149    // handle = -2:  Table has been closed.  Since the readahead is just a
     1150    // hint, we can safely ignore it for a closed table.
     1151    if (handle < 0)
     1152        RETURN(false);
     1153
     1154    // If the table only has one level, there are no branch blocks to preread.
     1155    if (level == 0)
     1156        RETURN(false);
     1157
     1158    form_key(key);
     1159    Key ktkey = kt.key();
     1160
     1161    // We'll only readahead the first level, since descending the B-tree would
     1162    // require actual reads that would likely hurt performance more than help.
     1163    const byte * p = C[level].p;
     1164    int c = find_in_block(p, ktkey, false, C[level].c);
     1165    uint4 n = Item(p, c).block_given_by();
             // Don't repeat the hint when the target block is the one hinted
             // on the previous call.
     1166    if (n != last_readahead) {
     1167        /* Use the base bit_map_size not the bitmap's size, because the latter
     1168         * is uninitialised in readonly mode.
     1169         */
     1170        Assert(n / CHAR_BIT < base.get_bit_map_size());
     1171
     1172        io_readahead_block(handle, block_size, n);
     1173        last_readahead = n;
     1174    }
     1175    RETURN(true);
     1176}
     1177
     1178bool
    11401179ChertTable::get_exact_entry(const string &key, string & tag) const
    11411180{
    11421181    LOGCALL(DB, bool, "ChertTable::get_exact_entry", key | tag);
  • xapian-core/backends/chert/chert_table.h

    diff --git a/xapian-core/backends/chert/chert_table.h b/xapian-core/backends/chert/chert_table.h
    index 2ea61ec..bb89b90 100644
    a b class XAPIAN_VISIBILITY_DEFAULT ChertTable {  
    334334         */
    335335        void close(bool permanent=false);
    336336
     /** Issue a readahead hint for the block the top level of the B-tree
      *  points to for @a key.
      *
      *  @return false if the table has no open handle or has only one
      *          level (no hint is issued); true otherwise.
      */
     337        bool readahead_key(const string &key) const;
     338
    337339        /** Determine whether the btree exists on disk.
    338340         */
    339341        bool exists() const;
    class XAPIAN_VISIBILITY_DEFAULT ChertTable {  
    794796        /// If true, don't create the table until it's needed.
    795797        bool lazy;
    796798
     799        /// Last block readahead_key() preread.
                ///
                /// Declared mutable because readahead_key() is a const member
                /// function yet records the block it last hinted here.
                ///
                /// NOTE(review): no hunk in this patch initialises this member
                /// for ChertTable, whereas the GlassTable constructor hunk sets
                /// its counterpart to uint4(-1) - confirm it is initialised
                /// before readahead_key() first compares against it.
     800        mutable uint4 last_readahead;
     801
    797802        /* Debugging methods */
    798803//      void report_block_full(int m, int n, const byte * p);
    799804};
  • xapian-core/backends/database.cc

    diff --git a/xapian-core/backends/database.cc b/xapian-core/backends/database.cc
    index e98b32a..1305900 100644
    a b Database::Internal::keep_alive()  
    5050}
    5151
    5252
     /// Default implementation of the readahead hint: does nothing.
     ///
     /// ChertDatabase and GlassDatabase supply their own versions in this
     /// patch; other backends fall through to this no-op.
     53void
     54Database::Internal::readahead_for_query (const Xapian::Query &)
     55{
     56}
     57
    5358Xapian::doccount
    5459Database::Internal::get_value_freq(Xapian::valueno) const
    5560{
  • xapian-core/backends/database.h

    diff --git a/xapian-core/backends/database.h b/xapian-core/backends/database.h
    index a0e7b43..7914cc0 100644
    a b typedef Xapian::ValueIterator::Internal ValueList;  
    4848
    4949namespace Xapian {
    5050
     51class Query;
    5152struct ReplicationInfo;
    5253
    5354/** Base class for databases.
    class Database::Internal : public Xapian::Internal::intrusive_base {  
    105106         */
    106107        virtual void keep_alive();
    107108
     /** Hint that data needed to run @a query may be wanted soon.
      *
      *  The base class implementation does nothing; backends may override
      *  it to issue readahead requests.
      */
     109        virtual void readahead_for_query (const Xapian::Query & query);
     110
    108111        //////////////////////////////////////////////////////////////////
    109112        // Database statistics:
    110113        // ====================
  • xapian-core/backends/glass/glass_database.cc

    diff --git a/xapian-core/backends/glass/glass_database.cc b/xapian-core/backends/glass/glass_database.cc
    index db6facf..d1bd645 100644
    a b GlassDatabase::set_revision_number(int flags, glass_revision_number_t new_revisi  
    347347    changes.commit(new_revision, flags);
    348348}
    349349
     /// Readahead hint for document did: forwards to the docdata table's
     /// readahead_for_document().
     350void
     351GlassDatabase::request_document(Xapian::docid did) const
     352{
     353    docdata_table.readahead_for_document(did);
     354}
     355
     /// Issue a readahead hint on the postlist table for every unique term in
     /// query.
     ///
     /// The bool returned by readahead_key() is ignored.
     356void
     357GlassDatabase::readahead_for_query(const Xapian::Query &query)
     358{
     359    Xapian::TermIterator t;
            // A default-constructed TermIterator is used as the end marker.
     360    for (t = query.get_unique_terms_begin(); t != Xapian::TermIterator(); ++t) {
     361        const string & term = *t;
     362        postlist_table.readahead_key(GlassPostListTable::make_key(term));
     363    }
     364}
     365
    350366bool
    351367GlassDatabase::reopen()
    352368{
  • xapian-core/backends/glass/glass_database.h

    diff --git a/xapian-core/backends/glass/glass_database.h b/xapian-core/backends/glass/glass_database.h
    index 0f1fedd..d8dc53e 100644
    a b class GlassDatabase : public Xapian::Database::Internal {  
    286286                                    Xapian::ReplicationInfo * info);
    287287        string get_revision_info() const;
    288288        string get_uuid() const;
     289
     /// Readahead hint for document did (forwarded to the docdata table).
     290        void request_document(Xapian::docid /*did*/) const;
                /// Readahead hint for the postlist table entry of each unique
                /// term in query.
     291        void readahead_for_query(const Xapian::Query &query);
    289292        //@}
    290293
    291294        XAPIAN_NORETURN(void throw_termlist_table_close_exception() const);
  • xapian-core/backends/glass/glass_docdata.h

    diff --git a/xapian-core/backends/glass/glass_docdata.h b/xapian-core/backends/glass/glass_docdata.h
    index 4fbf57a..5fed0a0 100644
    a b class GlassDocDataTable : public GlassLazyTable {  
    9999     *               there's no such document, or the document has no data).
    100100     */
    101101    bool delete_document_data(Xapian::docid did) { return del(make_key(did)); }
     102
     /** Issue a readahead hint for the key built from document id @a did.
      *
      *  The bool returned by readahead_key() is discarded.
      */
     103    void readahead_for_document(Xapian::docid did) const {
     104        readahead_key(make_key(did));
     105    }
    102106};
    103107
    104108#endif // XAPIAN_INCLUDED_GLASS_DOCDATA_H
  • xapian-core/backends/glass/glass_table.cc

    diff --git a/xapian-core/backends/glass/glass_table.cc b/xapian-core/backends/glass/glass_table.cc
    index 5c3e513..3c8505a 100644
    a b GlassTable::del(const string &key)  
    11571157}
    11581158
    /// Issue a readahead hint for the block which the top-level block of the
    /// B-tree points to for key.
    ///
    /// key must be non-empty (asserted).
    ///
    /// Returns false, without issuing a hint, if the table has no open handle
    /// (handle < 0) or level is 0.  Returns true otherwise, including when the
    /// hint is skipped because the block matches last_readahead.
    11591159bool
     1160GlassTable::readahead_key(const string &key) const
     1161{
     1162    LOGCALL(DB, bool, "GlassTable::readahead_key", key);
     1163    Assert(!key.empty());
     1164
     1165    // Two cases:
     1166    //
     1167    // handle = -1:  Lazy table which isn't yet open
     1168    //
     1169    // handle = -2:  Table has been closed.  Since the readahead is just a
     1170    // hint, we can safely ignore it for a closed table.
     1171    if (handle < 0)
     1172        RETURN(false);
     1173
     1174    // If the table only has one level, there are no branch blocks to preread.
     1175    if (level == 0)
     1176        RETURN(false);
     1177
     1178    form_key(key);
     1179    Key ktkey = kt.key();
     1180
     1181    // We'll only readahead the first level, since descending the B-tree would
     1182    // require actual reads that would likely hurt performance more than help.
     1183    const byte * p = C[level].get_p();
     1184    int c = find_in_block(p, ktkey, false, C[level].c);
     1185    uint4 n = Item(p, c).block_given_by();
             // Don't repeat the hint when the target block is the one hinted
             // on the previous call.
     1186    if (n != last_readahead) {
     1187        io_readahead_block(handle, block_size, n);
     1188        last_readahead = n;
     1189    }
     1190    RETURN(true);
     1191}
     1192
     1193bool
    11601194GlassTable::get_exact_entry(const string &key, string & tag) const
    11611195{
    11621196    LOGCALL(DB, bool, "GlassTable::get_exact_entry", key | tag);
    GlassTable::GlassTable(const char * tablename_, const string & path_,  
    14541488          split_p(0),
    14551489          compress_strategy(compress_strategy_),
    14561490          comp_stream(compress_strategy_),
    1457           lazy(lazy_)
     1491          lazy(lazy_),
     1492          last_readahead(uint4(-1))
    14581493{
    14591494    LOGCALL_CTOR(DB, "GlassTable", tablename_ | path_ | readonly_ | compress_strategy_ | lazy_);
    14601495}
  • xapian-core/backends/glass/glass_table.h

    diff --git a/xapian-core/backends/glass/glass_table.h b/xapian-core/backends/glass/glass_table.h
    index c9b2282..9c67e10 100644
    a b class GlassTable {  
    346346         */
    347347        void close(bool permanent=false);
    348348
     /** Issue a readahead hint for the block the top level of the B-tree
      *  points to for @a key.
      *
      *  @return false if the table has no open handle or has only one
      *          level (no hint is issued); true otherwise.
      */
     349        bool readahead_key(const string &key) const;
     350
    349351        /** Determine whether the btree exists on disk.
    350352         */
    351353        bool exists() const;
    class GlassTable {  
    752754        /// If true, don't create the table until it's needed.
    753755        bool lazy;
    754756
     757        /// Last block readahead_key() preread.
                ///
                /// Declared mutable because readahead_key() is a const member
                /// function yet records the block it last hinted here.  The
                /// GlassTable constructor initialises it to uint4(-1),
                /// presumably as a "nothing hinted yet" value.
     758        mutable uint4 last_readahead;
     759
    755760        /* Debugging methods */
    756761//      void report_block_full(int m, int n, const byte * p);
    757762};
  • xapian-core/common/io_utils.cc

    diff --git a/xapian-core/common/io_utils.cc b/xapian-core/common/io_utils.cc
    index 5494b82..1d9e1f6 100644
    a b throw_block_error(const char * s, off_t b, int e)  
    162162}
    163163
    /// Advise the OS that block b of file descriptor fd, where each block is
    /// n bytes, is expected to be read soon.
    ///
    /// Uses posix_fadvise() with POSIX_FADV_WILLNEED on the n bytes starting at
    /// byte offset b * n.  The return value of posix_fadvise() is not checked.
    /// When HAVE_POSIX_FADVISE isn't defined the body is empty, so the call
    /// does nothing and none of the parameters are used.
    164164void
     165io_readahead_block(int fd, size_t n, off_t b)
     166{
     167#ifdef HAVE_POSIX_FADVISE
            // Byte offset of the start of block b.
     168    off_t o = b * n;
     169    posix_fadvise(fd, o, n, POSIX_FADV_WILLNEED);
     170#endif
     171}
     171}
     172
     173void
    165174io_read_block(int fd, char * p, size_t n, off_t b)
    166175{
    167176    off_t o = b * n;
  • xapian-core/common/io_utils.h

    diff --git a/xapian-core/common/io_utils.h b/xapian-core/common/io_utils.h
    index 9ddcb11..c647b0e 100644
    a b inline void io_write(int fd, const unsigned char * p, size_t n) {  
    116116    io_write(fd, reinterpret_cast<const char *>(p), n);
    117117}
    118118
     119/// Hint that block b (each block being n bytes) of file descriptor fd
        /// will be read soon.
        ///
        /// This is only a hint: the definition in io_utils.cc does nothing unless
        /// HAVE_POSIX_FADVISE is defined.
     120void io_readahead_block(int fd, size_t n, off_t b);
     121
    119122/// Read block b size n bytes into buffer p from file descriptor fd.
    120123void io_read_block(int fd, char * p, size_t n, off_t b);
    121124
  • xapian-core/configure.ac

    diff --git a/xapian-core/configure.ac b/xapian-core/configure.ac
    index 31cc8b6..2e40217 100644
    a b AC_CHECK_DECL([fdatasync(int)], [  
    800800)
    801801
    802802AC_CHECK_FUNCS([fsync])
     dnl Check for posix_fadvise(); io_readahead_block() in common/io_utils.cc
     dnl only calls it when HAVE_POSIX_FADVISE is defined.
     803AC_CHECK_FUNCS([posix_fadvise])
    803804
    804805dnl HP-UX has pread and pwrite, but they don't work!  Apparently this problem
    805806dnl manifests when largefile support is enabled, and we definitely want that
  • xapian-core/include/xapian/query.h

    diff --git a/xapian-core/include/xapian/query.h b/xapian-core/include/xapian/query.h
    index 256311f..504a4b1 100644
    a b class XAPIAN_VISIBILITY_DEFAULT Query {  
    252252        return TermIterator();
    253253    }
    254254
     /** Begin iterator for the distinct terms in the query, in ascending
      *  term name order.
      *
      *  Each term name is returned once, however many times it occurs in
      *  the query.
      */
     255    const TermIterator get_unique_terms_begin() const;
     256
    255257    Xapian::termcount get_length() const XAPIAN_PURE_FUNCTION;
    256258
    257259    bool XAPIAN_NOTHROW(empty() const) XAPIAN_PURE_FUNCTION {
  • xapian-core/matcher/multimatch.cc

    diff --git a/xapian-core/matcher/multimatch.cc b/xapian-core/matcher/multimatch.cc
    index f51edc1..e65bf60 100644
    a b MultiMatch::MultiMatch(const Xapian::Database &db_,  
    332332                is_remote[i] = true;
    333333            } else {
    334334                smatch = new LocalSubMatch(subdb, query, qlen, subrsets[i], weight);
     335                subdb->readahead_for_query(query);
    335336            }
    336337#else
    337338            // Avoid unused parameter warnings.