Ticket #326: avoid_string_appends_and_loop_unroll_2.patch

File avoid_string_appends_and_loop_unroll_2.patch, 5.3 KB (added by Richard Boulton, 15 years ago)

Patch which also unrolls the loop in append_packed_uint_preserving_sort(), avoiding string resizing.

  • chert_postlist.h

     
    8686        /// Compose a key from a termname and docid.
    8787        static string make_key(const string & term, Xapian::docid did) {
    8888            string key = make_key(term);
    89             key += pack_uint_preserving_sort(did);
     89            append_packed_uint_preserving_sort(key, did);
    9090            return key;
    9191        }
    9292
  • chert_positionlist.cc

     
    7070              did << ", " << term);
    7171
    7272    string data;
    73     if (!get_exact_entry(pack_uint_preserving_sort(did) + term, data)) {
     73    string key;
     74    // 5 bytes is usually enough for a packed uint
     75    key.reserve(5 + term.size());
     76    append_packed_uint_preserving_sort(key, did);
     77    key.append(term);
     78    if (!get_exact_entry(key, data)) {
    7479        // There's no positional information for this term.
    7580        return 0;
    7681    }
     
    106111    positions.clear();
    107112
    108113    string data;
    109     if (!table->get_exact_entry(pack_uint_preserving_sort(did) + tname, data)) {
     114    string key;
     115    // 5 bytes is usually enough for a packed uint
     116    key.reserve(5 + tname.size());
     117    append_packed_uint_preserving_sort(key, did);
     118    key.append(tname);
     119    if (!table->get_exact_entry(key, data)) {
    110120        // There's no positional information for this term.
    111121        current_pos = positions.begin();
    112122        return false;
  • chert_positionlist.h

     
    3434
    3535class ChertPositionListTable : public ChertTable {
    3636    static string make_key(Xapian::docid did, const string & tname) {
    37         return pack_uint_preserving_sort(did) + tname;
     37        string result;
     38        // 5 bytes is usually enough for a packed uint
     39        result.reserve(5 + tname.size());
     40        append_packed_uint_preserving_sort(result, did);
     41        result.append(tname);
     42        return result + tname;
    3843    }
    3944
    4045  public:
  • chert_utils.h

     
    224224 *  of 256 bytes on the length of the integer.  However, this is unlikely to
    225225 *  ever be a problem.
    226226 *
     227 *  @param result A string to append the representation of the integer to.
    227228 *  @param value  The integer to represent.
    228  *
    229  *  @result       A string containing the representation of the integer.
    230229 */
    231230template<class T>
    232 string
    233 pack_uint_preserving_sort(T value)
     231void
     232append_packed_uint_preserving_sort(string & result, T value)
    234233{
    235234    // Check unsigned
    236235    STATIC_ASSERT_UNSIGNED_TYPE(T);
     236    STATIC_ASSERT(sizeof(T) >= 4);
    237237
    238     string result;
    239     while (value != 0) {
    240         om_byte part = static_cast<om_byte>(value & 0xff);
    241         value = value >> 8;
    242         result.insert(string::size_type(0), 1u, char(part));
     238    string::size_type start = result.size();
     239    if (value <= 0xffff) {
     240        if (value < 0xff) {
     241            // 1 byte
     242            char buf[2];
     243            buf[0] = char(1);
     244            buf[1] = char(value & 0xff);
     245            result.append(buf, 2);
     246            return;
     247        } else {
     248            // 2 bytes
     249            char buf[3];
     250            buf[0] = char(2);
     251            buf[1] = char((value >> 8) & 0xff);
     252            buf[2] = char(value & 0xff);
     253            result.append(buf, 3);
     254            return;
     255        }
    243256    }
    244     result.insert(string::size_type(0), 1u, char(result.size()));
    245     return result;
     257    if (value <= 0xffffff) {
     258        // 3 bytes
     259        char buf[4];
     260        buf[0] = char(3);
     261        buf[1] = char((value >> 16) & 0xff);
     262        buf[2] = char((value >> 8) & 0xff);
     263        buf[3] = char(value & 0xff);
     264        result.append(buf, 4);
     265        return;
     266    }
     267
     268    if (value <= 0xffffffff) {
     269        // 4 bytes.
     270        char buf[5];
     271        buf[0] = char(4);
     272        buf[1] = char((value >> 24) & 0xff);
     273        buf[2] = char((value >> 16) & 0xff);
     274        buf[3] = char((value >> 8) & 0xff);
     275        buf[4] = char(value & 0xff);
     276        result.append(buf, 5);
     277        return;
     278    }
     279
     280    if (sizeof(T) > 4) {
     281        if (result.capacity() < start + 9) {
     282            result.reserve(start + 9);
     283        }
     284        // More than 4 bytes. Do first 4, then loop.
     285        result.append(string::size_type(1u), char((value >> 24) & 0xff));
     286        result.append(string::size_type(1u), char((value >> 16) & 0xff));
     287        result.append(string::size_type(1u), char((value >> 8) & 0xff));
     288        result.append(string::size_type(1u), char(value & 0xff));
     289        value = value >> 16;
     290        value = value >> 16;
     291        while (value != 0) {
     292            om_byte part = static_cast<om_byte>(value & 0xff);
     293            value = value >> 8;
     294            result.insert(start, 1u, char(part));
     295        }
     296        result.insert(start, 1u, char(result.size() - start));
     297    }
    246298}
    247299
    248300/** Unpack an unsigned integer, stored in sort-preserving order.
     
    401453inline string
    402454chert_docid_to_key(Xapian::docid did)
    403455{
    404     return pack_uint_preserving_sort(did);
     456    string result;
     457    append_packed_uint_preserving_sort(result, did);
     458    return result;
    405459}
    406460
    407461#endif /* OM_HGUARD_CHERT_UTILS_H */
  • chert_values.h

     
    3737{
    3838    std::string key("\0\xd8", 2);
    3939    key += pack_uint(slot);
    40     key += pack_uint_preserving_sort(did);
     40    append_packed_uint_preserving_sort(key, did);
    4141    return key;
    4242}
    4343