Context Navigation

Back to Ticket #165

Ticket #165: numvaluerange.patch

File numvaluerange.patch, 10.0 KB (added by Richard Boulton, 17 years ago)
Implementation of a fix

tests/queryparsertest.cc

 #include <xapian.h>
 #include <iostream>
 #include <string>
+#include <math.h>
 #include "utils.h"
 using namespace std;
 …
 static test test_value_range2_queries[] = {
     { "a..b", "VALUE_RANGE 3 a b" },
     { "1..12", "VALUE_RANGE 2 1 12" },
+    { "1..12", "VALUE_RANGE 2 \2044 \2047\200" },
     { "20070201..20070228", "VALUE_RANGE 1 20070201 20070228" },
     { "$10..20", "VALUE_RANGE 4 10 20" },
     { "$10..$20", "VALUE_RANGE 4 10 20" },
     { "12..42kg", "VALUE_RANGE 5 12 42" },
     { "12kg..42kg", "VALUE_RANGE 5 12 42" },
+    { "$10..20", "VALUE_RANGE 4 \2047@ \2048@" },
+    { "$10..$20", "VALUE_RANGE 4 \2047@ \2048@" },
+    { "12..42kg", "VALUE_RANGE 5 \2047\200 \2049P" },
+    { "12kg..42kg", "VALUE_RANGE 5 \2047\200 \2049P" },
     { "12kg..42", "VALUE_RANGE 3 12kg 42" },
     { "10..$20", "VALUE_RANGE 3 10 $20" },
     { "1999-03-12..2020-12-30", "VALUE_RANGE 1 19990312 20201230" },
 …
     { "12/03/99..12/04/01", "VALUE_RANGE 1 19990312 20010412" },
     { "03-12-99..04-14-01", "VALUE_RANGE 1 19990312 20010414" },
     { "(test:a..test:b hello)", "(hello:(pos=1) FILTER VALUE_RANGE 3 test:a test:b)" },
     { "12..42kg 5..6kg 1..12", "(VALUE_RANGE 2 1 12 AND (VALUE_RANGE 5 12 42 OR VALUE_RANGE 5 5 6))" },
+    { "12..42kg 5..6kg 1..12", "(VALUE_RANGE 2 \2044 \2047\200 AND (VALUE_RANGE 5 \2047\200 \2049P OR VALUE_RANGE 5 \2046@ \2046\200))" },
     { NULL, NULL }
 };
 …
     Xapian::WritableDatabase db(Xapian::InMemory::open());
     int low = 0;  // FIXME - should it work with negative numbers?
                   // If so, test it with some by setting low to -10
     int high = 9; // Currently the test passes if high is 9, but not if it is 10.
+    int high = 100; // Currently the test passes if high is 9, but not if it is 10.
     for (int i = low; i <= high; ++i) {
         Xapian::Document doc;
         doc.add_value(1, om_tostring(i));
+        doc.add_value(1, Xapian::NumberValueRangeProcessor::float_to_string(i));
         db.add_document(doc);
+    }
 …
             if (end < start) {
                 TEST_EQUAL(mset.size(), 0);
             } else {
                 //TEST_EQUAL(mset.size(), 1u + end - start);
+                TEST_EQUAL(mset.size(), 1u + end - start);
                 for (unsigned int j = 0; j != mset.size(); j++) {
                     TEST_EQUAL(mset[j].get_document().get_value(1),
                                om_tostring(static_cast<int>(j) + start));
+                               Xapian::NumberValueRangeProcessor::float_to_string(static_cast<int>(j) + start));
+                }
+            }
+        }
 …
     return true;
+}
+/// Sample values for test_value_range_serialise1(), listed in strictly
+/// increasing order (the test checks each entry is greater than the previous
+/// one).  The positive half mirrors the negative half around zero.
+///
+/// The list is terminated by the sentinel value 64, which must remain the
+/// last entry and must not be used as a sample.
+static double test_value_range_numbers[] = {
+    -pow(2, 1022),
+    -1024.5,
+    -3.14159265358979323846,
+    -2,
+    -1.8,
+    -1.1,
+    -1,
+    -0.5,
+    -0.2,
+    -0.1,
+    -0.000005,
+    -0.000002,
+    -0.000001,
+    -pow(2, -1023),
+    -pow(2, -1024),
+    -pow(2, -1074),
+    0,
+    pow(2, -1074),
+    pow(2, -1024),
+    pow(2, -1023),
+    0.000001,
+    0.000002,
+    0.000005,
+    0.1,
+    0.2,
+    0.5,
+    1,
+    1.1,
+    1.8,
+    2,
+    3.14159265358979323846,
+    1024.5,
+    pow(2, 1022),
+    64 // Magic number which we stop at.
+};
+// Test serialisation and unserialisation of various numbers and string.
+static bool test_value_range_serialise1()
+{
+    double prevnum = 0;
+    string prevstr = "";
+    bool started = false;
+    for (double *p = test_value_range_numbers; *p != 64; ++p) {
+        double num = *p;
+        tout << "Number: " << num << '\n';
+        string str = Xapian::NumberValueRangeProcessor::float_to_string(num);
+        tout << "String: " << str << '\n';
+        TEST_EQUAL(Xapian::NumberValueRangeProcessor::string_to_float(str), num);
+        if (started) {
+            TEST_AND_EXPLAIN(prevnum < num, "Expected previous number (" <<
+                             prevnum << ") to be less than current number (" <<
+                             num << ")");
+            TEST_AND_EXPLAIN(prevstr < str, "Expected previous string (" <<
+                             prevstr << ") to be less than current string (" <<
+                             str << ")");
+        }
+        prevnum = num;
+        prevstr = str;
+        started = true;
+    }
+    return true;
+}
 static test test_value_daterange1_queries[] = {
     { "12/03/99..12/04/01", "VALUE_RANGE 1 19991203 20011204" },
     { "03-12-99..04-14-01", "VALUE_RANGE 1 19990312 20010414" },
 …
     TESTCASE(qp_value_range2),
     TESTCASE(qp_value_range3),
     TESTCASE(qp_value_daterange1),
+    TESTCASE(value_range_serialise1),
     TESTCASE(qp_value_customrange1),
     TESTCASE(qp_stoplist1),
     END_OF_TESTCASES

include/xapian/queryparser.h

         : valno(valno_), prefix(prefix_), str(str_) { }
     Xapian::valueno operator()(std::string &begin, std::string &end);
+    static std::string float_to_string(double value);
+    static double string_to_float(const std::string & value);
 };
 /// Build a Xapian::Query object from a user query string.

api/valuerangeproc.cc

 #include <xapian/queryparser.h>
+#include <math.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string>
 #include "stringutils.h"
+#include "safeerrno.h"
+#include "omassert.h"
 using namespace std;
 …
+        }
+    }
-    if (begin.find_first_not_of("0123456789", b_b) != b_e)
-        // Not a number.
-        return Xapian::BAD_VALUENO;
-    if (end.find_first_not_of("0123456789", e_b) != e_e)
-        // Not a number.
-        return Xapian::BAD_VALUENO;
     // Adjust begin string if necessary.
     if (b_b)
         begin.erase(0, b_b);
 …
     else if (e_e != string::npos)
         end.resize(e_e);
+    // Parse the numbers to floating point.
+    double beginnum, endnum;
+    const char * startptr;
+    char * endptr;
+    errno = 0;
+    startptr = begin.c_str();
+    beginnum = strtod(startptr, &endptr);
+    if (endptr != startptr + begin.size())
+        // Invalid characters in string
+        return Xapian::BAD_VALUENO;
+    if (errno)
+        // Overflow or underflow
+        return Xapian::BAD_VALUENO;
+    errno = 0;
+    startptr = end.c_str();
+    endnum = strtod(startptr, &endptr);
+    if (endptr != startptr + end.size())
+        // Invalid characters in string
+        return Xapian::BAD_VALUENO;
+    if (errno)
+        // Overflow or underflow
+        return Xapian::BAD_VALUENO;
+    begin.assign(float_to_string(beginnum));
+    end.assign(float_to_string(endnum));
     return valno;
+}
+string
+Xapian::NumberValueRangeProcessor::float_to_string(double value)
+{
+    double mantissa;
+    int exponent;
+    mantissa = frexp(value, &exponent);
+    bool negative = false;
+    if (mantissa < 0) {
+        negative = true;
+        mantissa = -mantissa;
+    }
+    /* IEEE representation of doubles uses 11 bits for the exponent, with a
+     * bias of 1023.  There's then another 52 bits in the mantissa, so we need
+     * to add 1075 to be sure that the exponent won't be negative.  Even then,
+     * we check that the exponent isn't negative, and consider the value to be
+     * equal to zero if it is, to be safe on architectures which use a
+     * different representation.
+     */
+    exponent += 1075;
+    if (exponent < 0) {
+        /* Note - this can't happen on most architectures. */
+        exponent = 0;
+        mantissa = 0;
+        negative = false;
+    } else if (mantissa == 0) {
+        exponent = 0;
+    }
+    // First, store the exponent, as two bytes
+    // Top bit of first byte is a sign bit.
+    // If the sign bit is set, number is positive.
+    // If the sign bit is unset, number is negative.
+    // For negative numbers, we invert the bytes, so that the sort order
+    // is reversed (so that larger negative numbers come first).
+    int n = (exponent & 0x7f00) >> 8;
+    Assert(exponent >= 0);
+    Assert(exponent < 128);
+    string digits;
+    digits.push_back(negative ? 127 - n : 128 + n);
+    n = exponent & 0xff;
+    digits.push_back(negative ? 255 - n: n);
+    // Now, store the mantissa, in 7 bytes.
+    // For negative numbers, we invert the bytes, as for the exponent.
+    // Mantissa is in range .5 <= m < 1.
+    //
+    // Therefore, we first multiply by 512 and subtract 256, to get the first
+    // byte.  For subsequent bytes, we multiply by 256.
+    mantissa = mantissa * 512 - 256;
+    Assert(mantissa >= 0);
+    Assert(mantissa < 256);
+    int i;
+    for (i = 0; i != 7; ++i) {
+        n = static_cast<int>(floor(mantissa));
+        digits.push_back(negative ? 255 - n : n);
+        mantissa -= n;
+        Assert(mantissa >= 0);
+        Assert(mantissa < 1.0);
+        mantissa *= 256;
+    }
+    // Finally, we can chop off any trailing zeros.
+    i = digits.size();
+    while (i > 0 && digits[i - 1] == '\0') {
+        i--;
+    }
+    digits.resize(i);
+    return digits;
+}
+/// Return the byte at offset @a pos of @a str as an unsigned value, or 0 when
+/// @a pos lies at or beyond the end of the string.
+static inline unsigned int
+numfromstr(const std::string & str, std::string::size_type pos)
+{
+    if (pos >= str.size())
+        return 0;
+    return static_cast<unsigned char>(str[pos]);
+}
+/** Decode a string produced by float_to_string() back into a double.
+ *
+ *  Bytes missing from the end of @a value are read as zero.
+ *
+ *  @param value  The serialised number.
+ *  @return       The decoded value.
+ */
+double
+Xapian::NumberValueRangeProcessor::string_to_float(const std::string & value)
+{
+    // Byte 0 carries the sign flag in its top bit (set means positive) and
+    // the high bits of the biased exponent; byte 1 carries the low 8 bits.
+    // All bytes of a negative number are stored inverted.
+    unsigned int byte = numfromstr(value, 0);
+    const bool negative = (byte < 128);
+    int exponent = (negative ? 127 - byte : byte - 128) << 8;
+    byte = numfromstr(value, 1);
+    exponent += negative ? 255 - byte : byte;
+    // Remove the bias.
+    exponent -= 1075;
+
+    // Accumulate mantissa bytes 8 down to 3, least significant first.
+    double mantissa = 0;
+    int pos = 8;
+    while (pos != 2) {
+        byte = numfromstr(value, pos);
+        double contribution(negative ? 255 - byte : byte);
+        mantissa += ldexp(contribution, 8 * (1 - pos) - 1);
+        --pos;
+    }
+
+    // Byte 2 is the leading mantissa byte; adding 256 makes the scaled
+    // result fall in the range .5 <= m < 1.
+    byte = numfromstr(value, 2);
+    if (negative)
+        byte = 255 - byte;
+    byte += 256;
+    mantissa += ldexp(byte, -9);
+
+    const int sign = negative ? -1 : 1;
+    return sign * ldexp(mantissa, exponent);
+}

Download in other formats:

Original Format