Ticket #165: numvaluerange.patch
File numvaluerange.patch, 10.0 KB (added by , 17 years ago) |
---|
-
tests/queryparsertest.cc
22 22 #include <xapian.h> 23 23 #include <iostream> 24 24 #include <string> 25 #include <math.h> 25 26 #include "utils.h" 26 27 27 28 using namespace std; … … 1040 1041 1041 1042 static test test_value_range2_queries[] = { 1042 1043 { "a..b", "VALUE_RANGE 3 a b" }, 1043 { "1..12", "VALUE_RANGE 2 1 12" },1044 { "1..12", "VALUE_RANGE 2 \2044 \2047\200" }, 1044 1045 { "20070201..20070228", "VALUE_RANGE 1 20070201 20070228" }, 1045 { "$10..20", "VALUE_RANGE 4 10 20" },1046 { "$10..$20", "VALUE_RANGE 4 10 20" },1047 { "12..42kg", "VALUE_RANGE 5 12 42" },1048 { "12kg..42kg", "VALUE_RANGE 5 12 42" },1046 { "$10..20", "VALUE_RANGE 4 \2047@ \2048@" }, 1047 { "$10..$20", "VALUE_RANGE 4 \2047@ \2048@" }, 1048 { "12..42kg", "VALUE_RANGE 5 \2047\200 \2049P" }, 1049 { "12kg..42kg", "VALUE_RANGE 5 \2047\200 \2049P" }, 1049 1050 { "12kg..42", "VALUE_RANGE 3 12kg 42" }, 1050 1051 { "10..$20", "VALUE_RANGE 3 10 $20" }, 1051 1052 { "1999-03-12..2020-12-30", "VALUE_RANGE 1 19990312 20201230" }, … … 1054 1055 { "12/03/99..12/04/01", "VALUE_RANGE 1 19990312 20010412" }, 1055 1056 { "03-12-99..04-14-01", "VALUE_RANGE 1 19990312 20010414" }, 1056 1057 { "(test:a..test:b hello)", "(hello:(pos=1) FILTER VALUE_RANGE 3 test:a test:b)" }, 1057 { "12..42kg 5..6kg 1..12", "(VALUE_RANGE 2 1 12 AND (VALUE_RANGE 5 12 42 OR VALUE_RANGE 5 5 6))" },1058 { "12..42kg 5..6kg 1..12", "(VALUE_RANGE 2 \2044 \2047\200 AND (VALUE_RANGE 5 \2047\200 \2049P OR VALUE_RANGE 5 \2046@ \2046\200))" }, 1058 1059 { NULL, NULL } 1059 1060 }; 1060 1061 … … 1102 1103 Xapian::WritableDatabase db(Xapian::InMemory::open()); 1103 1104 int low = 0; // FIXME - should it work with negative numbers? 1104 1105 // If so, test it with some by setting low to -10 1105 int high = 9; // Currently the test passes if high is 9, but not if it is 10.1106 int high = 100; // Currently the test passes if high is 9, but not if it is 10. 
1106 1107 1107 1108 for (int i = low; i <= high; ++i) { 1108 1109 Xapian::Document doc; 1109 doc.add_value(1, om_tostring(i));1110 doc.add_value(1, Xapian::NumberValueRangeProcessor::float_to_string(i)); 1110 1111 db.add_document(doc); 1111 1112 } 1112 1113 … … 1125 1126 if (end < start) { 1126 1127 TEST_EQUAL(mset.size(), 0); 1127 1128 } else { 1128 //TEST_EQUAL(mset.size(), 1u + end - start);1129 TEST_EQUAL(mset.size(), 1u + end - start); 1129 1130 for (unsigned int j = 0; j != mset.size(); j++) { 1130 1131 TEST_EQUAL(mset[j].get_document().get_value(1), 1131 om_tostring(static_cast<int>(j) + start));1132 Xapian::NumberValueRangeProcessor::float_to_string(static_cast<int>(j) + start)); 1132 1133 } 1133 1134 } 1134 1135 } … … 1136 1137 return true; 1137 1138 } 1138 1139 1140 static double test_value_range_numbers[] = { 1141 -pow(2, 1022), 1142 -1024.5, 1143 -3.14159265358979323846, 1144 -2, 1145 -1.8, 1146 -1.1, 1147 -1, 1148 -0.5, 1149 -0.2, 1150 -0.1, 1151 -0.000005, 1152 -0.000002, 1153 -0.000001, 1154 -pow(2, -1023), 1155 -pow(2, -1024), 1156 -pow(2, -1074), 1157 0, 1158 pow(2, -1074), 1159 pow(2, -1024), 1160 pow(2, -1023), 1161 0.000001, 1162 0.000002, 1163 0.000005, 1164 0.1, 1165 0.2, 1166 0.5, 1167 1, 1168 1.1, 1169 1.8, 1170 2, 1171 3.14159265358979323846, 1172 1024.5, 1173 pow(2, 1022), 1174 1175 64 // Magic number which we stop at. 1176 }; 1177 1178 // Test serialisation and unserialisation of various numbers and string. 
1179 static bool test_value_range_serialise1() 1180 { 1181 double prevnum = 0; 1182 string prevstr = ""; 1183 bool started = false; 1184 for (double *p = test_value_range_numbers; *p != 64; ++p) { 1185 double num = *p; 1186 tout << "Number: " << num << '\n'; 1187 string str = Xapian::NumberValueRangeProcessor::float_to_string(num); 1188 tout << "String: " << str << '\n'; 1189 TEST_EQUAL(Xapian::NumberValueRangeProcessor::string_to_float(str), num); 1190 1191 if (started) { 1192 TEST_AND_EXPLAIN(prevnum < num, "Expected previous number (" << 1193 prevnum << ") to be less than current number (" << 1194 num << ")"); 1195 TEST_AND_EXPLAIN(prevstr < str, "Expected previous string (" << 1196 prevstr << ") to be less than current string (" << 1197 str << ")"); 1198 } 1199 1200 prevnum = num; 1201 prevstr = str; 1202 started = true; 1203 } 1204 return true; 1205 } 1206 1139 1207 static test test_value_daterange1_queries[] = { 1140 1208 { "12/03/99..12/04/01", "VALUE_RANGE 1 19991203 20011204" }, 1141 1209 { "03-12-99..04-14-01", "VALUE_RANGE 1 19990312 20010414" }, … … 1272 1340 TESTCASE(qp_value_range2), 1273 1341 TESTCASE(qp_value_range3), 1274 1342 TESTCASE(qp_value_daterange1), 1343 TESTCASE(value_range_serialise1), 1275 1344 TESTCASE(qp_value_customrange1), 1276 1345 TESTCASE(qp_stoplist1), 1277 1346 END_OF_TESTCASES -
include/xapian/queryparser.h
171 171 : valno(valno_), prefix(prefix_), str(str_) { } 172 172 173 173 Xapian::valueno operator()(std::string &begin, std::string &end); 174 175 static std::string float_to_string(double value); 176 static double string_to_float(const std::string & value); 177 174 178 }; 175 179 176 180 /// Build a Xapian::Query object from a user query string. -
api/valuerangeproc.cc
22 22 23 23 #include <xapian/queryparser.h> 24 24 25 #include <math.h> 25 26 #include <stdio.h> 26 27 #include <stdlib.h> 27 28 28 29 #include <string> 29 30 #include "stringutils.h" 31 #include "safeerrno.h" 32 #include "omassert.h" 30 33 31 34 using namespace std; 32 35 … … 175 178 } 176 179 } 177 180 178 if (begin.find_first_not_of("0123456789", b_b) != b_e)179 // Not a number.180 return Xapian::BAD_VALUENO;181 182 if (end.find_first_not_of("0123456789", e_b) != e_e)183 // Not a number.184 return Xapian::BAD_VALUENO;185 186 181 // Adjust begin string if necessary. 187 182 if (b_b) 188 183 begin.erase(0, b_b); … … 195 190 else if (e_e != string::npos) 196 191 end.resize(e_e); 197 192 193 194 // Parse the numbers to floating point. 195 double beginnum, endnum; 196 const char * startptr; 197 char * endptr; 198 199 errno = 0; 200 startptr = begin.c_str(); 201 beginnum = strtod(startptr, &endptr); 202 if (endptr != startptr + begin.size()) 203 // Invalid characters in string 204 return Xapian::BAD_VALUENO; 205 if (errno) 206 // Overflow or underflow 207 return Xapian::BAD_VALUENO; 208 209 errno = 0; 210 startptr = end.c_str(); 211 endnum = strtod(startptr, &endptr); 212 if (endptr != startptr + end.size()) 213 // Invalid characters in string 214 return Xapian::BAD_VALUENO; 215 if (errno) 216 // Overflow or underflow 217 return Xapian::BAD_VALUENO; 218 219 begin.assign(float_to_string(beginnum)); 220 end.assign(float_to_string(endnum)); 221 198 222 return valno; 199 223 } 224

/** Encode a double as a byte string whose bytewise order matches numeric
 *  order.
 *
 *  Layout of the result (before trailing zero bytes are trimmed):
 *   - byte 0: top bit set for non-negative values, clear for negative ones;
 *     the low 7 bits hold the high bits of the biased exponent.
 *   - byte 1: the low 8 bits of the biased exponent.
 *   - bytes 2..8: the mantissa, most significant byte first.
 *  For negative values every byte is complemented, which reverses the sort
 *  order so that larger negative numbers come first.
 *
 *  Special cases:
 *   - zero (of either sign) encodes as the single byte "\x80";
 *   - +infinity encodes as nine 0xff bytes, which sorts after every number;
 *   - -infinity and NaN encode as the empty string, which sorts before every
 *     number.  NaN has no place in the ordering and does not round-trip.
 *
 *  @param value  The number to encode.
 *  @return       The encoded string; it may contain embedded zero bytes.
 */
string
Xapian::NumberValueRangeProcessor::float_to_string(double value)
{
    // frexp() leaves the exponent unspecified for NaN and infinities, and
    // converting the resulting mantissa to int would be undefined, so these
    // values get fixed encodings.  (value != value) is true only for NaN.
    if (value != value || value <= -HUGE_VAL)
        return string();
    if (value >= HUGE_VAL)
        return string(9, '\xff');

    int exponent;
    double mantissa = frexp(value, &exponent);

    const bool negative = (mantissa < 0);
    if (negative)
        mantissa = -mantissa;

    /* IEEE representation of doubles uses 11 bits for the exponent, with a
     * bias of 1023.  There's then another 52 bits in the mantissa, so we need
     * to add 1075 to be sure that the exponent won't be negative.  Even then,
     * we check that the exponent isn't negative, and consider the value to be
     * equal to zero if it is, to be safe on architectures which use a
     * different representation.
     */
    exponent += 1075;
    if (mantissa == 0 || exponent < 0) {
        // Zero has no leading mantissa bit, so the scaling below (which
        // assumes .5 <= mantissa < 1) must not be applied to it.
        return string(1, '\x80');
    }

    // The exponent is stored in 15 bits: 7 in the first byte, 8 in the second.
    Assert(exponent < 0x8000);

    string digits;
    int n = (exponent & 0x7f00) >> 8;
    digits.push_back(static_cast<char>(negative ? 127 - n : 128 + n));

    n = exponent & 0xff;
    digits.push_back(static_cast<char>(negative ? 255 - n : n));

    // Now, store the mantissa, in 7 bytes.
    // Mantissa is in range .5 <= m < 1, so multiplying by 512 and subtracting
    // 256 drops the always-set leading bit and leaves the first byte in the
    // integer part.  Each later byte is obtained by multiplying the remaining
    // fraction by 256.
    mantissa = mantissa * 512 - 256;
    Assert(mantissa >= 0);
    Assert(mantissa < 256);
    for (int i = 0; i != 7; ++i) {
        n = static_cast<int>(floor(mantissa));
        digits.push_back(static_cast<char>(negative ? 255 - n : n));
        mantissa -= n;
        Assert(mantissa >= 0);
        Assert(mantissa < 1.0);
        mantissa *= 256;
    }

    // Finally, chop off any trailing zero bytes; string_to_float() reads
    // missing bytes back as zero.
    string::size_type len = digits.size();
    while (len > 0 && digits[len - 1] == '\0')
        --len;
    digits.resize(len);

    return digits;
}

/// Get a number from the character at a given position in a string, returning
/// 0 if the string isn't long enough.
302 static inline unsigned int 303 numfromstr(const std::string & str, std::string::size_type pos) 304 { 305 return (str.size() > pos) ? static_cast<unsigned char>(str[pos]) : 0; 306 } 307 308 double 309 Xapian::NumberValueRangeProcessor::string_to_float(const std::string & value) 310 { 311 // Read the exponent 312 unsigned int n = numfromstr(value, 0); 313 bool negative = (n < 128); 314 int exponent = (negative ? 127 - n : n - 128) << 8; 315 n = numfromstr(value, 1); 316 exponent += negative ? 255 - n : n; 317 exponent -= 1075; 318 319 // Read the mantissa 320 double mantissa = 0; 321 322 for (int i = 8; i != 2; --i) 323 { 324 n = numfromstr(value, i); 325 double byteval(negative ? 255 - n : n); 326 mantissa += ldexp(byteval, 8 * (1 - i) - 1); 327 } 328 329 n = numfromstr(value, 2); 330 if (negative) n = 255 - n; 331 n += 256; 332 mantissa += ldexp(n, -9); 333 334 return (negative ? -1 : 1) * ldexp(mantissa, exponent); 335 } 336