Ticket #165: patch
File patch, 16.7 KB (added by , 17 years ago) |
---|
-
tests/queryparsertest.cc
22 22 #include <xapian.h> 23 23 #include <iostream> 24 24 #include <string> 25 #include <math.h> 25 26 #include "utils.h" 26 27 27 28 using namespace std; … … 1040 1041 1041 1042 static test test_value_range2_queries[] = { 1042 1043 { "a..b", "VALUE_RANGE 3 a b" }, 1043 { "1..12", "VALUE_RANGE 2 1 12" },1044 { "1..12", "VALUE_RANGE 2 \2044 \2047\200" }, 1044 1045 { "20070201..20070228", "VALUE_RANGE 1 20070201 20070228" }, 1045 { "$10..20", "VALUE_RANGE 4 10 20" },1046 { "$10..$20", "VALUE_RANGE 4 10 20" },1047 { "12..42kg", "VALUE_RANGE 5 12 42" },1048 { "12kg..42kg", "VALUE_RANGE 5 12 42" },1046 { "$10..20", "VALUE_RANGE 4 \2047@ \2048@" }, 1047 { "$10..$20", "VALUE_RANGE 4 \2047@ \2048@" }, 1048 { "12..42kg", "VALUE_RANGE 5 \2047\200 \2049P" }, 1049 { "12kg..42kg", "VALUE_RANGE 5 \2047\200 \2049P" }, 1049 1050 { "12kg..42", "VALUE_RANGE 3 12kg 42" }, 1050 1051 { "10..$20", "VALUE_RANGE 3 10 $20" }, 1051 1052 { "1999-03-12..2020-12-30", "VALUE_RANGE 1 19990312 20201230" }, … … 1054 1055 { "12/03/99..12/04/01", "VALUE_RANGE 1 19990312 20010412" }, 1055 1056 { "03-12-99..04-14-01", "VALUE_RANGE 1 19990312 20010414" }, 1056 1057 { "(test:a..test:b hello)", "(hello:(pos=1) FILTER VALUE_RANGE 3 test:a test:b)" }, 1057 { "12..42kg 5..6kg 1..12", "(VALUE_RANGE 2 1 12 AND (VALUE_RANGE 5 12 42 OR VALUE_RANGE 5 5 6))" },1058 { "12..42kg 5..6kg 1..12", "(VALUE_RANGE 2 \2044 \2047\200 AND (VALUE_RANGE 5 \2047\200 \2049P OR VALUE_RANGE 5 \2046@ \2046\200))" }, 1058 1059 { NULL, NULL } 1059 1060 }; 1060 1061 … … 1096 1097 return true; 1097 1098 } 1098 1099 1099 // Test NumberValueRangeProcessors with actual data. .1100 // Test NumberValueRangeProcessors with actual data. 1100 1101 static bool test_qp_value_range3() 1101 1102 { 1102 1103 Xapian::WritableDatabase db(Xapian::InMemory::open()); 1103 int low = 0; // FIXME - should it work with negative numbers?1104 // If so, test it with some by setting low to -101105 int high = 9; // Currently the test passes if high is 9, but not if it is 10.1104 double low = -10; 1105 double high = 20; 1106 double step = 0.5; 1106 1107 1107 for ( int i = low; i <= high; ++i) {1108 for (double i = low; i <= high; i += step) { 1108 1109 Xapian::Document doc; 1109 doc.add_value(1, om_tostring(i)); 1110 doc.add_value(1, Xapian::NumberValueRangeProcessor::float_to_string(i)); 1111 tout << "Value: " << i << " = " << Xapian::NumberValueRangeProcessor::float_to_string(i) << "\n"; 1110 1112 db.add_document(doc); 1111 1113 } 1112 1114 … … 1114 1116 Xapian::QueryParser qp; 1115 1117 qp.add_valuerangeprocessor(&vrp_num); 1116 1118 1117 for ( int start = low; start <= high; ++start) {1118 for ( int end = low; end <= high; ++end) {1119 for (double start = low; start <= high; start += step) { 1120 for (double end = low; end <= high; end += step) { 1119 1121 string query = om_tostring(start) + ".." + om_tostring(end); 1120 1122 tout << "Query: " << query << '\n'; 1121 1123 Xapian::Query qobj = qp.parse_query(query); 1124 tout << "Qobj: " << qobj.get_description() << '\n'; 1122 1125 Xapian::Enquire enq(db); 1123 1126 enq.set_query(qobj); 1124 Xapian::MSet mset = enq.get_mset(0, 1 + high - low);1127 Xapian::MSet mset = enq.get_mset(0, 1 + static_cast<int>(floor((high - low) / step))); 1125 1128 if (end < start) { 1126 1129 TEST_EQUAL(mset.size(), 0); 1127 1130 } else { 1128 //TEST_EQUAL(mset.size(), 1u + end - start); 1131 tout << "Expect " << mset.size() << " == " << (1u + (end - start) / step) << "\n"; 1132 TEST_EQUAL(mset.size(), 1u + (end - start) / step); 1129 1133 for (unsigned int j = 0; j != mset.size(); j++) { 1130 1134 TEST_EQUAL(mset[j].get_document().get_value(1), 1131 om_tostring(static_cast<int>(j)+ start));1135 Xapian::NumberValueRangeProcessor::float_to_string(j * step + start)); 1132 1136 } 1133 1137 } 1134 1138 } … … 1136 1140 return true; 1137 1141 } 1138 1142 1143 static double test_value_range_numbers[] = { 1144 -pow(2, 1022), 1145 -1024.5, 1146 -3.14159265358979323846, 1147 -2, 1148 -1.8, 1149 -1.1, 1150 -1, 1151 -0.5, 1152 -0.2, 1153 -0.1, 1154 -0.000005, 1155 -0.000002, 1156 -0.000001, 1157 -pow(2, -1023), 1158 -pow(2, -1024), 1159 -pow(2, -1074), 1160 0, 1161 pow(2, -1074), 1162 pow(2, -1024), 1163 pow(2, -1023), 1164 0.000001, 1165 0.000002, 1166 0.000005, 1167 0.1, 1168 0.2, 1169 0.5, 1170 1, 1171 1.1, 1172 1.8, 1173 2, 1174 3.14159265358979323846, 1175 1024.5, 1176 pow(2, 1022), 1177 1178 64 // Magic number which we stop at. 1179 }; 1180 1181 // Test serialisation and unserialisation of various numbers. 1182 static bool test_value_range_serialise1() 1183 { 1184 double prevnum = 0; 1185 string prevstr = ""; 1186 bool started = false; 1187 for (double *p = test_value_range_numbers; *p != 64; ++p) { 1188 double num = *p; 1189 tout << "Number: " << num << '\n'; 1190 string str = Xapian::NumberValueRangeProcessor::float_to_string(num); 1191 tout << "String: " << str << '\n'; 1192 TEST_EQUAL(Xapian::NumberValueRangeProcessor::string_to_float(str), num); 1193 1194 if (started) { 1195 TEST_AND_EXPLAIN(prevnum < num, "Expected previous number (" << 1196 prevnum << ") to be less than current number (" << 1197 num << ")"); 1198 TEST_AND_EXPLAIN(prevstr < str, "Expected previous string (" << 1199 prevstr << ") to be less than current string (" << 1200 str << ")"); 1201 } 1202 1203 prevnum = num; 1204 prevstr = str; 1205 started = true; 1206 } 1207 return true; 1208 } 1209 1139 1210 static test test_value_daterange1_queries[] = { 1140 1211 { "12/03/99..12/04/01", "VALUE_RANGE 1 19991203 20011204" }, 1141 1212 { "03-12-99..04-14-01", "VALUE_RANGE 1 19990312 20010414" }, … … 1272 1343 TESTCASE(qp_value_range2), 1273 1344 TESTCASE(qp_value_range3), 1274 1345 TESTCASE(qp_value_daterange1), 1346 TESTCASE(value_range_serialise1), 1275 1347 TESTCASE(qp_value_customrange1), 1276 1348 TESTCASE(qp_stoplist1), 1277 1349 END_OF_TESTCASES -
include/xapian/queryparser.h
152 152 153 153 /** Handle a number range. 154 154 * 155 * This class currently has a design bug - a string comparison is used so the 156 * numbers must be the same length for it to work, but you can't just zero 157 * pad the values in the database because those from the query aren't. We 158 * therefore recommend that you avoid using this class at present. 155 * This class requires that the values stored which the range is being applied 156 * to are numbers which have been converted to strings using its \a 157 * float_to_string() method. This method produces strings which will sort in 158 * numeric order, so you can use it if you want to be able to sort based on 159 * the value in numeric order, too. 159 160 */ 160 161 class XAPIAN_VISIBILITY_DEFAULT NumberValueRangeProcessor : public ValueRangeProcessor { 161 162 Xapian::valueno valno; … … 163 164 std::string str; 164 165 165 166 public: 167 /** Constructor. 168 * 169 * @param valno_ The value number to return from operator(). 170 */ 166 171 NumberValueRangeProcessor(Xapian::valueno valno_) 167 172 : valno(valno_), prefix(false) { } 168 173 174 /** Constructor. 175 * 176 * @param valno_ The value number to return from operator(). 177 * 178 * @param str_ A string to look for to recognise values as belonging 179 * to this numeric range. 180 * 181 * @param prefix_ Whether to look for the string at the start or end of 182 * the values. If true, the string is a prefix; if 183 * false, the string is a suffix. 184 * 185 * The string supplied in str_ is used by \a operator() to decide whether 186 * the pair of strings supplied to it constitute a valid range. If 187 * prefix_ is true, the first value in a range must begin with str_ (and 188 * the second value may also begin with str_, but this is not compulsory); 189 * if prefix_ is false, the second value in a range must end with str_ 190 * (and the first value may also end with str_, but this is not 191 * compulsory). 192 * 193 * If str_ is empty, the setting of prefix_ is irrelevant, and no special 194 * strings are required at the start or end of the strings defining the 195 * range. 196 * 197 * The remainder of both strings defining the endpoints must be valid 198 * floating point numbers, as defined by the ANSI C standard library 199 * function `strtod()`. 200 * 201 * For example, if str_ is "$" and prefix_ is true, and the range 202 * processor has been added to the queryparser, the queryparser will 203 * accept "$10..50" or "$10..50", but not "10..50" or "10..$50" as valid 204 * ranges. If str_ is "kg" and prefix_ is false, the queryparser will 205 * accept "10..50kg" or "10kg..50kg", but not "10..50" or "10kg..50" as 206 * valid ranges. 207 */ 169 208 NumberValueRangeProcessor(Xapian::valueno valno_, const std::string &str_, 170 209 bool prefix_ = true) 171 210 : valno(valno_), prefix(prefix_), str(str_) { } 172 211 212 /** See if <begin>..<end> is a valid numeric value range. 213 * 214 * If <begin>..<end> is a valid numeric value range, and has the 215 * appropriate prefix or suffix (if specified) required for this 216 * NumberValueRangeProcessor, this method returns the value number of 217 * range filter on, and sets begin and end to the appropriate serialised 218 * values needed to delimit the range. Otherwise it returns 219 * Xapian::BAD_VALUENO. 220 */ 173 221 Xapian::valueno operator()(std::string &begin, std::string &end); 222 223 /** Convert a floating point number to a string, preserving sort order. 224 * 225 * This method converts a floating point number to a string, suitable for 226 * using as a value for numeric range restriction, or for use as a sort 227 * key. 228 * 229 * The conversion attempts to ensure that, for any pair of values supplied 230 * to the conversion algorithm, the result of comparing the original 231 * values (with a numeric comparison operator) will be the same as the 232 * result of comparing the resulting values (with a string comparison 233 * operator). On platforms which represent doubles with the precisions 234 * specified by IEEE_754, this will be the case: if the representation of 235 * doubles is more precise, it is possible that two very close doubles 236 * will be mapped to the same string, so will compare equal. 237 * 238 * The conversion is platform independent. 239 */ 240 static std::string float_to_string(double value); 241 242 /** Convert a string to a floating point number. 243 * 244 * This expects the input to be a string produced by \a float_to_string(). 245 * If the input is not such a string, the value returned is undefined (but 246 * no error will be thrown). 247 * 248 * The result of the conversion will be exactly the value which was 249 * supplied to \a string_to_float() when making the string on platforms 250 * which represent doubles with the precisions specified by IEEE_754, but 251 * may be a different (nearby) value on other platforms. 252 */ 253 static double string_to_float(const std::string & value); 254 174 255 }; 175 256 176 257 /// Build a Xapian::Query object from a user query string. -
api/valuerangeproc.cc
22 22 23 23 #include <xapian/queryparser.h> 24 24 25 #include <math.h> 25 26 #include <stdio.h> 26 27 #include <stdlib.h> 27 28 28 29 #include <string> 29 30 #include "stringutils.h" 31 #include "safeerrno.h" 32 #include "omassert.h" 30 33 31 34 using namespace std; 32 35 … … 151 154 152 155 if (str.size()) { 153 156 if (prefix) { 154 // If there's a prefix, require it on the start .157 // If there's a prefix, require it on the start of the range. 155 158 if (!begins_with(begin, str)) { 156 159 // Prefix not given. 157 160 return Xapian::BAD_VALUENO; 158 161 } 159 162 b_b = str.size(); 160 // But it's optional on the end , e.g. $10..50163 // But it's optional on the end of the range, e.g. $10..50 161 164 if (begins_with(end, str)) { 162 165 e_b = str.size(); 163 166 } 164 167 } else { 165 // If there's a suffix, require it on the end .168 // If there's a suffix, require it on the end of the range. 166 169 if (!ends_with(end, str)) { 167 // Prefix not given.170 // Suffix not given. 168 171 return Xapian::BAD_VALUENO; 169 172 } 170 173 e_e = end.size() - str.size(); 171 // But it's optional on the start , e.g. 10..50kg174 // But it's optional on the start of the range, e.g. 10..50kg 172 175 if (ends_with(begin, str)) { 173 176 b_e = begin.size() - str.size(); 174 177 } 175 178 } 176 179 } 177 180 178 if (begin.find_first_not_of("0123456789", b_b) != b_e)179 // Not a number.180 return Xapian::BAD_VALUENO;181 182 if (end.find_first_not_of("0123456789", e_b) != e_e)183 // Not a number.184 return Xapian::BAD_VALUENO;185 186 181 // Adjust begin string if necessary. 187 182 if (b_b) 188 183 begin.erase(0, b_b); … … 195 190 else if (e_e != string::npos) 196 191 end.resize(e_e); 197 192 193 194 // Parse the numbers to floating point. 195 double beginnum, endnum; 196 const char * startptr; 197 char * endptr; 198 199 errno = 0; 200 startptr = begin.c_str(); 201 beginnum = strtod(startptr, &endptr); 202 if (endptr != startptr + begin.size()) 203 // Invalid characters in string 204 return Xapian::BAD_VALUENO; 205 if (errno) 206 // Overflow or underflow 207 return Xapian::BAD_VALUENO; 208 209 errno = 0; 210 startptr = end.c_str(); 211 endnum = strtod(startptr, &endptr); 212 if (endptr != startptr + end.size()) 213 // Invalid characters in string 214 return Xapian::BAD_VALUENO; 215 if (errno) 216 // Overflow or underflow 217 return Xapian::BAD_VALUENO; 218 219 begin.assign(float_to_string(beginnum)); 220 end.assign(float_to_string(endnum)); 221 198 222 return valno; 199 223 } 224 225 string 226 Xapian::NumberValueRangeProcessor::float_to_string(double value) 227 { 228 double mantissa; 229 int exponent; 230 231 mantissa = frexp(value, &exponent); 232 233 bool negative = false; 234 if (mantissa < 0) { 235 negative = true; 236 mantissa = -mantissa; 237 } 238 239 /* IEEE representation of doubles uses 11 bits for the exponent, with a 240 * bias of 1023. There's then another 52 bits in the mantissa, so we need 241 * to add 1075 to be sure that the exponent won't be negative. Even then, 242 * we check that the exponent isn't negative, and consider the value to be 243 * equal to zero if it is, to be safe on architectures which use a 244 * different representation. 245 */ 246 exponent += 1075; 247 if (exponent < 0) { 248 /* Note - this can't happen on most architectures. */ 249 exponent = 0; 250 mantissa = 0; 251 negative = false; 252 } else if (mantissa == 0) { 253 exponent = 0; 254 } 255 256 // First, store the exponent, as two bytes 257 // Top bit of first byte is a sign bit. 258 // If the sign bit is set, number is positive. 259 // If the sign bit is unset, number is negative. 260 // For negative numbers, we invert the bytes, so that the sort order 261 // is reversed (so that larger negative numbers come first). 262 int n = (exponent & 0x7f00) >> 8; 263 Assert(exponent >= 0); 264 Assert(exponent < 128); 265 string digits; 266 digits.push_back(negative ? 127 - n : 128 + n); 267 268 n = exponent & 0xff; 269 digits.push_back(negative ? 255 - n: n); 270 271 // Now, store the mantissa, in 7 bytes. 272 // For negative numbers, we invert the bytes, as for the exponent. 273 // Mantissa is in range .5 <= m < 1. 274 // 275 // Therefore, we first multiply by 512 and subtract 256, to get the first 276 // byte. For subsequent bytes, we multiply by 256. 277 mantissa = mantissa * 512 - 256; 278 Assert(mantissa >= 0); 279 Assert(mantissa < 256); 280 int i; 281 for (i = 0; i != 7; ++i) { 282 n = static_cast<int>(floor(mantissa)); 283 digits.push_back(negative ? 255 - n : n); 284 mantissa -= n; 285 Assert(mantissa >= 0); 286 Assert(mantissa < 1.0); 287 mantissa *= 256; 288 } 289 290 // Finally, we can chop off any trailing zeros. 291 i = digits.size(); 292 while (i > 0 && digits[i - 1] == '\0') { 293 i--; 294 } 295 digits.resize(i); 296 297 return digits; 298 } 299 300 /// Get a number from the character at a given position in a string, returning 301 /// 0 if the string isn't long enough. 302 static inline unsigned int 303 numfromstr(const std::string & str, std::string::size_type pos) 304 { 305 return (str.size() > pos) ? static_cast<unsigned char>(str[pos]) : 0; 306 } 307 308 double 309 Xapian::NumberValueRangeProcessor::string_to_float(const std::string & value) 310 { 311 // Read the exponent 312 unsigned int n = numfromstr(value, 0); 313 bool negative = (n < 128); 314 int exponent = (negative ? 127 - n : n - 128) << 8; 315 n = numfromstr(value, 1); 316 exponent += negative ? 255 - n : n; 317 exponent -= 1075; 318 319 // Read the mantissa 320 double mantissa = 0; 321 322 for (int i = 8; i != 2; --i) 323 { 324 n = numfromstr(value, i); 325 double byteval(negative ? 255 - n : n); 326 mantissa += ldexp(byteval, 8 * (1 - i) - 1); 327 } 328 329 n = numfromstr(value, 2); 330 if (negative) n = 255 - n; 331 n += 256; 332 mantissa += ldexp(n, -9); 333 334 return (negative ? -1 : 1) * ldexp(mantissa, exponent); 335 }