Ticket #157: boolgroups.patch
File boolgroups.patch, 11.4 KB (added 17 years ago)
-
queryparser/queryparser.lemony
25 25 #include "queryparser_internal.h" 26 26 #include <xapian/unicode.h> 27 27 #include "utils.h" 28 #include "autoptr.h" 28 29 29 30 // Include the list of token values lemon generates. 30 31 #include "queryparser_token.h" … … 100 101 /// Flag, true iff this represents a "MatchNothing" query. 101 102 bool match_nothing; 102 103 104 /// The value number that this represents, if it's a range query. 105 /// (Needed, since we can't get the value number back out of the "q" member.) 106 Xapian::valueno valno; 107 103 108 public: 104 109 QpQuery(const QpQuery & tocopy) 105 : q(tocopy.q), match_nothing(tocopy.match_nothing) 110 : q(tocopy.q), match_nothing(tocopy.match_nothing), 111 valno(Xapian::BAD_VALUENO) 106 112 {} 107 113 108 114 QpQuery & operator=(const QpQuery & tocopy) … … 115 121 /** A query consisting of a single term. */ 116 122 QpQuery(const std::string & tname, Xapian::termcount wqf, 117 123 Xapian::termpos pos) 118 : q(tname, wqf, pos), match_nothing(false) 124 : q(tname, wqf, pos), match_nothing(false), 125 valno(Xapian::BAD_VALUENO) 119 126 {} 120 127 121 128 /** A query consisting of two subqueries, combined with operator op. 
*/ 122 129 QpQuery(Query::op op, const QpQuery & left, const QpQuery & right) 123 : q(op, left.get(), right.get()), match_nothing(false) 130 : q(op, left.get(), right.get()), match_nothing(false), 131 valno(Xapian::BAD_VALUENO) 124 132 {} 125 133 126 QpQuery(Query::op op_, Xapian::valueno valno ,134 QpQuery(Query::op op_, Xapian::valueno valno_, 127 135 const std::string &begin, const std::string &end) 128 : q(op_, valno, begin, end), match_nothing(false) 136 : q(op_, valno_, begin, end), match_nothing(false), 137 valno(valno_) 129 138 {} 130 139 131 QpQuery(const Query & q_) : q(q_), match_nothing(false) {}132 QpQuery() : q(), match_nothing(false) {}133 QpQuery(bool m) : q(), match_nothing(m) {}140 QpQuery(const Query & q_) : q(q_), match_nothing(false), valno(Xapian::BAD_VALUENO) {} 141 QpQuery() : q(), match_nothing(false), valno(Xapian::BAD_VALUENO) {} 142 QpQuery(bool m) : q(), match_nothing(m), valno(Xapian::BAD_VALUENO) {} 134 143 Query & get() { return q; } 135 144 const Query & get() const { return q; } 145 Xapian::valueno get_valno() const { return valno; } 136 146 137 147 /// True iff the query is not empty, and doesn't explicitly match nothing. 138 148 bool can_match() { return (!q.empty() && !match_nothing); } … … 160 170 #endif 161 171 }; 162 172 173 /// A structure identifying a group of filter terms 174 struct filter_group_id { 175 /** The prefix of the filter terms. 176 * This is used for boolean filter terms. 177 */ 178 string prefix; 179 180 /** The value number of the filter terms. 181 * This is used for value range terms. 182 */ 183 Xapian::valueno valno; 184 185 /// Make a new filter_group_id for boolean filter terms. 186 explicit filter_group_id(const string & prefix_) : prefix(prefix_), valno(Xapian::BAD_VALUENO) {} 187 188 /// Make a new filter_group_id for value range terms. 189 explicit filter_group_id(Xapian::valueno valno_) : prefix(), valno(valno_) {} 190 191 /// Compare to another filter_group_id. 
192 bool operator<(const filter_group_id & other) const { 193 if (prefix != other.prefix) { 194 return prefix < other.prefix; 195 } 196 return valno < other.valno; 197 } 198 }; 199 163 200 /** Class used to pass information about a token from lexer to parser. 164 201 * 165 202 * Generally a this class carries term information, but it can be used for the … … 177 214 bool stem; 178 215 termpos pos; 179 216 180 std::string make_term() const;181 182 217 public: 183 218 Term(const string &name_, termpos pos_) : name(name_), stem(false), pos(pos_) { } 184 219 Term(const string &name_) : name(name_), stem(false), pos(0) { } 220 Term(const string &name_, const string &prefix_) 221 : name(name_), prefix(prefix_), stem(false), pos(0) { } 185 222 Term(termpos pos_) : stem(false), pos(pos_) { } 186 223 Term(State * state_, const string &name_, const string &prefix_, 187 224 const string &unstemmed_, bool stem_, termpos pos_) 188 225 : state(state_), name(name_), prefix(prefix_), unstemmed(unstemmed_), 189 226 stem(stem_), pos(pos_) { } 190 227 228 std::string make_term() const; 229 191 230 void dont_stem() { stem = false; } 192 231 193 232 termpos get_termpos() const { return pos; } 194 233 234 filter_group_id get_filter_group_id() const { return filter_group_id(prefix); } 235 195 236 QpQuery * as_query() const { return new QpQuery(make_term(), 1, pos); } 196 237 197 238 QpQuery * as_wildcarded_query(State * state) const; … … 589 630 // until the next space or ')' as part of the boolean 590 631 // term. 
591 632 it = p; 592 if (prefix_needs_colon(prefix, *it)) 593 prefix += ':'; 594 string term; 633 string name; 595 634 while (it != end && *it > ' ' && *it != ')') 596 Unicode::append_utf8( term, *it++);597 prefix += term;635 Unicode::append_utf8(name, *it++); 636 AutoPtr<Term> term(new Term(name, prefix)); 598 637 field += ':'; 599 field += term; 600 unstem.insert(make_pair(prefix, field)); 601 Parse(pParser, BOOLEAN_FILTER, new Term(prefix), 602 &state); 638 field += name; 639 unstem.insert(make_pair(term->make_term(), field)); 640 Parse(pParser, BOOLEAN_FILTER, term.release(), &state); 603 641 continue; 604 642 } 605 643 … … 858 896 QpQuery query; 859 897 QpQuery love; 860 898 QpQuery hate; 861 QpQuery filter; 899 900 // filter is a map from prefix to a query for that prefix. Queries with 901 // the same prefix are combined with OR, and the results of this are 902 // combined with AND to get the full filter. 903 map<filter_group_id, QpQuery> filter; 904 905 QpQuery merge_filters() const { 906 QpQuery q; 907 for (map<filter_group_id, QpQuery>::const_iterator i = filter.begin(); 908 i != filter.end(); ++i) 909 { 910 add_to_query(q, Query::OP_AND, i->second.get()); 911 } 912 return q; 913 } 862 914 }; 863 915 864 916 class TermList { … … 1078 1130 // Handle any boolean filters. 1079 1131 if (!P->filter.empty()) { 1080 1132 if (E->empty()) { 1081 *E = P-> filter;1133 *E = P->merge_filters(); 1082 1134 // FIXME and make the query boolean somehow... 1083 1135 } else { 1084 *E = QpQuery(Query::OP_FILTER, *E, P-> filter);1136 *E = QpQuery(Query::OP_FILTER, *E, P->merge_filters()); 1085 1137 } 1086 1138 } 1087 1139 // FIXME what if E->empty() (all terms are stopwords)? 
… … 1108 1160 yy_parse_failed(yypParser); 1109 1161 return; 1110 1162 } 1163 Xapian::valueno valno = range->get_valno(); 1111 1164 P = new ProbQuery; 1112 P->filter = *range;1165 P->filter[filter_group_id(valno)] = *range; 1113 1166 delete range; 1114 1167 } 1115 1168 … … 1121 1174 yy_parse_failed(yypParser); 1122 1175 return; 1123 1176 } 1177 Xapian::valueno valno = range->get_valno(); 1124 1178 P = Q; 1125 add_to_query(P->filter , Query::OP_AND, *range);1179 add_to_query(P->filter[filter_group_id(valno)], Query::OP_OR, *range); 1126 1180 delete range; 1127 1181 } 1128 1182 … … 1192 1246 1193 1247 prob(P) ::= BOOLEAN_FILTER(T). { 1194 1248 P = new ProbQuery; 1195 P->filter = T->as_query_object();1249 P->filter[T->get_filter_group_id()] = T->as_query_object(); 1196 1250 delete T; 1197 1251 } 1198 1252 1199 1253 prob(P) ::= stop_prob(Q) BOOLEAN_FILTER(T). { 1200 1254 P = Q; 1201 // FIXME we shouldOR filters with the same prefix...1202 add_to_query(P->filter , Query::OP_AND, T->as_query_object());1255 // We OR filters with the same prefix... 1256 add_to_query(P->filter[T->get_filter_group_id()], Query::OP_OR, T->as_query_object()); 1203 1257 delete T; 1204 1258 } 1205 1259 1206 1260 prob(P) ::= LOVE BOOLEAN_FILTER(T). { 1207 1261 // LOVE BOOLEAN_FILTER(T) is just the same as BOOLEAN_FILTER 1208 1262 P = new ProbQuery; 1209 P->filter = T->as_query_object();1263 P->filter[T->get_filter_group_id()] = T->as_query_object(); 1210 1264 delete T; 1211 1265 } 1212 1266 1213 1267 prob(P) ::= stop_prob(Q) LOVE BOOLEAN_FILTER(T). { 1214 1268 // LOVE BOOLEAN_FILTER(T) is just the same as BOOLEAN_FILTER 1215 1269 P = Q; 1216 // FIXME we shouldOR filters with the same prefix...1217 add_to_query(P->filter , Query::OP_AND, T->as_query_object());1270 // We OR filters with the same prefix... 1271 add_to_query(P->filter[T->get_filter_group_id()], Query::OP_OR, T->as_query_object()); 1218 1272 delete T; 1219 1273 } 1220 1274 -
tests/harness/testsuite.h
225 225 226 226 /// Test for equality of two things. 227 227 #define TEST_EQUAL(a, b) TEST_AND_EXPLAIN(((a) == (b)), \ 228 "Expected `"STRINGIZE(a)"' and `"STRINGIZE(b)"' to be equal: " \229 " were " << (a) << " and " << (b))228 "Expected `"STRINGIZE(a)"' and `"STRINGIZE(b)"' to be equal:\n" \ 229 " were " << (a) << "\n and " << (b)) 230 230 231 231 /** Test for equality of two strings. 232 232 * -
tests/queryparsertest.cc
532 532 { "- NEAR 12V voeding", "(near:(pos=1) OR 12v:(pos=2) OR Zvoed:(pos=3))" }, 533 533 { "waarom \"~\" in directorynaam", "(Zwaarom:(pos=1) OR Zin:(pos=2) OR Zdirectorynaam:(pos=3))" }, 534 534 { "cd'r NEAR toebehoren", "(cd'r:(pos=1) NEAR 11 toebehoren:(pos=2))" }, 535 { "site:1 site:2", "(H1 OR H2)" }, 536 { "site:1 site2:2", "(H1 AND J2)" }, 537 { "site:1 site:2 site2:2", "((H1 OR H2) AND J2)" }, 538 { "site:1 OR site:2", "(H1 OR H2)" }, 539 { "site:1 AND site:2", "(H1 AND H2)" }, 540 #if 0 541 { "A site:1 site:2", "(a FILTER (H1 OR H2))" }, 542 { "A (site:1 OR site:2)", "(a FILTER (H1 OR H2))" }, 543 { "A (site:1 OR site:2)", "(a FILTER (H1 OR H2))" }, 544 { "A site:1 site2:2", "(a FILTER (H1 AND J2))" }, 545 { "A site:1 site:2 site2:2", "(a FILTER ((H1 OR H2) AND J2))" }, 546 { "A site:1 OR site:2", "(a FILTER (H1 OR H2))" }, 547 { "A site:1 AND site:2", "(a FILTER (H1 AND H2))" }, 548 #endif 549 { "site:xapian.org OR site:www.xapian.org", "(Hxapian.org OR Hwww.xapian.org)" }, 550 { "site:xapian.org site:www.xapian.org", "(Hxapian.org OR Hwww.xapian.org)" }, 551 { "site:xapian.org AND site:www.xapian.org", "(Hxapian.org AND Hwww.xapian.org)" }, 552 { "Xapian site:xapian.org site:www.xapian.org", "(xapian:(pos=1) FILTER (Hxapian.org OR Hwww.xapian.org))" }, 535 553 { NULL, NULL } 536 554 }; 537 555 … … 576 594 queryparser.add_prefix("title", "XT"); 577 595 queryparser.add_prefix("subject", "XT"); 578 596 queryparser.add_boolean_prefix("site", "H"); 597 queryparser.add_boolean_prefix("site2", "J"); 579 598 for (test *p = test_or_queries; p->query; ++p) { 580 599 string expect, parsed; 581 600 if (p->expect) … … 969 988 { "hello a..b", "(hello:(pos=1) FILTER VALUE_RANGE 1 a b)" }, 970 989 { "hello a..b world", "((hello:(pos=1) OR world:(pos=2)) FILTER VALUE_RANGE 1 a b)" }, 971 990 { "hello a..b test:foo", "(hello:(pos=1) FILTER (VALUE_RANGE 1 a b AND XTESTfoo))" }, 991 { "hello a..b test:foo test:bar", "(hello:(pos=1) FILTER (VALUE_RANGE 1 a b AND (XTESTfoo 
OR XTESTbar)))" }, 992 { "hello a..b c..d test:foo", "(hello:(pos=1) FILTER ((VALUE_RANGE 1 a b OR VALUE_RANGE 1 c d) AND XTESTfoo))" }, 993 { "hello a..b c..d test:foo test:bar", "(hello:(pos=1) FILTER ((VALUE_RANGE 1 a b OR VALUE_RANGE 1 c d) AND (XTESTfoo OR XTESTbar)))" }, 972 994 { "-5..7", "VALUE_RANGE 1 -5 7" }, 973 995 { "hello -5..7", "(hello:(pos=1) FILTER VALUE_RANGE 1 -5 7)" }, 974 996 { "-5..7 hello", "(hello:(pos=1) FILTER VALUE_RANGE 1 -5 7)" }, … … 1020 1042 { "12/03/99..12/04/01", "VALUE_RANGE 1 19990312 20010412" }, 1021 1043 { "03-12-99..04-14-01", "VALUE_RANGE 1 19990312 20010414" }, 1022 1044 { "(test:a..test:b hello)", "(hello:(pos=1) FILTER VALUE_RANGE 3 test:a test:b)" }, 1045 { "12..42kg 5..6kg 1..12", "(VALUE_RANGE 2 1 12 AND (VALUE_RANGE 5 12 42 OR VALUE_RANGE 5 5 6))" }, 1023 1046 { NULL, NULL } 1024 1047 }; 1025 1048