Ticket #157: boolgroups.4.patch
File boolgroups.4.patch, 11.6 KB (added 17 years ago)
-
queryparser/queryparser.lemony
25 25 #include "queryparser_internal.h" 26 26 #include <xapian/unicode.h> 27 27 #include "utils.h" 28 #include "autoptr.h" 28 29 29 30 // Include the list of token values lemon generates. 30 31 #include "queryparser_token.h" … … 133 134 QpQuery(bool m) : q(), match_nothing(m) {} 134 135 Query & get() { return q; } 135 136 const Query & get() const { return q; } 137 Xapian::valueno get_valno() const { return q.internal->get_parameter(); } 136 138 137 139 /// True iff the query is not empty, and doesn't explicitly match nothing. 138 140 bool can_match() { return (!q.empty() && !match_nothing); } … … 160 162 #endif 161 163 }; 162 164 165 /// A structure identifying a group of filter terms 166 struct filter_group_id { 167 /** The prefix of the filter terms. 168 * This is used for boolean filter terms. 169 */ 170 string prefix; 171 172 /** The value number of the filter terms. 173 * This is used for value range terms. 174 */ 175 Xapian::valueno valno; 176 177 /// Make a new filter_group_id for boolean filter terms. 178 explicit filter_group_id(const string & prefix_) : prefix(prefix_), valno(Xapian::BAD_VALUENO) {} 179 180 /// Make a new filter_group_id for value range terms. 181 explicit filter_group_id(Xapian::valueno valno_) : prefix(), valno(valno_) {} 182 183 /// Compare to another filter_group_id. 184 bool operator<(const filter_group_id & other) const { 185 if (prefix != other.prefix) { 186 return prefix < other.prefix; 187 } 188 return valno < other.valno; 189 } 190 }; 191 163 192 /** Class used to pass information about a token from lexer to parser. 
164 193 * 165 194 * Generally this class carries term information, but it can be used for the … … 177 206 bool stem; 178 207 termpos pos; 179 208 180 std::string make_term() const;181 182 209 public: 183 210 Term(const string &name_, termpos pos_) : name(name_), stem(false), pos(pos_) { } 184 211 Term(const string &name_) : name(name_), stem(false), pos(0) { } 212 Term(const string &name_, const string &prefix_) 213 : name(name_), prefix(prefix_), stem(false), pos(0) { } 185 214 Term(termpos pos_) : stem(false), pos(pos_) { } 186 215 Term(State * state_, const string &name_, const string &prefix_, 187 216 const string &unstemmed_, bool stem_, termpos pos_) 188 217 : state(state_), name(name_), prefix(prefix_), unstemmed(unstemmed_), 189 218 stem(stem_), pos(pos_) { } 190 219 220 std::string make_term() const; 221 191 222 void dont_stem() { stem = false; } 192 223 193 224 termpos get_termpos() const { return pos; } 194 225 226 filter_group_id get_filter_group_id() const { return filter_group_id(prefix); } 227 195 228 QpQuery * as_query() const { return new QpQuery(make_term(), 1, pos); } 196 229 197 230 QpQuery * as_wildcarded_query(State * state) const; … … 587 620 // until the next space or ')' as part of the boolean 588 621 // term. 
589 622 it = p; 590 if (prefix_needs_colon(prefix, *it)) 591 prefix += ':'; 592 string term; 623 string name; 593 624 while (it != end && *it > ' ' && *it != ')') 594 Unicode::append_utf8( term, *it++);595 prefix += term;625 Unicode::append_utf8(name, *it++); 626 AutoPtr<Term> term(new Term(name, prefix)); 596 627 field += ':'; 597 field += term; 598 unstem.insert(make_pair(prefix, field)); 599 Parse(pParser, BOOLEAN_FILTER, new Term(prefix), 600 &state); 628 field += name; 629 unstem.insert(make_pair(term->make_term(), field)); 630 Parse(pParser, BOOLEAN_FILTER, term.release(), &state); 601 631 continue; 602 632 } 603 633 … … 856 886 QpQuery query; 857 887 QpQuery love; 858 888 QpQuery hate; 859 QpQuery filter; 889 890 // filter is a map from prefix to a query for that prefix. Queries with 891 // the same prefix are combined with OR, and the results of this are 892 // combined with AND to get the full filter. 893 map<filter_group_id, QpQuery> filter; 894 895 QpQuery merge_filters() const { 896 QpQuery q; 897 for (map<filter_group_id, QpQuery>::const_iterator i = filter.begin(); 898 i != filter.end(); ++i) 899 { 900 add_to_query(q, Query::OP_AND, i->second.get()); 901 } 902 return q; 903 } 860 904 }; 861 905 862 906 class TermList { … … 1076 1120 // Handle any boolean filters. 1077 1121 if (!P->filter.empty()) { 1078 1122 if (E->empty()) { 1079 *E = P-> filter;1123 *E = P->merge_filters(); 1080 1124 // FIXME and make the query boolean somehow... 1081 1125 } else { 1082 *E = QpQuery(Query::OP_FILTER, *E, P-> filter);1126 *E = QpQuery(Query::OP_FILTER, *E, P->merge_filters()); 1083 1127 } 1084 1128 } 1085 1129 // FIXME what if E->empty() (all terms are stopwords)? 
… … 1106 1150 yy_parse_failed(yypParser); 1107 1151 return; 1108 1152 } 1153 Xapian::valueno valno = range->get_valno(); 1109 1154 P = new ProbQuery; 1110 P->filter = *range;1155 P->filter[filter_group_id(valno)] = *range; 1111 1156 delete range; 1112 1157 } 1113 1158 … … 1119 1164 yy_parse_failed(yypParser); 1120 1165 return; 1121 1166 } 1167 Xapian::valueno valno = range->get_valno(); 1122 1168 P = Q; 1123 add_to_query(P->filter , Query::OP_AND, *range);1169 add_to_query(P->filter[filter_group_id(valno)], Query::OP_OR, *range); 1124 1170 delete range; 1125 1171 } 1126 1172 … … 1190 1236 1191 1237 prob(P) ::= BOOLEAN_FILTER(T). { 1192 1238 P = new ProbQuery; 1193 P->filter = T->as_query_object();1239 P->filter[T->get_filter_group_id()] = T->as_query_object(); 1194 1240 delete T; 1195 1241 } 1196 1242 1197 1243 prob(P) ::= stop_prob(Q) BOOLEAN_FILTER(T). { 1198 1244 P = Q; 1199 // FIXME we shouldOR filters with the same prefix...1200 add_to_query(P->filter , Query::OP_AND, T->as_query_object());1245 // We OR filters with the same prefix... 1246 add_to_query(P->filter[T->get_filter_group_id()], Query::OP_OR, T->as_query_object()); 1201 1247 delete T; 1202 1248 } 1203 1249 1204 1250 prob(P) ::= LOVE BOOLEAN_FILTER(T). { 1205 1251 // LOVE BOOLEAN_FILTER(T) is just the same as BOOLEAN_FILTER 1206 1252 P = new ProbQuery; 1207 P->filter = T->as_query_object();1253 P->filter[T->get_filter_group_id()] = T->as_query_object(); 1208 1254 delete T; 1209 1255 } 1210 1256 1211 1257 prob(P) ::= stop_prob(Q) LOVE BOOLEAN_FILTER(T). { 1212 1258 // LOVE BOOLEAN_FILTER(T) is just the same as BOOLEAN_FILTER 1213 1259 P = Q; 1214 // FIXME we shouldOR filters with the same prefix...1215 add_to_query(P->filter , Query::OP_AND, T->as_query_object());1260 // We OR filters with the same prefix... 1261 add_to_query(P->filter[T->get_filter_group_id()], Query::OP_OR, T->as_query_object()); 1216 1262 delete T; 1217 1263 } 1218 1264 -
tests/queryparsertest.cc
532 532 { "- NEAR 12V voeding", "(near:(pos=1) OR 12v:(pos=2) OR Zvoed:(pos=3))" }, 533 533 { "waarom \"~\" in directorynaam", "(Zwaarom:(pos=1) OR Zin:(pos=2) OR Zdirectorynaam:(pos=3))" }, 534 534 { "cd'r NEAR toebehoren", "(cd'r:(pos=1) NEAR 11 toebehoren:(pos=2))" }, 535 { "site:1 site:2", "(H1 OR H2)" }, 536 { "site:1 site2:2", "(H1 AND J2)" }, 537 { "site:1 site:2 site2:2", "((H1 OR H2) AND J2)" }, 538 { "site:1 OR site:2", "(H1 OR H2)" }, 539 { "site:1 AND site:2", "(H1 AND H2)" }, 540 #if 0 541 { "A site:1 site:2", "(a FILTER (H1 OR H2))" }, 542 { "A (site:1 OR site:2)", "(a FILTER (H1 OR H2))" }, 543 { "A (site:1 OR site:2)", "(a FILTER (H1 OR H2))" }, 544 { "A site:1 site2:2", "(a FILTER (H1 AND J2))" }, 545 { "A site:1 site:2 site2:2", "(a FILTER ((H1 OR H2) AND J2))" }, 546 { "A site:1 OR site:2", "(a FILTER (H1 OR H2))" }, 547 { "A site:1 AND site:2", "(a FILTER (H1 AND H2))" }, 548 #endif 549 { "site:xapian.org OR site:www.xapian.org", "(Hxapian.org OR Hwww.xapian.org)" }, 550 { "site:xapian.org site:www.xapian.org", "(Hxapian.org OR Hwww.xapian.org)" }, 551 { "site:xapian.org AND site:www.xapian.org", "(Hxapian.org AND Hwww.xapian.org)" }, 552 { "Xapian site:xapian.org site:www.xapian.org", "(xapian:(pos=1) FILTER (Hxapian.org OR Hwww.xapian.org))" }, 553 { "author:richard author:olly writer:charlie", "(ZArichard:(pos=1) OR ZAolli:(pos=2) OR ZAcharli:(pos=3))"}, 535 554 { NULL, NULL } 536 555 }; 537 556 … … 573 592 queryparser.set_stemmer(Xapian::Stem("english")); 574 593 queryparser.set_stemming_strategy(Xapian::QueryParser::STEM_SOME); 575 594 queryparser.add_prefix("author", "A"); 595 queryparser.add_prefix("writer", "A"); 576 596 queryparser.add_prefix("title", "XT"); 577 597 queryparser.add_prefix("subject", "XT"); 578 598 queryparser.add_boolean_prefix("site", "H"); 599 queryparser.add_boolean_prefix("site2", "J"); 579 600 for (test *p = test_or_queries; p->query; ++p) { 580 601 string expect, parsed; 581 602 if (p->expect) … … 977 998 { "hello 
a..b", "(hello:(pos=1) FILTER VALUE_RANGE 1 a b)" }, 978 999 { "hello a..b world", "((hello:(pos=1) OR world:(pos=2)) FILTER VALUE_RANGE 1 a b)" }, 979 1000 { "hello a..b test:foo", "(hello:(pos=1) FILTER (VALUE_RANGE 1 a b AND XTESTfoo))" }, 1001 { "hello a..b test:foo test:bar", "(hello:(pos=1) FILTER (VALUE_RANGE 1 a b AND (XTESTfoo OR XTESTbar)))" }, 1002 { "hello a..b c..d test:foo", "(hello:(pos=1) FILTER ((VALUE_RANGE 1 a b OR VALUE_RANGE 1 c d) AND XTESTfoo))" }, 1003 { "hello a..b c..d test:foo test:bar", "(hello:(pos=1) FILTER ((VALUE_RANGE 1 a b OR VALUE_RANGE 1 c d) AND (XTESTfoo OR XTESTbar)))" }, 980 1004 { "-5..7", "VALUE_RANGE 1 -5 7" }, 981 1005 { "hello -5..7", "(hello:(pos=1) FILTER VALUE_RANGE 1 -5 7)" }, 982 1006 { "-5..7 hello", "(hello:(pos=1) FILTER VALUE_RANGE 1 -5 7)" }, … … 1030 1054 { "12/03/99..12/04/01", "VALUE_RANGE 1 19990312 20010412" }, 1031 1055 { "03-12-99..04-14-01", "VALUE_RANGE 1 19990312 20010414" }, 1032 1056 { "(test:a..test:b hello)", "(hello:(pos=1) FILTER VALUE_RANGE 3 test:a test:b)" }, 1057 { "12..42kg 5..6kg 1..12", "(VALUE_RANGE 2 1 12 AND (VALUE_RANGE 5 12 42 OR VALUE_RANGE 5 5 6))" }, 1033 1058 { NULL, NULL } 1034 1059 }; 1035 1060 -
include/xapian/query.h
377 377 */ 378 378 std::string get_description() const; 379 379 380 /** Get the numeric parameter used in this query. 381 * 382 * This is used by the queryparser to get the value number for 383 * VALUE_RANGE queries. It should be replaced by a public method on 384 * the Query class at some point, but the API which should be used for 385 * that is unclear, so this is a temporary workaround. 386 */ 387 Xapian::termcount get_parameter() const { return parameter; } 388 380 389 /** Get the length of the query, used by some ranking formulae. 381 390 * This value is calculated automatically - if you want to override 382 391 * it you can pass a different value to Enquire::set_query(). -
include/xapian/queryparser.h
238 238 * will be converted to Hxapian.org combined with any probabilistic 239 239 * query with OP_FILTER. 240 240 * 241 * Multiple fields can be mapped to the same prefix (so you can 242 * e.g. make site: and domain: aliases for each other). 241 * If multiple boolean filters are specified in a query for the same 242 * prefix, they will be combined with the OR operator. Then, if there are 243 * boolean filters for different prefixes, they will be combined with the 244 * AND operator. 243 245 * 246 * Multiple fields can be mapped to the same prefix (so you can e.g. make 247 * site: and domain: aliases for each other). Instances of fields with 248 * different aliases but the same prefix will still be combined with the 249 * OR operator. 250 * 251 * For example, if "site" and "domain" map to "H", but author maps to "A", 252 * a search for "site:Foo domain:Bar author:Fred" will map to 253 * "(Hfoo OR Hbar) AND Afred". 254 * 244 255 * @param field The user visible field name 245 256 * @param prefix The term prefix to map this to 246 257 */