Ticket #157: patch1.2

File patch1.2, 6.9 KB (added by Richard Boulton, 17 years ago)

Implementation of ORing together filter terms that share the same prefix

Line 
1Index: queryparser/queryparser.lemony
2===================================================================
3--- queryparser/queryparser.lemony (revision 8676)
4+++ queryparser/queryparser.lemony (working copy)
5@@ -25,6 +25,7 @@
6 #include "queryparser_internal.h"
7 #include <xapian/unicode.h>
8 #include "utils.h"
9+#include "autoptr.h"
10
11 // Include the list of token values lemon generates.
12 #include "queryparser_token.h"
13@@ -177,21 +178,25 @@
14 bool stem;
15 termpos pos;
16
17- std::string make_term() const;
18-
19 public:
20 Term(const string &name_, termpos pos_) : name(name_), stem(false), pos(pos_) { }
21 Term(const string &name_) : name(name_), stem(false), pos(0) { }
22+ Term(const string &name_, const string &prefix_)
23+ : name(name_), prefix(prefix_), stem(false), pos(0) { }
24 Term(termpos pos_) : stem(false), pos(pos_) { }
25 Term(State * state_, const string &name_, const string &prefix_,
26 const string &unstemmed_, bool stem_, termpos pos_)
27 : state(state_), name(name_), prefix(prefix_), unstemmed(unstemmed_),
28 stem(stem_), pos(pos_) { }
29
30+ std::string make_term() const;
31+
32 void dont_stem() { stem = false; }
33
34 termpos get_termpos() const { return pos; }
35
36+ string get_prefix() const { return prefix; }
37+
38 QpQuery * as_query() const { return new QpQuery(make_term(), 1, pos); }
39
40 QpQuery * as_wildcarded_query(State * state) const;
41@@ -589,17 +594,14 @@
42 // until the next space or ')' as part of the boolean
43 // term.
44 it = p;
45- if (prefix_needs_colon(prefix, *it))
46- prefix += ':';
47- string term;
48+ string name;
49 while (it != end && *it > ' ' && *it != ')')
50- Unicode::append_utf8(term, *it++);
51- prefix += term;
52+ Unicode::append_utf8(name, *it++);
53+ AutoPtr<Term> term(new Term(name, prefix));
54 field += ':';
55- field += term;
56- unstem.insert(make_pair(prefix, field));
57- Parse(pParser, BOOLEAN_FILTER, new Term(prefix),
58- &state);
59+ field += name;
60+ unstem.insert(make_pair(term->make_term(), field));
61+ Parse(pParser, BOOLEAN_FILTER, term.release(), &state);
62 continue;
63 }
64
65@@ -858,7 +860,21 @@
66 QpQuery query;
67 QpQuery love;
68 QpQuery hate;
69- QpQuery filter;
70+
71+ // filter is a map from prefix to a query for that prefix. Queries with
72+ // the same prefix are combined with OR, and the results of this are
73+ // combined with AND to get the full filter.
74+ map<string, QpQuery> filter;
75+
76+ QpQuery merge_filters() const {
77+ QpQuery q;
78+ for (map<string, QpQuery>::const_iterator i = filter.begin();
79+ i != filter.end(); ++i)
80+ {
81+ add_to_query(q, Query::OP_AND, i->second.get());
82+ }
83+ return q;
84+ }
85 };
86
87 class TermList {
88@@ -1078,10 +1094,10 @@
89 // Handle any boolean filters.
90 if (!P->filter.empty()) {
91 if (E->empty()) {
92- *E = P->filter;
93+ *E = P->merge_filters();
94 // FIXME and make the query boolean somehow...
95 } else {
96- *E = QpQuery(Query::OP_FILTER, *E, P->filter);
97+ *E = QpQuery(Query::OP_FILTER, *E, P->merge_filters());
98 }
99 }
100 // FIXME what if E->empty() (all terms are stopwords)?
101@@ -1101,6 +1117,8 @@
102 %destructor prob {delete $$;}
103
104 prob(P) ::= RANGE_START(A) RANGE_END(B). {
105+ string prefix = A->get_prefix();
106+ Assert(prefix == B->get_prefix());
107 QpQuery * range = state->value_range(A, B);
108 if (range == NULL) {
109 // Already deleted, so stop yy_parse_failed deleting them again.
110@@ -1109,11 +1127,13 @@
111 return;
112 }
113 P = new ProbQuery;
114- P->filter = *range;
115+ P->filter[prefix] = *range;
116 delete range;
117 }
118
119 prob(P) ::= stop_prob(Q) RANGE_START(A) RANGE_END(B). {
120+ string prefix = A->get_prefix();
121+ Assert(prefix == B->get_prefix());
122 QpQuery * range = state->value_range(A, B);
123 if (range == NULL) {
124 // Already deleted, so stop yy_parse_failed deleting them again.
125@@ -1122,7 +1142,7 @@
126 return;
127 }
128 P = Q;
129- add_to_query(P->filter, Query::OP_AND, *range);
130+ add_to_query(P->filter[prefix], Query::OP_AND, *range);
131 delete range;
132 }
133
134@@ -1192,29 +1212,29 @@
135
136 prob(P) ::= BOOLEAN_FILTER(T). {
137 P = new ProbQuery;
138- P->filter = T->as_query_object();
139+ P->filter[T->get_prefix()] = T->as_query_object();
140 delete T;
141 }
142
143 prob(P) ::= stop_prob(Q) BOOLEAN_FILTER(T). {
144 P = Q;
145- // FIXME we should OR filters with the same prefix...
146- add_to_query(P->filter, Query::OP_AND, T->as_query_object());
147+ // We OR filters with the same prefix...
148+ add_to_query(P->filter[T->get_prefix()], Query::OP_OR, T->as_query_object());
149 delete T;
150 }
151
152 prob(P) ::= LOVE BOOLEAN_FILTER(T). {
153 // LOVE BOOLEAN_FILTER(T) is just the same as BOOLEAN_FILTER
154 P = new ProbQuery;
155- P->filter = T->as_query_object();
156+ P->filter[T->get_prefix()] = T->as_query_object();
157 delete T;
158 }
159
160 prob(P) ::= stop_prob(Q) LOVE BOOLEAN_FILTER(T). {
161 // LOVE BOOLEAN_FILTER(T) is just the same as BOOLEAN_FILTER
162 P = Q;
163- // FIXME we should OR filters with the same prefix...
164- add_to_query(P->filter, Query::OP_AND, T->as_query_object());
165+ // We OR filters with the same prefix...
166+ add_to_query(P->filter[T->get_prefix()], Query::OP_OR, T->as_query_object());
167 delete T;
168 }
169
170Index: tests/queryparsertest.cc
171===================================================================
172--- tests/queryparsertest.cc (revision 8676)
173+++ tests/queryparsertest.cc (working copy)
174@@ -532,6 +532,24 @@
175 { "- NEAR 12V voeding", "(near:(pos=1) OR 12v:(pos=2) OR Zvoed:(pos=3))" },
176 { "waarom \"~\" in directorynaam", "(Zwaarom:(pos=1) OR Zin:(pos=2) OR Zdirectorynaam:(pos=3))" },
177 { "cd'r NEAR toebehoren", "(cd'r:(pos=1) NEAR 11 toebehoren:(pos=2))" },
178+ { "site:1 site:2", "(H1 OR H2)" },
179+ { "site:1 site2:2", "(H1 AND J2)" },
180+ { "site:1 site:2 site2:2", "((H1 OR H2) AND J2)" },
181+ { "site:1 OR site:2", "(H1 OR H2)" },
182+ { "site:1 AND site:2", "(H1 AND H2)" },
183+#if 0
184+ { "A site:1 site:2", "(a FILTER (H1 OR H2))" },
185+ { "A (site:1 OR site:2)", "(a FILTER (H1 OR H2))" },
186+ { "A (site:1 OR site:2)", "(a FILTER (H1 OR H2))" },
187+ { "A site:1 site2:2", "(a FILTER (H1 AND J2))" },
188+ { "A site:1 site:2 site2:2", "(a FILTER ((H1 OR H2) AND J2))" },
189+ { "A site:1 OR site:2", "(a FILTER (H1 OR H2))" },
190+ { "A site:1 AND site:2", "(a FILTER (H1 AND H2))" },
191+#endif
192+ { "site:xapian.org OR site:www.xapian.org", "(Hxapian.org OR Hwww.xapian.org)" },
193+ { "site:xapian.org site:www.xapian.org", "(Hxapian.org OR Hwww.xapian.org)" },
194+ { "site:xapian.org AND site:www.xapian.org", "(Hxapian.org AND Hwww.xapian.org)" },
195+ { "Xapian site:xapian.org site:www.xapian.org", "(xapian:(pos=1) FILTER (Hxapian.org OR Hwww.xapian.org))" },
196 { NULL, NULL }
197 };
198
199@@ -576,6 +594,7 @@
200 queryparser.add_prefix("title", "XT");
201 queryparser.add_prefix("subject", "XT");
202 queryparser.add_boolean_prefix("site", "H");
203+ queryparser.add_boolean_prefix("site2", "J");
204 for (test *p = test_or_queries; p->query; ++p) {
205 string expect, parsed;
206 if (p->expect)