1 | Index: queryparser/queryparser.lemony
|
---|
2 | ===================================================================
|
---|
3 | --- queryparser/queryparser.lemony (revision 8676)
|
---|
4 | +++ queryparser/queryparser.lemony (working copy)
|
---|
5 | @@ -25,6 +25,7 @@
|
---|
6 | #include "queryparser_internal.h"
|
---|
7 | #include <xapian/unicode.h>
|
---|
8 | #include "utils.h"
|
---|
9 | +#include "autoptr.h"
|
---|
10 |
|
---|
11 | // Include the list of token values lemon generates.
|
---|
12 | #include "queryparser_token.h"
|
---|
13 | @@ -177,21 +178,25 @@
|
---|
14 | bool stem;
|
---|
15 | termpos pos;
|
---|
16 |
|
---|
17 | - std::string make_term() const;
|
---|
18 | -
|
---|
19 | public:
|
---|
20 | Term(const string &name_, termpos pos_) : name(name_), stem(false), pos(pos_) { }
|
---|
21 | Term(const string &name_) : name(name_), stem(false), pos(0) { }
|
---|
22 | + Term(const string &name_, const string &prefix_)
|
---|
23 | + : name(name_), prefix(prefix_), stem(false), pos(0) { } // Term with an explicit prefix; stemming off and position 0.
|
---|
24 | Term(termpos pos_) : stem(false), pos(pos_) { }
|
---|
25 | Term(State * state_, const string &name_, const string &prefix_,
|
---|
26 | const string &unstemmed_, bool stem_, termpos pos_)
|
---|
27 | : state(state_), name(name_), prefix(prefix_), unstemmed(unstemmed_),
|
---|
28 | stem(stem_), pos(pos_) { }
|
---|
29 |
|
---|
30 | + std::string make_term() const;
|
---|
31 | +
|
---|
32 | void dont_stem() { stem = false; }
|
---|
33 |
|
---|
34 | termpos get_termpos() const { return pos; }
|
---|
35 |
|
---|
36 | + string get_prefix() const { return prefix; } // Returns a copy of this term's prefix.
|
---|
37 | +
|
---|
38 | QpQuery * as_query() const { return new QpQuery(make_term(), 1, pos); }
|
---|
39 |
|
---|
40 | QpQuery * as_wildcarded_query(State * state) const;
|
---|
41 | @@ -589,17 +594,14 @@
|
---|
42 | // until the next space or ')' as part of the boolean
|
---|
43 | // term.
|
---|
44 | it = p;
|
---|
45 | - if (prefix_needs_colon(prefix, *it))
|
---|
46 | - prefix += ':';
|
---|
47 | - string term;
|
---|
48 | + string name;
|
---|
49 | while (it != end && *it > ' ' && *it != ')')
|
---|
50 | - Unicode::append_utf8(term, *it++);
|
---|
51 | - prefix += term;
|
---|
52 | + Unicode::append_utf8(name, *it++);
|
---|
53 | + AutoPtr<Term> term(new Term(name, prefix));
|
---|
54 | field += ':';
|
---|
55 | - field += term;
|
---|
56 | - unstem.insert(make_pair(prefix, field));
|
---|
57 | - Parse(pParser, BOOLEAN_FILTER, new Term(prefix),
|
---|
58 | - &state);
|
---|
59 | + field += name;
|
---|
60 | + unstem.insert(make_pair(term->make_term(), field));
|
---|
61 | + Parse(pParser, BOOLEAN_FILTER, term.release(), &state);
|
---|
62 | continue;
|
---|
63 | }
|
---|
64 |
|
---|
65 | @@ -858,7 +860,21 @@
|
---|
66 | QpQuery query;
|
---|
67 | QpQuery love;
|
---|
68 | QpQuery hate;
|
---|
69 | - QpQuery filter;
|
---|
70 | +
|
---|
71 | + // filter is a map from prefix to a query for that prefix. Queries with
|
---|
72 | + // the same prefix are combined with OR, and the results of this are
|
---|
73 | + // combined with AND to get the full filter.
|
---|
74 | + map<string, QpQuery> filter;
|
---|
75 | +
|
---|
76 | + QpQuery merge_filters() const { // Combine every per-prefix entry of filter into one query using OP_AND.
|
---|
77 | + QpQuery q; // Accumulator; returned as default-constructed if filter has no entries.
|
---|
78 | + for (map<string, QpQuery>::const_iterator i = filter.begin();
|
---|
79 | + i != filter.end(); ++i)
|
---|
80 | + {
|
---|
81 | + add_to_query(q, Query::OP_AND, i->second.get()); // AND this prefix's query into the accumulator.
|
---|
82 | + }
|
---|
83 | + return q;
|
---|
84 | + }
|
---|
85 | };
|
---|
86 |
|
---|
87 | class TermList {
|
---|
88 | @@ -1078,10 +1094,10 @@
|
---|
89 | // Handle any boolean filters.
|
---|
90 | if (!P->filter.empty()) {
|
---|
91 | if (E->empty()) {
|
---|
92 | - *E = P->filter;
|
---|
93 | + *E = P->merge_filters();
|
---|
94 | // FIXME and make the query boolean somehow...
|
---|
95 | } else {
|
---|
96 | - *E = QpQuery(Query::OP_FILTER, *E, P->filter);
|
---|
97 | + *E = QpQuery(Query::OP_FILTER, *E, P->merge_filters());
|
---|
98 | }
|
---|
99 | }
|
---|
100 | // FIXME what if E->empty() (all terms are stopwords)?
|
---|
101 | @@ -1101,6 +1117,8 @@
|
---|
102 | %destructor prob {delete $$;}
|
---|
103 |
|
---|
104 | prob(P) ::= RANGE_START(A) RANGE_END(B). {
|
---|
105 | + string prefix = A->get_prefix();
|
---|
106 | + Assert(prefix == B->get_prefix());
|
---|
107 | QpQuery * range = state->value_range(A, B);
|
---|
108 | if (range == NULL) {
|
---|
109 | // Already deleted, so stop yy_parse_failed deleting them again.
|
---|
110 | @@ -1109,11 +1127,13 @@
|
---|
111 | return;
|
---|
112 | }
|
---|
113 | P = new ProbQuery;
|
---|
114 | - P->filter = *range;
|
---|
115 | + P->filter[prefix] = *range;
|
---|
116 | delete range;
|
---|
117 | }
|
---|
118 |
|
---|
119 | prob(P) ::= stop_prob(Q) RANGE_START(A) RANGE_END(B). {
|
---|
120 | + string prefix = A->get_prefix();
|
---|
121 | + Assert(prefix == B->get_prefix());
|
---|
122 | QpQuery * range = state->value_range(A, B);
|
---|
123 | if (range == NULL) {
|
---|
124 | // Already deleted, so stop yy_parse_failed deleting them again.
|
---|
125 | @@ -1122,7 +1142,7 @@
|
---|
126 | return;
|
---|
127 | }
|
---|
128 | P = Q;
|
---|
129 | - add_to_query(P->filter, Query::OP_AND, *range);
|
---|
130 | + add_to_query(P->filter[prefix], Query::OP_AND, *range);
|
---|
131 | delete range;
|
---|
132 | }
|
---|
133 |
|
---|
134 | @@ -1192,29 +1212,29 @@
|
---|
135 |
|
---|
136 | prob(P) ::= BOOLEAN_FILTER(T). {
|
---|
137 | P = new ProbQuery;
|
---|
138 | - P->filter = T->as_query_object();
|
---|
139 | + P->filter[T->get_prefix()] = T->as_query_object(); // Fresh ProbQuery: store the filter under its prefix.
|
---|
140 | delete T;
|
---|
141 | }
|
---|
142 |
|
---|
143 | prob(P) ::= stop_prob(Q) BOOLEAN_FILTER(T). {
|
---|
144 | P = Q;
|
---|
145 | - // FIXME we should OR filters with the same prefix...
|
---|
146 | - add_to_query(P->filter, Query::OP_AND, T->as_query_object());
|
---|
147 | + // OR this filter into the entry for its prefix, so that filters sharing a prefix act as alternatives.
|
---|
148 | + add_to_query(P->filter[T->get_prefix()], Query::OP_OR, T->as_query_object());
|
---|
149 | delete T;
|
---|
150 | }
|
---|
151 |
|
---|
152 | prob(P) ::= LOVE BOOLEAN_FILTER(T). {
|
---|
153 | // LOVE BOOLEAN_FILTER(T) is just the same as BOOLEAN_FILTER
|
---|
154 | P = new ProbQuery;
|
---|
155 | - P->filter = T->as_query_object();
|
---|
156 | + P->filter[T->get_prefix()] = T->as_query_object(); // Fresh ProbQuery: store the filter under its prefix.
|
---|
157 | delete T;
|
---|
158 | }
|
---|
159 |
|
---|
160 | prob(P) ::= stop_prob(Q) LOVE BOOLEAN_FILTER(T). {
|
---|
161 | // LOVE BOOLEAN_FILTER(T) is just the same as BOOLEAN_FILTER
|
---|
162 | P = Q;
|
---|
163 | - // FIXME we should OR filters with the same prefix...
|
---|
164 | - add_to_query(P->filter, Query::OP_AND, T->as_query_object());
|
---|
165 | + // OR this filter into the entry for its prefix, so that filters sharing a prefix act as alternatives.
|
---|
166 | + add_to_query(P->filter[T->get_prefix()], Query::OP_OR, T->as_query_object());
|
---|
167 | delete T;
|
---|
168 | }
|
---|
169 |
|
---|
170 | Index: tests/queryparsertest.cc
|
---|
171 | ===================================================================
|
---|
172 | --- tests/queryparsertest.cc (revision 8676)
|
---|
173 | +++ tests/queryparsertest.cc (working copy)
|
---|
174 | @@ -532,6 +532,24 @@
|
---|
175 | { "- NEAR 12V voeding", "(near:(pos=1) OR 12v:(pos=2) OR Zvoed:(pos=3))" },
|
---|
176 | { "waarom \"~\" in directorynaam", "(Zwaarom:(pos=1) OR Zin:(pos=2) OR Zdirectorynaam:(pos=3))" },
|
---|
177 | { "cd'r NEAR toebehoren", "(cd'r:(pos=1) NEAR 11 toebehoren:(pos=2))" },
|
---|
178 | + { "site:1 site:2", "(H1 OR H2)" },
|
---|
179 | + { "site:1 site2:2", "(H1 AND J2)" },
|
---|
180 | + { "site:1 site:2 site2:2", "((H1 OR H2) AND J2)" },
|
---|
181 | + { "site:1 OR site:2", "(H1 OR H2)" },
|
---|
182 | + { "site:1 AND site:2", "(H1 AND H2)" },
|
---|
183 | +#if 0
|
---|
184 | + { "A site:1 site:2", "(a FILTER (H1 OR H2))" },
|
---|
185 | + { "A (site:1 OR site:2)", "(a FILTER (H1 OR H2))" },
|
---|
186 | + { "A (site:1 OR site:2)", "(a FILTER (H1 OR H2))" },
|
---|
187 | + { "A site:1 site2:2", "(a FILTER (H1 AND J2))" },
|
---|
188 | + { "A site:1 site:2 site2:2", "(a FILTER ((H1 OR H2) AND J2))" },
|
---|
189 | + { "A site:1 OR site:2", "(a FILTER (H1 OR H2))" },
|
---|
190 | + { "A site:1 AND site:2", "(a FILTER (H1 AND H2))" },
|
---|
191 | +#endif
|
---|
192 | + { "site:xapian.org OR site:www.xapian.org", "(Hxapian.org OR Hwww.xapian.org)" },
|
---|
193 | + { "site:xapian.org site:www.xapian.org", "(Hxapian.org OR Hwww.xapian.org)" },
|
---|
194 | + { "site:xapian.org AND site:www.xapian.org", "(Hxapian.org AND Hwww.xapian.org)" },
|
---|
195 | + { "Xapian site:xapian.org site:www.xapian.org", "(xapian:(pos=1) FILTER (Hxapian.org OR Hwww.xapian.org))" },
|
---|
196 | { NULL, NULL }
|
---|
197 | };
|
---|
198 |
|
---|
199 | @@ -576,6 +594,7 @@
|
---|
200 | queryparser.add_prefix("title", "XT");
|
---|
201 | queryparser.add_prefix("subject", "XT");
|
---|
202 | queryparser.add_boolean_prefix("site", "H");
|
---|
203 | + queryparser.add_boolean_prefix("site2", "J");
|
---|
204 | for (test *p = test_or_queries; p->query; ++p) {
|
---|
205 | string expect, parsed;
|
---|
206 | if (p->expect)
|
---|