| 1 | Index: queryparser/queryparser.lemony
|
|---|
| 2 | ===================================================================
|
|---|
| 3 | --- queryparser/queryparser.lemony (revision 8676)
|
|---|
| 4 | +++ queryparser/queryparser.lemony (working copy)
|
|---|
| 5 | @@ -25,6 +25,7 @@
|
|---|
| 6 | #include "queryparser_internal.h"
|
|---|
| 7 | #include <xapian/unicode.h>
|
|---|
| 8 | #include "utils.h"
|
|---|
| 9 | +#include "autoptr.h"
|
|---|
| 10 |
|
|---|
| 11 | // Include the list of token values lemon generates.
|
|---|
| 12 | #include "queryparser_token.h"
|
|---|
| 13 | @@ -177,21 +178,25 @@
|
|---|
| 14 | bool stem;
|
|---|
| 15 | termpos pos;
|
|---|
| 16 |
|
|---|
| 17 | - std::string make_term() const;
|
|---|
| 18 | -
|
|---|
| 19 | public:
|
|---|
| 20 | Term(const string &name_, termpos pos_) : name(name_), stem(false), pos(pos_) { }
|
|---|
| 21 | Term(const string &name_) : name(name_), stem(false), pos(0) { }
|
|---|
| 22 | + Term(const string &name_, const string &prefix_) // Build a term from a name and a prefix; stemming is off and pos is 0.
|
|---|
| 23 | + : name(name_), prefix(prefix_), stem(false), pos(0) { } // NOTE(review): state is not initialised here, same as the other short constructors.
|
|---|
| 24 | Term(termpos pos_) : stem(false), pos(pos_) { }
|
|---|
| 25 | Term(State * state_, const string &name_, const string &prefix_,
|
|---|
| 26 | const string &unstemmed_, bool stem_, termpos pos_)
|
|---|
| 27 | : state(state_), name(name_), prefix(prefix_), unstemmed(unstemmed_),
|
|---|
| 28 | stem(stem_), pos(pos_) { }
|
|---|
| 29 |
|
|---|
| 30 | + std::string make_term() const;
|
|---|
| 31 | +
|
|---|
| 32 | void dont_stem() { stem = false; }
|
|---|
| 33 |
|
|---|
| 34 | termpos get_termpos() const { return pos; }
|
|---|
| 35 |
|
|---|
| 36 | + string get_prefix() const { return prefix; } // Returns the term's prefix by value.
|
|---|
| 37 | +
|
|---|
| 38 | QpQuery * as_query() const { return new QpQuery(make_term(), 1, pos); }
|
|---|
| 39 |
|
|---|
| 40 | QpQuery * as_wildcarded_query(State * state) const;
|
|---|
| 41 | @@ -589,17 +594,14 @@
|
|---|
| 42 | // until the next space or ')' as part of the boolean
|
|---|
| 43 | // term.
|
|---|
| 44 | it = p;
|
|---|
| 45 | - if (prefix_needs_colon(prefix, *it))
|
|---|
| 46 | - prefix += ':';
|
|---|
| 47 | - string term;
|
|---|
| 48 | + string name;
|
|---|
| 49 | while (it != end && *it > ' ' && *it != ')')
|
|---|
| 50 | - Unicode::append_utf8(term, *it++);
|
|---|
| 51 | - prefix += term;
|
|---|
| 52 | + Unicode::append_utf8(name, *it++);
|
|---|
| 53 | + AutoPtr<Term> term(new Term(name, prefix));
|
|---|
| 54 | field += ':';
|
|---|
| 55 | - field += term;
|
|---|
| 56 | - unstem.insert(make_pair(prefix, field));
|
|---|
| 57 | - Parse(pParser, BOOLEAN_FILTER, new Term(prefix),
|
|---|
| 58 | - &state);
|
|---|
| 59 | + field += name;
|
|---|
| 60 | + unstem.insert(make_pair(term->make_term(), field));
|
|---|
| 61 | + Parse(pParser, BOOLEAN_FILTER, term.release(), &state);
|
|---|
| 62 | continue;
|
|---|
| 63 | }
|
|---|
| 64 |
|
|---|
| 65 | @@ -858,7 +860,21 @@
|
|---|
| 66 | QpQuery query;
|
|---|
| 67 | QpQuery love;
|
|---|
| 68 | QpQuery hate;
|
|---|
| 69 | - QpQuery filter;
|
|---|
| 70 | +
|
|---|
| 71 | + // filter is a map from prefix to a query for that prefix. Queries with
|
|---|
| 72 | + // the same prefix are combined with OR, and the results of this are
|
|---|
| 73 | + // combined with AND to get the full filter.
|
|---|
| 74 | + map<string, QpQuery> filter;
|
|---|
| 75 | +
|
|---|
| 76 | + QpQuery merge_filters() const { // Collapse the per-prefix filter map into a single query.
|
|---|
| 77 | + QpQuery q; // Default-constructed; returned as-is if filter has no entries.
|
|---|
| 78 | + for (map<string, QpQuery>::const_iterator i = filter.begin();
|
|---|
| 79 | + i != filter.end(); ++i)
|
|---|
| 80 | + {
|
|---|
| 81 | + add_to_query(q, Query::OP_AND, i->second.get()); // Join each prefix's query onto q with OP_AND.
|
|---|
| 82 | + }
|
|---|
| 83 | + return q;
|
|---|
| 84 | + }
|
|---|
| 85 | };
|
|---|
| 86 |
|
|---|
| 87 | class TermList {
|
|---|
| 88 | @@ -1078,10 +1094,10 @@
|
|---|
| 89 | // Handle any boolean filters.
|
|---|
| 90 | if (!P->filter.empty()) {
|
|---|
| 91 | if (E->empty()) {
|
|---|
| 92 | - *E = P->filter;
|
|---|
| 93 | + *E = P->merge_filters();
|
|---|
| 94 | // FIXME and make the query boolean somehow...
|
|---|
| 95 | } else {
|
|---|
| 96 | - *E = QpQuery(Query::OP_FILTER, *E, P->filter);
|
|---|
| 97 | + *E = QpQuery(Query::OP_FILTER, *E, P->merge_filters());
|
|---|
| 98 | }
|
|---|
| 99 | }
|
|---|
| 100 | // FIXME what if E->empty() (all terms are stopwords)?
|
|---|
| 101 | @@ -1101,6 +1117,8 @@
|
|---|
| 102 | %destructor prob {delete $$;}
|
|---|
| 103 |
|
|---|
| 104 | prob(P) ::= RANGE_START(A) RANGE_END(B). {
|
|---|
| 105 | + string prefix = A->get_prefix(); // Copy the prefix up front: per the comment below, A and B may already be deleted after value_range().
|
|---|
| 106 | + Assert(prefix == B->get_prefix()); // Both ends of the range are expected to carry the same prefix.
|
|---|
| 107 | QpQuery * range = state->value_range(A, B);
|
|---|
| 108 | if (range == NULL) {
|
|---|
| 109 | // Already deleted, so stop yy_parse_failed deleting them again.
|
|---|
| 110 | @@ -1109,11 +1127,13 @@
|
|---|
| 111 | return;
|
|---|
| 112 | }
|
|---|
| 113 | P = new ProbQuery;
|
|---|
| 114 | - P->filter = *range;
|
|---|
| 115 | + P->filter[prefix] = *range; // First entry in the new ProbQuery's filter map, keyed by the range's prefix.
|
|---|
| 116 | delete range;
|
|---|
| 117 | }
|
|---|
| 118 |
|
|---|
| 119 | prob(P) ::= stop_prob(Q) RANGE_START(A) RANGE_END(B). {
|
|---|
| 120 | + string prefix = A->get_prefix(); // Copy the prefix up front: per the comment below, A and B may already be deleted after value_range().
|
|---|
| 121 | + Assert(prefix == B->get_prefix()); // Both ends of the range are expected to carry the same prefix.
|
|---|
| 122 | QpQuery * range = state->value_range(A, B);
|
|---|
| 123 | if (range == NULL) {
|
|---|
| 124 | // Already deleted, so stop yy_parse_failed deleting them again.
|
|---|
| 125 | @@ -1122,7 +1142,7 @@
|
|---|
| 126 | return;
|
|---|
| 127 | }
|
|---|
| 128 | P = Q;
|
|---|
| 129 | - add_to_query(P->filter, Query::OP_AND, *range);
|
|---|
| 130 | + add_to_query(P->filter[prefix], Query::OP_AND, *range); // NOTE(review): ranges sharing a prefix are combined with OP_AND here, whereas boolean filters sharing a prefix use OP_OR - confirm this is intended.
|
|---|
| 131 | delete range;
|
|---|
| 132 | }
|
|---|
| 133 |
|
|---|
| 134 | @@ -1192,29 +1212,29 @@
|
|---|
| 135 |
|
|---|
| 136 | prob(P) ::= BOOLEAN_FILTER(T). {
|
|---|
| 137 | P = new ProbQuery;
|
|---|
| 138 | - P->filter = T->as_query_object();
|
|---|
| 139 | + P->filter[T->get_prefix()] = T->as_query_object(); // Store the filter in the new ProbQuery's map, keyed by the term's prefix.
|
|---|
| 140 | delete T;
|
|---|
| 141 | }
|
|---|
| 142 |
|
|---|
| 143 | prob(P) ::= stop_prob(Q) BOOLEAN_FILTER(T). {
|
|---|
| 144 | P = Q;
|
|---|
| 145 | - // FIXME we should OR filters with the same prefix...
|
|---|
| 146 | - add_to_query(P->filter, Query::OP_AND, T->as_query_object());
|
|---|
| 147 | + // OR this filter into the map entry for its prefix; merge_filters() ANDs the per-prefix entries together.
|
|---|
| 148 | + add_to_query(P->filter[T->get_prefix()], Query::OP_OR, T->as_query_object());
|
|---|
| 149 | delete T;
|
|---|
| 150 | }
|
|---|
| 151 |
|
|---|
| 152 | prob(P) ::= LOVE BOOLEAN_FILTER(T). {
|
|---|
| 153 | // LOVE BOOLEAN_FILTER(T) is just the same as BOOLEAN_FILTER
|
|---|
| 154 | P = new ProbQuery;
|
|---|
| 155 | - P->filter = T->as_query_object();
|
|---|
| 156 | + P->filter[T->get_prefix()] = T->as_query_object(); // Store the filter in the new ProbQuery's map, keyed by the term's prefix.
|
|---|
| 157 | delete T;
|
|---|
| 158 | }
|
|---|
| 159 |
|
|---|
| 160 | prob(P) ::= stop_prob(Q) LOVE BOOLEAN_FILTER(T). {
|
|---|
| 161 | // LOVE BOOLEAN_FILTER(T) is just the same as BOOLEAN_FILTER
|
|---|
| 162 | P = Q;
|
|---|
| 163 | - // FIXME we should OR filters with the same prefix...
|
|---|
| 164 | - add_to_query(P->filter, Query::OP_AND, T->as_query_object());
|
|---|
| 165 | + // OR this filter into the map entry for its prefix; merge_filters() ANDs the per-prefix entries together.
|
|---|
| 166 | + add_to_query(P->filter[T->get_prefix()], Query::OP_OR, T->as_query_object());
|
|---|
| 167 | delete T;
|
|---|
| 168 | }
|
|---|
| 169 |
|
|---|
| 170 | Index: tests/queryparsertest.cc
|
|---|
| 171 | ===================================================================
|
|---|
| 172 | --- tests/queryparsertest.cc (revision 8676)
|
|---|
| 173 | +++ tests/queryparsertest.cc (working copy)
|
|---|
| 174 | @@ -532,6 +532,24 @@
|
|---|
| 175 | { "- NEAR 12V voeding", "(near:(pos=1) OR 12v:(pos=2) OR Zvoed:(pos=3))" },
|
|---|
| 176 | { "waarom \"~\" in directorynaam", "(Zwaarom:(pos=1) OR Zin:(pos=2) OR Zdirectorynaam:(pos=3))" },
|
|---|
| 177 | { "cd'r NEAR toebehoren", "(cd'r:(pos=1) NEAR 11 toebehoren:(pos=2))" },
|
|---|
| 178 | + { "site:1 site:2", "(H1 OR H2)" },
|
|---|
| 179 | + { "site:1 site2:2", "(H1 AND J2)" },
|
|---|
| 180 | + { "site:1 site:2 site2:2", "((H1 OR H2) AND J2)" },
|
|---|
| 181 | + { "site:1 OR site:2", "(H1 OR H2)" },
|
|---|
| 182 | + { "site:1 AND site:2", "(H1 AND H2)" },
|
|---|
| 183 | +#if 0
|
|---|
| 184 | + { "A site:1 site:2", "(a FILTER (H1 OR H2))" },
|
|---|
| 185 | + { "A (site:1 OR site:2)", "(a FILTER (H1 OR H2))" },
|
|---|
| 186 | + { "A (site:1 OR site:2)", "(a FILTER (H1 OR H2))" },
|
|---|
| 187 | + { "A site:1 site2:2", "(a FILTER (H1 AND J2))" },
|
|---|
| 188 | + { "A site:1 site:2 site2:2", "(a FILTER ((H1 OR H2) AND J2))" },
|
|---|
| 189 | + { "A site:1 OR site:2", "(a FILTER (H1 OR H2))" },
|
|---|
| 190 | + { "A site:1 AND site:2", "(a FILTER (H1 AND H2))" },
|
|---|
| 191 | +#endif
|
|---|
| 192 | + { "site:xapian.org OR site:www.xapian.org", "(Hxapian.org OR Hwww.xapian.org)" },
|
|---|
| 193 | + { "site:xapian.org site:www.xapian.org", "(Hxapian.org OR Hwww.xapian.org)" },
|
|---|
| 194 | + { "site:xapian.org AND site:www.xapian.org", "(Hxapian.org AND Hwww.xapian.org)" },
|
|---|
| 195 | + { "Xapian site:xapian.org site:www.xapian.org", "(xapian:(pos=1) FILTER (Hxapian.org OR Hwww.xapian.org))" },
|
|---|
| 196 | { NULL, NULL }
|
|---|
| 197 | };
|
|---|
| 198 |
|
|---|
| 199 | @@ -576,6 +594,7 @@
|
|---|
| 200 | queryparser.add_prefix("title", "XT");
|
|---|
| 201 | queryparser.add_prefix("subject", "XT");
|
|---|
| 202 | queryparser.add_boolean_prefix("site", "H");
|
|---|
| 203 | + queryparser.add_boolean_prefix("site2", "J");
|
|---|
| 204 | for (test *p = test_or_queries; p->query; ++p) {
|
|---|
| 205 | string expect, parsed;
|
|---|
| 206 | if (p->expect)
|
|---|