diff --git a/xapian-core/include/xapian/queryparser.h b/xapian-core/include/xapian/queryparser.h
index 829e187..089a832 100644
a
|
b
|
class XAPIAN_VISIBILITY_DEFAULT QueryParser {
|
409 | 409 | FLAG_DEFAULT = FLAG_PHRASE|FLAG_BOOLEAN|FLAG_LOVEHATE |
410 | 410 | } feature_flag; |
411 | 411 | |
412 | | typedef enum { STEM_NONE, STEM_SOME, STEM_ALL } stem_strategy; |
| 412 | typedef enum { STEM_NONE, STEM_SOME, STEM_ALL, STEM_ALL_Z } stem_strategy; |
413 | 413 | |
414 | 414 | /// Copy constructor. |
415 | 415 | QueryParser(const QueryParser & o); |
… |
… |
class XAPIAN_VISIBILITY_DEFAULT QueryParser {
|
452 | 452 | * terms are prefixed with 'Z'. (default in Xapian > 1.3.1) |
453 | 453 | * - STEM_ALL: Search for stemmed forms of all words (note: no 'Z' |
454 | 454 | * prefix is added). |
| 455 | * - STEM_ALL_Z: Search for stemmed forms of all words (note: 'Z' |
| 456 | * prefix is added). |
455 | 457 | */ |
456 | 458 | void set_stemming_strategy(stem_strategy strategy); |
457 | 459 | |
diff --git a/xapian-core/include/xapian/termgenerator.h b/xapian-core/include/xapian/termgenerator.h
index 28f4294..28af489 100644
a
|
b
|
class XAPIAN_VISIBILITY_DEFAULT TermGenerator {
|
88 | 88 | FLAG_SPELLING = 128 // Value matches QueryParser flag. |
89 | 89 | }; |
90 | 90 | |
| 91 | /** Stemming strategies for indexing text. |
| 92 | * These values mirror QueryParser::stem_strategy so that indexing and query parsing can use matching modes. |
| 93 | */ |
| 94 | typedef enum { STEM_NONE, STEM_SOME, STEM_ALL, STEM_ALL_Z } stem_strategy; |
| 95 | |
91 | 96 | /** Set flags. |
92 | 97 | * |
93 | 98 | * The new value of flags is: (flags & mask) ^ toggle |
… |
… |
class XAPIAN_VISIBILITY_DEFAULT TermGenerator {
|
102 | 107 | */ |
103 | 108 | flags set_flags(flags toggle, flags mask = flags(0)); |
104 | 109 | |
| 110 | /** Set the stemming strategy used when indexing (default is STEM_SOME). |
| 111 | * |
| 112 | * @param strategy The strategy to use - possible values are: |
| 113 | * - STEM_NONE: Don't index stemmed forms of any words. |
| 114 | * - STEM_SOME: Index both stemmed and unstemmed forms of words (note: |
| 115 | * stemmed forms are prefixed with "Z"). |
| 116 | * - STEM_ALL: Index only stemmed forms (note: no "Z" prefix is added). |
| 117 | * - STEM_ALL_Z: Index only stemmed forms (note: a "Z" prefix is added). |
| 118 | */ |
| 119 | void set_stemming_strategy(stem_strategy strategy); |
| 120 | |
105 | 121 | /** Index some text. |
106 | 122 | * |
107 | 123 | * @param itor Utf8Iterator pointing to the text to index. |
diff --git a/xapian-core/queryparser/queryparser.lemony b/xapian-core/queryparser/queryparser.lemony
index 8dedb80..4c8a642 100644
a
|
b
|
string
|
291 | 291 | Term::make_term(const string & prefix) const |
292 | 292 | { |
293 | 293 | string term; |
294 | | if (stem == QueryParser::STEM_SOME) term += 'Z'; |
| 294 | if (stem == QueryParser::STEM_SOME || stem == QueryParser::STEM_ALL_Z) term += 'Z'; |
295 | 295 | if (!prefix.empty()) { |
296 | 296 | term += prefix; |
297 | 297 | if (prefix_needs_colon(prefix, name[0])) term += ':'; |
diff --git a/xapian-core/queryparser/termgenerator.cc b/xapian-core/queryparser/termgenerator.cc
index e6f745f..176b188 100644
a
|
b
|
TermGenerator::set_flags(flags toggle, flags mask)
|
83 | 83 | } |
84 | 84 | |
85 | 85 | void |
| 86 | TermGenerator::set_stemming_strategy(stem_strategy strategy) |
| 87 | { |
| 88 | internal->stem_action = strategy; |
| 89 | } |
| 90 | |
| 91 | void |
86 | 92 | TermGenerator::index_text(const Xapian::Utf8Iterator & itor, |
87 | 93 | Xapian::termcount weight, |
88 | 94 | const string & prefix) |
diff --git a/xapian-core/queryparser/termgenerator_internal.cc b/xapian-core/queryparser/termgenerator_internal.cc
index 93f04a1..efa8780 100644
a
|
b
|
TermGenerator::Internal::index_text(Utf8Iterator itor, termcount wdf_inc,
|
177 | 177 | if (stop_mode == STOPWORDS_IGNORE && (*stopper)(cjk_token)) |
178 | 178 | continue; |
179 | 179 | |
180 | | if (with_positions && tk.get_length() == 1) { |
181 | | doc.add_posting(prefix + cjk_token, ++termpos, wdf_inc); |
182 | | } else { |
183 | | doc.add_term(prefix + cjk_token, wdf_inc); |
| 180 | if(stem_action == TermGenerator::STEM_SOME || stem_action == TermGenerator::STEM_NONE) { |
| 181 | if (with_positions && tk.get_length() == 1) { |
| 182 | doc.add_posting(prefix + cjk_token, ++termpos, wdf_inc); |
| 183 | } else { |
| 184 | doc.add_term(prefix + cjk_token, wdf_inc); |
| 185 | } |
184 | 186 | } |
| 187 | |
185 | 188 | if ((flags & FLAG_SPELLING) && prefix.empty()) |
186 | 189 | db.add_spelling(cjk_token); |
187 | 190 | |
… |
… |
TermGenerator::Internal::index_text(Utf8Iterator itor, termcount wdf_inc,
|
196 | 199 | if (!should_stem(cjk_token)) continue; |
197 | 200 | |
198 | 201 | // Add stemmed form without positional information. |
199 | | string stem("Z"); |
200 | | stem += prefix; |
201 | | stem += stemmer(cjk_token); |
202 | | doc.add_term(stem, wdf_inc); |
| 202 | if (stem_action != TermGenerator::STEM_NONE) { |
| 203 | string stem(""); |
| 204 | if(stem_action != TermGenerator::STEM_ALL) { |
| 205 | stem += "Z"; |
| 206 | } |
| 207 | stem += prefix; |
| 208 | stem += stemmer(cjk_token); |
| 209 | doc.add_term(stem, wdf_inc); |
| 210 | } |
203 | 211 | } |
204 | 212 | while (true) { |
205 | 213 | if (itor == Utf8Iterator()) return; |
… |
… |
endofterm:
|
258 | 266 | |
259 | 267 | if (stop_mode == STOPWORDS_IGNORE && (*stopper)(term)) continue; |
260 | 268 | |
261 | | if (with_positions) { |
262 | | doc.add_posting(prefix + term, ++termpos, wdf_inc); |
263 | | } else { |
264 | | doc.add_term(prefix + term, wdf_inc); |
| 269 | if(stem_action == TermGenerator::STEM_SOME || stem_action == TermGenerator::STEM_NONE) { |
| 270 | if (with_positions) { |
| 271 | doc.add_posting(prefix + term, ++termpos, wdf_inc); |
| 272 | } else { |
| 273 | doc.add_term(prefix + term, wdf_inc); |
| 274 | } |
265 | 275 | } |
266 | 276 | if ((flags & FLAG_SPELLING) && prefix.empty()) db.add_spelling(term); |
267 | 277 | |
… |
… |
endofterm:
|
275 | 285 | if (!should_stem(term)) continue; |
276 | 286 | |
277 | 287 | // Add stemmed form without positional information. |
278 | | string stem("Z"); |
279 | | stem += prefix; |
280 | | stem += stemmer(term); |
281 | | doc.add_term(stem, wdf_inc); |
| 288 | if (stem_action != TermGenerator::STEM_NONE) { |
| 289 | string stem(""); |
| 290 | if(stem_action != TermGenerator::STEM_ALL) { |
| 291 | stem += "Z"; |
| 292 | } |
| 293 | stem += prefix; |
| 294 | stem += stemmer(term); |
| 295 | doc.add_term(stem, wdf_inc); |
| 296 | } |
282 | 297 | } |
283 | 298 | } |
284 | 299 | |
diff --git a/xapian-core/queryparser/termgenerator_internal.h b/xapian-core/queryparser/termgenerator_internal.h
index f074fd9..f90f7c2 100644
a
|
b
|
class Stopper;
|
34 | 34 | class TermGenerator::Internal : public Xapian::Internal::intrusive_base { |
35 | 35 | friend class TermGenerator; |
36 | 36 | Stem stemmer; |
| 37 | stem_strategy stem_action; |
37 | 38 | const Stopper * stopper; |
38 | 39 | Document doc; |
39 | 40 | termcount termpos; |
… |
… |
class TermGenerator::Internal : public Xapian::Internal::intrusive_base {
|
41 | 42 | WritableDatabase db; |
42 | 43 | |
43 | 44 | public: |
44 | | Internal() : stopper(NULL), termpos(0), |
| 45 | Internal() : stem_action(STEM_SOME), stopper(NULL), termpos(0), |
45 | 46 | flags(TermGenerator::flags(0)) { } |
46 | 47 | void index_text(Utf8Iterator itor, |
47 | 48 | termcount weight, |