diff --git a/xapian-core/include/xapian/queryparser.h b/xapian-core/include/xapian/queryparser.h
index 829e187..089a832 100644
|
a
|
b
|
class XAPIAN_VISIBILITY_DEFAULT QueryParser {
|
| 409 | 409 | FLAG_DEFAULT = FLAG_PHRASE|FLAG_BOOLEAN|FLAG_LOVEHATE |
| 410 | 410 | } feature_flag; |
| 411 | 411 | |
| 412 | | typedef enum { STEM_NONE, STEM_SOME, STEM_ALL } stem_strategy; |
| | 412 | typedef enum { STEM_NONE, STEM_SOME, STEM_ALL, STEM_ALL_Z } stem_strategy; |
| 413 | 413 | |
| 414 | 414 | /// Copy constructor. |
| 415 | 415 | QueryParser(const QueryParser & o); |
| … |
… |
class XAPIAN_VISIBILITY_DEFAULT QueryParser {
|
| 452 | 452 | * terms are prefixed with 'Z'. (default in Xapian > 1.3.1) |
| 453 | 453 | * - STEM_ALL: Search for stemmed forms of all words (note: no 'Z' |
| 454 | 454 | * prefix is added). |
| | 455 | * - STEM_ALL_Z: Search for stemmed forms of all words (note: 'Z' |
| | 456 | * prefix is added). |
| 455 | 457 | */ |
| 456 | 458 | void set_stemming_strategy(stem_strategy strategy); |
| 457 | 459 | |
diff --git a/xapian-core/include/xapian/termgenerator.h b/xapian-core/include/xapian/termgenerator.h
index 28f4294..28af489 100644
|
a
|
b
|
class XAPIAN_VISIBILITY_DEFAULT TermGenerator {
|
| 88 | 88 | FLAG_SPELLING = 128 // Value matches QueryParser flag. |
| 89 | 89 | }; |
| 90 | 90 | |
| | 91 | /** Stemming strategies for indexing text. |
| | 92 | * The following options provide matching modes for QueryParser and TermGenerator. |
| | 93 | */ |
| | 94 | typedef enum { STEM_NONE, STEM_SOME, STEM_ALL, STEM_ALL_Z } stem_strategy; |
| | 95 | |
| 91 | 96 | /** Set flags. |
| 92 | 97 | * |
| 93 | 98 | * The new value of flags is: (flags & mask) ^ toggle |
| … |
… |
class XAPIAN_VISIBILITY_DEFAULT TermGenerator {
|
| 102 | 107 | */ |
| 103 | 108 | flags set_flags(flags toggle, flags mask = flags(0)); |
| 104 | 109 | |
| | 110 | /** Set the stemming strategy used for indexing (default is STEM_SOME). |
| | 111 | * |
| | 112 | * @param strategy The strategy to use - possible values are: |
| | 113 | * - STEM_NONE: Don't index any stemmed words. |
| | 114 | * - STEM_SOME: Index both stemmed and full (non-stemmed) words (note: |
| | 115 | * stemmed words are prefixed with "Z"). |
| | 116 | * - STEM_ALL: Index only stemmed words (note: stemmed words don't have a "Z" prefix). |
| | 117 | * - STEM_ALL_Z: Index only stemmed words (note: stemmed words have a "Z" prefix). |
| | 118 | */ |
| | 119 | void set_stemming_strategy(stem_strategy strategy); |
| | 120 | |
| 105 | 121 | /** Index some text. |
| 106 | 122 | * |
| 107 | 123 | * @param itor Utf8Iterator pointing to the text to index. |
diff --git a/xapian-core/queryparser/queryparser.lemony b/xapian-core/queryparser/queryparser.lemony
index 8dedb80..4c8a642 100644
|
a
|
b
|
string
|
| 291 | 291 | Term::make_term(const string & prefix) const |
| 292 | 292 | { |
| 293 | 293 | string term; |
| 294 | | if (stem == QueryParser::STEM_SOME) term += 'Z'; |
| | 294 | if (stem == QueryParser::STEM_SOME || stem == QueryParser::STEM_ALL_Z) term += 'Z'; |
| 295 | 295 | if (!prefix.empty()) { |
| 296 | 296 | term += prefix; |
| 297 | 297 | if (prefix_needs_colon(prefix, name[0])) term += ':'; |
diff --git a/xapian-core/queryparser/termgenerator.cc b/xapian-core/queryparser/termgenerator.cc
index e6f745f..176b188 100644
|
a
|
b
|
TermGenerator::set_flags(flags toggle, flags mask)
|
| 83 | 83 | } |
| 84 | 84 | |
| 85 | 85 | void |
| | 86 | TermGenerator::set_stemming_strategy(stem_strategy strategy) |
| | 87 | { |
| | 88 | internal->stem_action = strategy; |
| | 89 | } |
| | 90 | |
| | 91 | void |
| 86 | 92 | TermGenerator::index_text(const Xapian::Utf8Iterator & itor, |
| 87 | 93 | Xapian::termcount weight, |
| 88 | 94 | const string & prefix) |
diff --git a/xapian-core/queryparser/termgenerator_internal.cc b/xapian-core/queryparser/termgenerator_internal.cc
index 93f04a1..efa8780 100644
|
a
|
b
|
TermGenerator::Internal::index_text(Utf8Iterator itor, termcount wdf_inc,
|
| 177 | 177 | if (stop_mode == STOPWORDS_IGNORE && (*stopper)(cjk_token)) |
| 178 | 178 | continue; |
| 179 | 179 | |
| 180 | | if (with_positions && tk.get_length() == 1) { |
| 181 | | doc.add_posting(prefix + cjk_token, ++termpos, wdf_inc); |
| 182 | | } else { |
| 183 | | doc.add_term(prefix + cjk_token, wdf_inc); |
| | 180 | if(stem_action == TermGenerator::STEM_SOME || stem_action == TermGenerator::STEM_NONE) { |
| | 181 | if (with_positions && tk.get_length() == 1) { |
| | 182 | doc.add_posting(prefix + cjk_token, ++termpos, wdf_inc); |
| | 183 | } else { |
| | 184 | doc.add_term(prefix + cjk_token, wdf_inc); |
| | 185 | } |
| 184 | 186 | } |
| | 187 | |
| 185 | 188 | if ((flags & FLAG_SPELLING) && prefix.empty()) |
| 186 | 189 | db.add_spelling(cjk_token); |
| 187 | 190 | |
| … |
… |
TermGenerator::Internal::index_text(Utf8Iterator itor, termcount wdf_inc,
|
| 196 | 199 | if (!should_stem(cjk_token)) continue; |
| 197 | 200 | |
| 198 | 201 | // Add stemmed form without positional information. |
| 199 | | string stem("Z"); |
| 200 | | stem += prefix; |
| 201 | | stem += stemmer(cjk_token); |
| 202 | | doc.add_term(stem, wdf_inc); |
| | 202 | if (stem_action != TermGenerator::STEM_NONE) { |
| | 203 | string stem(""); |
| | 204 | if(stem_action != TermGenerator::STEM_ALL) { |
| | 205 | stem += "Z"; |
| | 206 | } |
| | 207 | stem += prefix; |
| | 208 | stem += stemmer(cjk_token); |
| | 209 | doc.add_term(stem, wdf_inc); |
| | 210 | } |
| 203 | 211 | } |
| 204 | 212 | while (true) { |
| 205 | 213 | if (itor == Utf8Iterator()) return; |
| … |
… |
endofterm:
|
| 258 | 266 | |
| 259 | 267 | if (stop_mode == STOPWORDS_IGNORE && (*stopper)(term)) continue; |
| 260 | 268 | |
| 261 | | if (with_positions) { |
| 262 | | doc.add_posting(prefix + term, ++termpos, wdf_inc); |
| 263 | | } else { |
| 264 | | doc.add_term(prefix + term, wdf_inc); |
| | 269 | if(stem_action == TermGenerator::STEM_SOME || stem_action == TermGenerator::STEM_NONE) { |
| | 270 | if (with_positions) { |
| | 271 | doc.add_posting(prefix + term, ++termpos, wdf_inc); |
| | 272 | } else { |
| | 273 | doc.add_term(prefix + term, wdf_inc); |
| | 274 | } |
| 265 | 275 | } |
| 266 | 276 | if ((flags & FLAG_SPELLING) && prefix.empty()) db.add_spelling(term); |
| 267 | 277 | |
| … |
… |
endofterm:
|
| 275 | 285 | if (!should_stem(term)) continue; |
| 276 | 286 | |
| 277 | 287 | // Add stemmed form without positional information. |
| 278 | | string stem("Z"); |
| 279 | | stem += prefix; |
| 280 | | stem += stemmer(term); |
| 281 | | doc.add_term(stem, wdf_inc); |
| | 288 | if (stem_action != TermGenerator::STEM_NONE) { |
| | 289 | string stem(""); |
| | 290 | if(stem_action != TermGenerator::STEM_ALL) { |
| | 291 | stem += "Z"; |
| | 292 | } |
| | 293 | stem += prefix; |
| | 294 | stem += stemmer(term); |
| | 295 | doc.add_term(stem, wdf_inc); |
| | 296 | } |
| 282 | 297 | } |
| 283 | 298 | } |
| 284 | 299 | |
diff --git a/xapian-core/queryparser/termgenerator_internal.h b/xapian-core/queryparser/termgenerator_internal.h
index f074fd9..f90f7c2 100644
|
a
|
b
|
class Stopper;
|
| 34 | 34 | class TermGenerator::Internal : public Xapian::Internal::intrusive_base { |
| 35 | 35 | friend class TermGenerator; |
| 36 | 36 | Stem stemmer; |
| | 37 | stem_strategy stem_action; |
| 37 | 38 | const Stopper * stopper; |
| 38 | 39 | Document doc; |
| 39 | 40 | termcount termpos; |
| … |
… |
class TermGenerator::Internal : public Xapian::Internal::intrusive_base {
|
| 41 | 42 | WritableDatabase db; |
| 42 | 43 | |
| 43 | 44 | public: |
| 44 | | Internal() : stopper(NULL), termpos(0), |
| | 45 | Internal() : stem_action(STEM_SOME), stopper(NULL), termpos(0), |
| 45 | 46 | flags(TermGenerator::flags(0)) { } |
| 46 | 47 | void index_text(Utf8Iterator itor, |
| 47 | 48 | termcount weight, |