Ticket #562: modified.patch

File modified.patch, 7.0 KB (added by Sehaj Singh Kalra, 12 years ago)

Combined patch for this ticket (#562) as well as #563

  • xapian-core/include/xapian/queryparser.h

    diff --git a/xapian-core/include/xapian/queryparser.h b/xapian-core/include/xapian/queryparser.h
    index 829e187..089a832 100644
    a b class XAPIAN_VISIBILITY_DEFAULT QueryParser {  
    409409        FLAG_DEFAULT = FLAG_PHRASE|FLAG_BOOLEAN|FLAG_LOVEHATE
    410410    } feature_flag;
    411411
    412     typedef enum { STEM_NONE, STEM_SOME, STEM_ALL } stem_strategy;
     412    typedef enum { STEM_NONE, STEM_SOME, STEM_ALL, STEM_ALL_Z } stem_strategy;
    413413
    414414    /// Copy constructor.
    415415    QueryParser(const QueryParser & o);
    class XAPIAN_VISIBILITY_DEFAULT QueryParser {  
    452452     *                terms are prefixed with 'Z'.  (default in Xapian > 1.3.1)
    453453     *   - STEM_ALL:  Search for stemmed forms of all words (note: no 'Z'
    454454     *                prefix is added).
     455     *   - STEM_ALL_Z:  Search for stemmed forms of all words (note: 'Z'
     456     *                prefix is added).
    455457     */
    456458    void set_stemming_strategy(stem_strategy strategy);
    457459
  • xapian-core/include/xapian/termgenerator.h

    diff --git a/xapian-core/include/xapian/termgenerator.h b/xapian-core/include/xapian/termgenerator.h
    index 28f4294..28af489 100644
    a b class XAPIAN_VISIBILITY_DEFAULT TermGenerator {  
    8888        FLAG_SPELLING = 128 // Value matches QueryParser flag.
    8989    };
    9090
     91    /** Stemming strategies for indexing text.
     92     *  These options give TermGenerator modes matching those of QueryParser.
     93     */
     94    typedef enum { STEM_NONE, STEM_SOME, STEM_ALL, STEM_ALL_Z } stem_strategy;
     95
    9196    /** Set flags.
    9297     *
    9398     *  The new value of flags is: (flags & mask) ^ toggle
    class XAPIAN_VISIBILITY_DEFAULT TermGenerator {  
    102107     */
    103108    flags set_flags(flags toggle, flags mask = flags(0));
    104109
     110    /** Set the stemming strategy used when indexing (default: STEM_SOME).
     111     *
     112     *  @param strategy The strategy to use - possible values are:
     113     *  - STEM_NONE: Don't index any stemmed forms.
     114     *  - STEM_SOME: Index both stemmed and unstemmed forms of words (note:
     115     *                stemmed forms are prefixed with 'Z').
     116     *  - STEM_ALL:  Index only stemmed forms (note: no 'Z' prefix is added).
     117     *  - STEM_ALL_Z:  Index only stemmed forms (note: 'Z' prefix is added).
     118     */
     119    void set_stemming_strategy(stem_strategy strategy);
     120
    105121    /** Index some text.
    106122     *
    107123     * @param itor      Utf8Iterator pointing to the text to index.
  • xapian-core/queryparser/queryparser.lemony

    diff --git a/xapian-core/queryparser/queryparser.lemony b/xapian-core/queryparser/queryparser.lemony
    index 8dedb80..4c8a642 100644
    a b string  
    291291Term::make_term(const string & prefix) const
    292292{
    293293    string term;
    294     if (stem == QueryParser::STEM_SOME) term += 'Z';
     294    if (stem == QueryParser::STEM_SOME || stem == QueryParser::STEM_ALL_Z) term += 'Z';
    295295    if (!prefix.empty()) {
    296296        term += prefix;
    297297        if (prefix_needs_colon(prefix, name[0])) term += ':';
  • xapian-core/queryparser/termgenerator.cc

    diff --git a/xapian-core/queryparser/termgenerator.cc b/xapian-core/queryparser/termgenerator.cc
    index e6f745f..176b188 100644
    a b TermGenerator::set_flags(flags toggle, flags mask)  
    8383}
    8484
    8585void
     86TermGenerator::set_stemming_strategy(stem_strategy strategy)
     87{
     88    internal->stem_action = strategy;
     89}
     90
     91void
    8692TermGenerator::index_text(const Xapian::Utf8Iterator & itor,
    8793                          Xapian::termcount weight,
    8894                          const string & prefix)
  • xapian-core/queryparser/termgenerator_internal.cc

    diff --git a/xapian-core/queryparser/termgenerator_internal.cc b/xapian-core/queryparser/termgenerator_internal.cc
    index 93f04a1..efa8780 100644
    a b TermGenerator::Internal::index_text(Utf8Iterator itor, termcount wdf_inc,  
    177177                    if (stop_mode == STOPWORDS_IGNORE && (*stopper)(cjk_token))
    178178                        continue;
    179179
    180                     if (with_positions && tk.get_length() == 1) {
    181                         doc.add_posting(prefix + cjk_token, ++termpos, wdf_inc);
    182                     } else {
    183                         doc.add_term(prefix + cjk_token, wdf_inc);
     180                    if(stem_action == TermGenerator::STEM_SOME || stem_action == TermGenerator::STEM_NONE) {
     181                        if (with_positions && tk.get_length() == 1) {
     182                            doc.add_posting(prefix + cjk_token, ++termpos, wdf_inc);
     183                        } else {
     184                            doc.add_term(prefix + cjk_token, wdf_inc);
     185                        }
    184186                    }
     187                   
    185188                    if ((flags & FLAG_SPELLING) && prefix.empty())
    186189                        db.add_spelling(cjk_token);
    187190
    TermGenerator::Internal::index_text(Utf8Iterator itor, termcount wdf_inc,  
    196199                    if (!should_stem(cjk_token)) continue;
    197200
    198201                    // Add stemmed form without positional information.
    199                     string stem("Z");
    200                     stem += prefix;
    201                     stem += stemmer(cjk_token);
    202                     doc.add_term(stem, wdf_inc);
     202                    if (stem_action != TermGenerator::STEM_NONE) {
     203                        string stem("");
     204                        if(stem_action != TermGenerator::STEM_ALL) {   
     205                                stem += "Z";
     206                        }
     207                        stem += prefix;
     208                        stem += stemmer(cjk_token);
     209                        doc.add_term(stem, wdf_inc);
     210                   }
    203211                }
    204212                while (true) {
    205213                    if (itor == Utf8Iterator()) return;
    endofterm:  
    258266
    259267        if (stop_mode == STOPWORDS_IGNORE && (*stopper)(term)) continue;
    260268
    261         if (with_positions) {
    262             doc.add_posting(prefix + term, ++termpos, wdf_inc);
    263         } else {
    264             doc.add_term(prefix + term, wdf_inc);
     269        if(stem_action == TermGenerator::STEM_SOME || stem_action == TermGenerator::STEM_NONE) {
     270                if (with_positions) {
     271                    doc.add_posting(prefix + term, ++termpos, wdf_inc);
     272                } else {
     273                    doc.add_term(prefix + term, wdf_inc);
     274                }
    265275        }
    266276        if ((flags & FLAG_SPELLING) && prefix.empty()) db.add_spelling(term);
    267277
    endofterm:  
    275285        if (!should_stem(term)) continue;
    276286
    277287        // Add stemmed form without positional information.
    278         string stem("Z");
    279         stem += prefix;
    280         stem += stemmer(term);
    281         doc.add_term(stem, wdf_inc);
     288        if (stem_action != TermGenerator::STEM_NONE) {
     289                string stem("");
     290                if(stem_action != TermGenerator::STEM_ALL) {   
     291                        stem += "Z";
     292                }
     293                stem += prefix;
     294                stem += stemmer(term);
     295                doc.add_term(stem, wdf_inc);
     296        }
    282297    }
    283298}
    284299
  • xapian-core/queryparser/termgenerator_internal.h

    diff --git a/xapian-core/queryparser/termgenerator_internal.h b/xapian-core/queryparser/termgenerator_internal.h
    index f074fd9..f90f7c2 100644
    a b class Stopper;  
    3434class TermGenerator::Internal : public Xapian::Internal::intrusive_base {
    3535    friend class TermGenerator;
    3636    Stem stemmer;
     37    stem_strategy stem_action;
    3738    const Stopper * stopper;
    3839    Document doc;
    3940    termcount termpos;
    class TermGenerator::Internal : public Xapian::Internal::intrusive_base {  
    4142    WritableDatabase db;
    4243
    4344  public:
    44     Internal() : stopper(NULL), termpos(0),
     45    Internal() : stem_action(STEM_SOME), stopper(NULL), termpos(0),
    4546        flags(TermGenerator::flags(0)) { }
    4647    void index_text(Utf8Iterator itor,
    4748                    termcount weight,