Ticket #448: xapian-core-stem-implementation-3.r14269.patch

File xapian-core-stem-implementation-3.r14269.patch, 13.0 KB (added by Evgeny Sizikov, 15 years ago)

Reverts some code alignment changes that are unrelated to the ticket's main idea.

  • xapian-core/include/xapian/stem.h

     
    2828
    2929namespace Xapian {
    3030
     31/// Class representing a stemming algorithm implementation.
     32struct StemImplementation : public Xapian::Internal::RefCntBase
     33{
     34    /// Perform cleanup common to all stemmers.
     35    virtual ~StemImplementation();
     36
     37    /// Stem the specified word.
     38    virtual std::string operator()(const std::string & word) = 0;
     39
     40    /// Return string describing this object.
     41    virtual const char * get_description() const = 0;
     42};
     43
    3144/// Class representing a stemming algorithm.
    3245class XAPIAN_VISIBILITY_DEFAULT Stem {
    3346  public:
    34     /// @private @internal Class representing the stemmer internals.
    35     class Internal;
    3647    /// @private @internal Reference counted internals.
    37     Xapian::Internal::RefCntPtr<Internal> internal;
     48    Xapian::Internal::RefCntPtr<StemImplementation> internal;
    3849
    3950    /// Copy constructor.
    4051    Stem(const Stem & o);
     
    4859     */
    4960    Stem();
    5061
     62    /** Construct a Xapian::Stem object with a user-provided stemming algorithm.
     63     *
     64     *  Users can create a subclass of Xapian::StemImplementation and wrap
     65     *  it in a Xapian::Stem object to pass to the Xapian API.
     66     */
     67    Stem(StemImplementation * p);
     68
    5169    /** Construct a Xapian::Stem object for a particular language.
    5270     *
    5371     *  @param language Either the English name for the language
  • xapian-core/languages/Makefile.mk

     
    5656        $(CC_FOR_BUILD) -o languages/snowball -DDISABLE_JAVA `for f in $(snowball_sources) ; do test -f $$f && echo $$f || echo $(srcdir)/$$f ; done`
    5757
    5858.sbl.cc:
    59         languages/snowball $< -o `echo $@|sed 's!\.cc$$!!'` -c++ -u -n InternalStem`echo $<|sed 's!.*/\(.\).*!\1!'|tr a-z A-Z``echo $<|sed 's!.*/.!!;s!\.sbl!!'` -p Stem::Internal
     59        languages/snowball $< -o `echo $@|sed 's!\.cc$$!!'` -c++ -u -n InternalStem`echo $<|sed 's!.*/\(.\).*!\1!'|tr a-z A-Z``echo $<|sed 's!.*/.!!;s!\.sbl!!'` -p SnowballStemImplementation
    6060
    6161.sbl.h:
    62         languages/snowball $< -o `echo $@|sed 's!\.h$$!!'` -c++ -u -n InternalStem`echo $<|sed 's!.*/\(.\).*!\1!'|tr a-z A-Z``echo $<|sed 's!.*/.!!;s!\.sbl!!'` -p Stem::Internal
     62        languages/snowball $< -o `echo $@|sed 's!\.h$$!!'` -c++ -u -n InternalStem`echo $<|sed 's!.*/\(.\).*!\1!'|tr a-z A-Z``echo $<|sed 's!.*/.!!;s!\.sbl!!'` -p SnowballStemImplementation
    6363
    6464languages/allsnowballheaders.h: languages/generate-allsnowballheaders languages/Makefile.mk
    6565        languages/generate-allsnowballheaders $(snowball_built_sources)
  • xapian-core/languages/steminternal.cc

     
    128128}
    129129
    130130namespace Xapian {
    131 
    132 Stem::Internal::~Internal()
     131   
     132SnowballStemImplementation::~SnowballStemImplementation()
    133133{
    134134    lose_s(p);
    135135}
    136136
    137137string
    138 Stem::Internal::operator()(const string & word)
     138SnowballStemImplementation::operator()(const string & word)
    139139{
    140140    const symbol * s = reinterpret_cast<const symbol *>(word.data());
    141141    replace_s(0, l, word.size(), s);
     
    149149
    150150/* Code for character groupings: utf8 cases */
    151151
    152 int Stem::Internal::get_utf8(int * slot) {
     152int SnowballStemImplementation::get_utf8(int * slot) {
    153153    int b0, b1;
    154154    int tmp = c;
    155155    if (tmp >= l) return 0;
     
    164164    * slot = (b0 & 0xF) << 12 | (b1 & 0x3F) << 6 | (p[tmp] & 0x3F); return 3;
    165165}
    166166
    167 int Stem::Internal::get_b_utf8(int * slot) {
     167int SnowballStemImplementation::get_b_utf8(int * slot) {
    168168    int b0, b1;
    169169    int tmp = c;
    170170    if (tmp <= lb) return 0;
     
    179179    * slot = (p[tmp] & 0xF) << 12 | (b1 & 0x3F) << 6 | (b0 & 0x3F); return 3;
    180180}
    181181
    182 int Stem::Internal::in_grouping_U(const unsigned char * s, int min, int max, int repeat) {
     182int
     183SnowballStemImplementation::in_grouping_U(const unsigned char * s, int min,
     184                                          int max, int repeat) {
    183185    do {
    184186        int ch;
    185187        int w = get_utf8(&ch);
    186188        if (!w) return -1;
    187         if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
     189        if (ch > max || (ch -= min) < 0 ||
     190            (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
    188191            return w;
    189192        c += w;
    190193    } while (repeat);
    191194    return 0;
    192195}
    193196
    194 int Stem::Internal::in_grouping_b_U(const unsigned char * s, int min, int max, int repeat) {
     197int
     198SnowballStemImplementation::in_grouping_b_U(const unsigned char * s, int min,
     199                                            int max, int repeat) {
    195200    do {
    196201        int ch;
    197202        int w = get_b_utf8(&ch);
     
    203208    return 0;
    204209}
    205210
    206 int Stem::Internal::out_grouping_U(const unsigned char * s, int min, int max, int repeat) {
     211int
     212SnowballStemImplementation::out_grouping_U(const unsigned char * s, int min,
     213                                           int max, int repeat) {
    207214    do {
    208215        int ch;
    209216        int w = get_utf8(&ch);
     
    215222    return 0;
    216223}
    217224
    218 int Stem::Internal::out_grouping_b_U(const unsigned char * s, int min, int max, int repeat) {
     225int
     226SnowballStemImplementation::out_grouping_b_U(const unsigned char * s, int min,
     227                                             int max, int repeat) {
    219228    do {
    220229        int ch;
    221230        int w = get_b_utf8(&ch);
     
    227236    return 0;
    228237}
    229238
    230 int Stem::Internal::eq_s(int s_size, const symbol * s) {
     239int SnowballStemImplementation::eq_s(int s_size, const symbol * s) {
    231240    if (l - c < s_size || memcmp(p + c, s, s_size * sizeof(symbol)) != 0)
    232241        return 0;
    233242    c += s_size;
    234243    return 1;
    235244}
    236245
    237 int Stem::Internal::eq_s_b(int s_size, const symbol * s) {
     246int SnowballStemImplementation::eq_s_b(int s_size, const symbol * s) {
    238247    if (c - lb < s_size || memcmp(p + c - s_size, s, s_size * sizeof(symbol)) != 0)
    239248        return 0;
    240249    c -= s_size;
     
    242251}
    243252
    244253int
    245 Stem::Internal::find_among(const symbol * pool, const struct among * v,
    246                            int v_size, const unsigned char * fnum,
    247                            const among_function * f)
     254SnowballStemImplementation::find_among(const symbol * pool,
     255                                       const struct among * v, int v_size,
     256                                       const unsigned char * fnum,
     257                                       const among_function * f)
    248258{
    249259    int i = 0;
    250260    int j = v_size;
     
    300310
    301311/* find_among_b is for backwards processing. Same comments apply */
    302312int
    303 Stem::Internal::find_among_b(const symbol * pool, const struct among * v,
    304                              int v_size, const unsigned char * fnum,
    305                              const among_function * f)
     313SnowballStemImplementation::find_among_b(const symbol * pool,
     314                                         const struct among * v, int v_size,
     315                                         const unsigned char * fnum,
     316                                         const among_function * f)
    306317{
    307318    int i = 0;
    308319    int j = v_size;
     
    352363}
    353364
    354365int
    355 Stem::Internal::replace_s(int c_bra, int c_ket, int s_size, const symbol * s)
     366SnowballStemImplementation::replace_s(int c_bra, int c_ket, int s_size,
     367                                      const symbol * s)
    356368{
    357369    int adjustment;
    358370    int len;
     
    378390    return adjustment;
    379391}
    380392
    381 int Stem::Internal::slice_check() {
     393int SnowballStemImplementation::slice_check() {
    382394    Assert(p);
    383395    if (bra < 0 || bra > ket || ket > l) {
    384396#if 0
     
    390402    return 0;
    391403}
    392404
    393 int Stem::Internal::slice_from_s(int s_size, const symbol * s) {
     405int SnowballStemImplementation::slice_from_s(int s_size, const symbol * s) {
    394406    if (slice_check()) return -1;
    395407    replace_s(bra, ket, s_size, s);
    396408    return 0;
    397409}
    398410
    399 void Stem::Internal::insert_s(int c_bra, int c_ket, int s_size, const symbol * s) {
     411void
     412SnowballStemImplementation::insert_s(int c_bra, int c_ket, int s_size,
     413                                     const symbol * s) {
    400414    int adjustment = replace_s(c_bra, c_ket, s_size, s);
    401415    if (c_bra <= bra) bra += adjustment;
    402416    if (c_bra <= ket) ket += adjustment;
    403417}
    404418
    405 symbol * Stem::Internal::slice_to(symbol * v) {
     419symbol * SnowballStemImplementation::slice_to(symbol * v) {
    406420    if (slice_check()) return NULL;
    407421    {
    408422        int len = ket - bra;
     
    415429    return v;
    416430}
    417431
    418 symbol * Stem::Internal::assign_to(symbol * v) {
     432symbol * SnowballStemImplementation::assign_to(symbol * v) {
    419433    int len = l;
    420434    if (CAPACITY(v) < len) {
    421435        v = increase_size(v, len);
     
    426440}
    427441
    428442#if 0
    429 void Stem::Internal::debug(int number, int line_count) {
     443void SnowballStemImplementation::debug(int number, int line_count) {
    430444    int i;
    431445    int limit = SIZE(p);
    432446    /*if (number >= 0) printf("%3d (line %4d): '", number, line_count);*/
  • xapian-core/languages/steminternal.h

     
    2727#include <cstdlib>
    2828#include <string>
    2929
    30 // FIXME: we might want to make Stem::Internal a virtual base class and have
    31 // Stem::Internal::Snowball to allow for non-Snowball stemmers...
    32 
    3330typedef unsigned char symbol;
    3431
    3532#define HEAD (2*sizeof(int))
     
    6562    reinterpret_cast<int *>(void_p)[-2] = n;
    6663}
    6764
    68 typedef int (*among_function)(Xapian::Stem::Internal *);
     65typedef int (*among_function)(Xapian::StemImplementation *);
    6966
    7067struct among {
    7168    int s_size;         /* length of search string (in symbols) */
     
    8481
    8582namespace Xapian {
    8683
    87 class Stem::Internal : public Xapian::Internal::RefCntBase {
     84class SnowballStemImplementation : public StemImplementation {
    8885    int slice_check();
    8986
    9087  protected:
     
    129126
    130127  public:
    131128    /// Perform initialisation common to all Snowball stemmers.
    132     Internal() : p(create_s()), c(0), l(0), lb(0), bra(0), ket(0) { }
     129    SnowballStemImplementation() : p(create_s()),
     130                                     c(0), l(0), lb(0), bra(0), ket(0) { }
    133131
    134132    /// Perform cleanup common to all Snowball stemmers.
    135     virtual ~Internal();
     133    virtual ~SnowballStemImplementation();
    136134
    137135    /// Stem the specified word.
    138     std::string operator()(const std::string & word);
     136    virtual std::string operator()(const std::string & word);
    139137
    140138    /// Virtual method implemented by the subclass to actually do the work.
    141139    virtual int stem() = 0;
    142 
    143     /// Return string describing this object.
    144     virtual const char * get_description() const = 0;
    145140};
    146141
    147142}
  • xapian-core/languages/stem.cc

     
    3434
    3535namespace Xapian {
    3636
     37StemImplementation::~StemImplementation() { }
     38
    3739Stem::Stem(const Stem & o) : internal(o.internal) { }
    3840
    3941void
     
    4446
    4547Stem::Stem() : internal(0) { }
    4648
     49Stem::Stem(StemImplementation * p) : internal(p) { }
     50
    4751Stem::Stem(const std::string &language) : internal(0) {
    4852    if (language.empty()) return;
    4953    switch (language[0]) {
  • xapian-core/languages/compiler/generator.c

     
    15251525        if (q->type == t_routine && q->routine_called_from_among) {
    15261526            q->among_func_count = ++among_func_count;
    15271527            g->V[0] = q;
    1528             w(g, "static int t~V0(Xapian::Stem::Internal * this_ptr) {~N"
     1528            w(g, "static int t~V0(Xapian::StemImplementation * this_ptr) {~N"
    15291529                 "    return (static_cast<Xapian::~S0 *>(this_ptr))->~V0();~N"
    15301530                 "}~N"
    15311531                 "~N");
  • xapian-bindings/csharp/Makefile.am

     
    4141        QueryParser.cs \
    4242        Remote.cs \
    4343        RSet.cs \
     44        RefCntBase.cs \
    4445        SWIGTYPE_p_std__string.cs \
    4546        SWIGTYPE_p_std__vectorT_std__string_t.cs \
    4647        SWIGTYPE_p_std__vectorT_Xapian__Query_t.cs \
     
    4849        SimpleStopper.cs \
    4950        Sorter.cs \
    5051        Stem.cs \
     52        StemImplementation.cs \
    5153        Stopper.cs \
    5254        StringValueRangeProcessor.cs \
    5355        TermGenerator.cs \
  • xapian-bindings/xapian.i

     
    768768%ignore Xapian::QueryParser::QueryParser(const QueryParser &);
    769769%include <xapian/queryparser.h>
    770770
     771namespace Xapian {
     772namespace Internal {
     773class RefCntBase {
     774  protected:
     775        RefCntBase(const RefCntBase &);
     776
     777  public:
     778        RefCntBase();
     779        typedef unsigned int ref_count_t;
     780
     781        mutable ref_count_t ref_count;
     782};
     783}
     784}
     785%nodefaultctor Xapian::StemImplementation;  // No default constructor for Xapian::StemImplementation
    771786%ignore Xapian::Stem::internal;
    772787%ignore Xapian::Stem::operator=;
    773788%ignore Xapian::Stem::Stem();