Ticket #448: xapian-core-stem-implementation-2.patch

File xapian-core-stem-implementation-2.patch, 13.4 KB (added by Evgeny Sizikov, 15 years ago)

Fixed the C# binding build error and moved Xapian::Stem::Implementation into the xapian/stem.h header. The only open question is what to name the Xapian::SnowballStemImplementation class ...

  • xapian-core/include/xapian/stem.h

     
    3131/// Class representing a stemming algorithm.
    3232class XAPIAN_VISIBILITY_DEFAULT Stem {
    3333  public:
    34     /// @private @internal Class representing the stemmer internals.
    35     class Internal;
     34
     35#ifdef SWIG
     36    /// @private @internal Class representing a stemming algorithm implementation.
     37    struct Implementation;
     38#else
     39    /// Class representing a stemming algorithm implementation.
     40    struct Implementation : public Xapian::Internal::RefCntBase
     41    {
     42        /// Perform initialisation common to all stemmers.
     43        Implementation();
     44
     45        /// Perform cleanup common to all stemmers.
     46        virtual ~Implementation();
     47
     48        /// Stem the specified word.
     49        virtual std::string operator()(const std::string & word) = 0;
     50
     51        /// Return string describing this object.
     52        virtual const char * get_description() const = 0;
     53    };
     54#endif
     55
    3656    /// @private @internal Reference counted internals.
    37     Xapian::Internal::RefCntPtr<Internal> internal;
     57    Xapian::Internal::RefCntPtr<Implementation> internal;
    3858
    3959    /// Copy constructor.
    4060    Stem(const Stem & o);
     
    4868     */
    4969    Stem();
    5070
     71    /** Construct a Xapian::Stem object with a user-provided stemming algorithm.
     72     *
     73     *  The user can create a subclass of Xapian::Stem::Implementation and wrap
     74     *  it in a Xapian::Stem object to pass to the Xapian API.
     75     */
     76    Stem(Implementation * p);
     77
    5178    /** Construct a Xapian::Stem object for a particular language.
    5279     *
    5380     *  @param language Either the English name for the language
  • xapian-core/languages/Makefile.mk

     
    5656        $(CC_FOR_BUILD) -o languages/snowball -DDISABLE_JAVA `for f in $(snowball_sources) ; do test -f $$f && echo $$f || echo $(srcdir)/$$f ; done`
    5757
    5858.sbl.cc:
    59         languages/snowball $< -o `echo $@|sed 's!\.cc$$!!'` -c++ -u -n InternalStem`echo $<|sed 's!.*/\(.\).*!\1!'|tr a-z A-Z``echo $<|sed 's!.*/.!!;s!\.sbl!!'` -p Stem::Internal
     59        languages/snowball $< -o `echo $@|sed 's!\.cc$$!!'` -c++ -u -n InternalStem`echo $<|sed 's!.*/\(.\).*!\1!'|tr a-z A-Z``echo $<|sed 's!.*/.!!;s!\.sbl!!'` -p SnowballStemImplementation
    6060
    6161.sbl.h:
    62         languages/snowball $< -o `echo $@|sed 's!\.h$$!!'` -c++ -u -n InternalStem`echo $<|sed 's!.*/\(.\).*!\1!'|tr a-z A-Z``echo $<|sed 's!.*/.!!;s!\.sbl!!'` -p Stem::Internal
     62        languages/snowball $< -o `echo $@|sed 's!\.h$$!!'` -c++ -u -n InternalStem`echo $<|sed 's!.*/\(.\).*!\1!'|tr a-z A-Z``echo $<|sed 's!.*/.!!;s!\.sbl!!'` -p SnowballStemImplementation
    6363
    6464languages/allsnowballheaders.h: languages/generate-allsnowballheaders languages/Makefile.mk
    6565        languages/generate-allsnowballheaders $(snowball_built_sources)
  • xapian-core/languages/steminternal.cc

     
    128128}
    129129
    130130namespace Xapian {
    131 
    132 Stem::Internal::~Internal()
     131   
     132SnowballStemImplementation::~SnowballStemImplementation()
    133133{
    134134    lose_s(p);
    135135}
    136136
    137137string
    138 Stem::Internal::operator()(const string & word)
     138SnowballStemImplementation::operator()(const string & word)
    139139{
    140140    const symbol * s = reinterpret_cast<const symbol *>(word.data());
    141141    replace_s(0, l, word.size(), s);
     
    149149
    150150/* Code for character groupings: utf8 cases */
    151151
    152 int Stem::Internal::get_utf8(int * slot) {
     152int SnowballStemImplementation::get_utf8(int * slot) {
    153153    int b0, b1;
    154154    int tmp = c;
    155155    if (tmp >= l) return 0;
     
    164164    * slot = (b0 & 0xF) << 12 | (b1 & 0x3F) << 6 | (p[tmp] & 0x3F); return 3;
    165165}
    166166
    167 int Stem::Internal::get_b_utf8(int * slot) {
     167int SnowballStemImplementation::get_b_utf8(int * slot) {
    168168    int b0, b1;
    169169    int tmp = c;
    170170    if (tmp <= lb) return 0;
     
    179179    * slot = (p[tmp] & 0xF) << 12 | (b1 & 0x3F) << 6 | (b0 & 0x3F); return 3;
    180180}
    181181
    182 int Stem::Internal::in_grouping_U(const unsigned char * s, int min, int max, int repeat) {
     182int
     183SnowballStemImplementation::in_grouping_U(const unsigned char * s, int min,
     184                                          int max, int repeat) {
    183185    do {
    184186        int ch;
    185187        int w = get_utf8(&ch);
    186188        if (!w) return -1;
    187         if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
     189        if (ch > max || (ch -= min) < 0 ||
     190            (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
    188191            return w;
    189192        c += w;
    190193    } while (repeat);
    191194    return 0;
    192195}
    193196
    194 int Stem::Internal::in_grouping_b_U(const unsigned char * s, int min, int max, int repeat) {
     197int
     198SnowballStemImplementation::in_grouping_b_U(const unsigned char * s, int min,
     199                                            int max, int repeat) {
    195200    do {
    196201        int ch;
    197202        int w = get_b_utf8(&ch);
    198203        if (!w) return -1;
    199         if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
     204        if (ch > max || (ch -= min) < 0 ||
     205            (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
    200206            return w;
    201207        c -= w;
    202208    } while (repeat);
    203209    return 0;
    204210}
    205211
    206 int Stem::Internal::out_grouping_U(const unsigned char * s, int min, int max, int repeat) {
     212int
     213SnowballStemImplementation::out_grouping_U(const unsigned char * s, int min,
     214                                           int max, int repeat) {
    207215    do {
    208216        int ch;
    209217        int w = get_utf8(&ch);
    210218        if (!w) return -1;
    211         if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0))
    212             /* FIXME: try adding this so gopast in generated code is simpler: if (repeat == 2) c += w; */ return w;
     219        if (!(ch > max || (ch -= min) < 0 ||
     220            (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0))
     221            /* FIXME: try adding this so gopast in generated code is simpler:
     222               if (repeat == 2) c += w; */ return w;
    213223        c += w;
    214224    } while (repeat);
    215225    return 0;
    216226}
    217227
    218 int Stem::Internal::out_grouping_b_U(const unsigned char * s, int min, int max, int repeat) {
     228int
     229SnowballStemImplementation::out_grouping_b_U(const unsigned char * s, int min,
     230                                             int max, int repeat) {
    219231    do {
    220232        int ch;
    221233        int w = get_b_utf8(&ch);
    222234        if (!w) return -1;
    223         if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0))
     235        if (!(ch > max || (ch -= min) < 0 ||
     236            (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0))
    224237            return w;
    225238        c -= w;
    226239    } while (repeat);
    227240    return 0;
    228241}
    229242
    230 int Stem::Internal::eq_s(int s_size, const symbol * s) {
     243int SnowballStemImplementation::eq_s(int s_size, const symbol * s) {
    231244    if (l - c < s_size || memcmp(p + c, s, s_size * sizeof(symbol)) != 0)
    232245        return 0;
    233246    c += s_size;
    234247    return 1;
    235248}
    236249
    237 int Stem::Internal::eq_s_b(int s_size, const symbol * s) {
    238     if (c - lb < s_size || memcmp(p + c - s_size, s, s_size * sizeof(symbol)) != 0)
     250int SnowballStemImplementation::eq_s_b(int s_size, const symbol * s) {
     251    if (c - lb < s_size ||
     252        memcmp(p + c - s_size, s, s_size * sizeof(symbol)) != 0)
    239253        return 0;
    240254    c -= s_size;
    241255    return 1;
    242256}
    243257
    244258int
    245 Stem::Internal::find_among(const symbol * pool, const struct among * v,
    246                            int v_size, const unsigned char * fnum,
    247                            const among_function * f)
     259SnowballStemImplementation::find_among(const symbol * pool,
     260                                       const struct among * v, int v_size,
     261                                       const unsigned char * fnum,
     262                                       const among_function * f)
    248263{
    249264    int i = 0;
    250265    int j = v_size;
     
    300315
    301316/* find_among_b is for backwards processing. Same comments apply */
    302317int
    303 Stem::Internal::find_among_b(const symbol * pool, const struct among * v,
    304                              int v_size, const unsigned char * fnum,
    305                              const among_function * f)
     318SnowballStemImplementation::find_among_b(const symbol * pool,
     319                                         const struct among * v, int v_size,
     320                                         const unsigned char * fnum,
     321                                         const among_function * f)
    306322{
    307323    int i = 0;
    308324    int j = v_size;
     
    352368}
    353369
    354370int
    355 Stem::Internal::replace_s(int c_bra, int c_ket, int s_size, const symbol * s)
     371SnowballStemImplementation::replace_s(int c_bra, int c_ket, int s_size,
     372                                      const symbol * s)
    356373{
    357374    int adjustment;
    358375    int len;
     
    378395    return adjustment;
    379396}
    380397
    381 int Stem::Internal::slice_check() {
     398int SnowballStemImplementation::slice_check() {
    382399    Assert(p);
    383400    if (bra < 0 || bra > ket || ket > l) {
    384401#if 0
     
    390407    return 0;
    391408}
    392409
    393 int Stem::Internal::slice_from_s(int s_size, const symbol * s) {
     410int SnowballStemImplementation::slice_from_s(int s_size, const symbol * s) {
    394411    if (slice_check()) return -1;
    395412    replace_s(bra, ket, s_size, s);
    396413    return 0;
    397414}
    398415
    399 void Stem::Internal::insert_s(int c_bra, int c_ket, int s_size, const symbol * s) {
     416void
     417SnowballStemImplementation::insert_s(int c_bra, int c_ket, int s_size,
     418                                     const symbol * s) {
    400419    int adjustment = replace_s(c_bra, c_ket, s_size, s);
    401420    if (c_bra <= bra) bra += adjustment;
    402421    if (c_bra <= ket) ket += adjustment;
    403422}
    404423
    405 symbol * Stem::Internal::slice_to(symbol * v) {
     424symbol * SnowballStemImplementation::slice_to(symbol * v) {
    406425    if (slice_check()) return NULL;
    407426    {
    408427        int len = ket - bra;
     
    415434    return v;
    416435}
    417436
    418 symbol * Stem::Internal::assign_to(symbol * v) {
     437symbol * SnowballStemImplementation::assign_to(symbol * v) {
    419438    int len = l;
    420439    if (CAPACITY(v) < len) {
    421440        v = increase_size(v, len);
     
    426445}
    427446
    428447#if 0
    429 void Stem::Internal::debug(int number, int line_count) {
     448void SnowballStemImplementation::debug(int number, int line_count) {
    430449    int i;
    431450    int limit = SIZE(p);
    432451    /*if (number >= 0) printf("%3d (line %4d): '", number, line_count);*/
  • xapian-core/languages/steminternal.h

     
    2727#include <cstdlib>
    2828#include <string>
    2929
    30 // FIXME: we might want to make Stem::Internal a virtual base class and have
    31 // Stem::Internal::Snowball to allow for non-Snowball stemmers...
    32 
    3330typedef unsigned char symbol;
    3431
    3532#define HEAD (2*sizeof(int))
     
    6562    reinterpret_cast<int *>(void_p)[-2] = n;
    6663}
    6764
    68 typedef int (*among_function)(Xapian::Stem::Internal *);
     65typedef int (*among_function)(Xapian::Stem::Implementation *);
    6966
    7067struct among {
    7168    int s_size;         /* length of search string (in symbols) */
     
    8481
    8582namespace Xapian {
    8683
    87 class Stem::Internal : public Xapian::Internal::RefCntBase {
     84class SnowballStemImplementation : public Stem::Implementation {
    8885    int slice_check();
    8986
    9087  protected:
     
    129126
    130127  public:
    131128    /// Perform initialisation common to all Snowball stemmers.
    132     Internal() : p(create_s()), c(0), l(0), lb(0), bra(0), ket(0) { }
     129    SnowballStemImplementation() : p(create_s()),
     130                                     c(0), l(0), lb(0), bra(0), ket(0) { }
    133131
    134132    /// Perform cleanup common to all Snowball stemmers.
    135     virtual ~Internal();
     133    virtual ~SnowballStemImplementation();
    136134
    137135    /// Stem the specified word.
    138     std::string operator()(const std::string & word);
     136    virtual std::string operator()(const std::string & word);
    139137
    140138    /// Virtual method implemented by the subclass to actually do the work.
    141139    virtual int stem() = 0;
    142 
    143     /// Return string describing this object.
    144     virtual const char * get_description() const = 0;
    145140};
    146141
    147142}
  • xapian-core/languages/stem.cc

     
    3434
    3535namespace Xapian {
    3636
     37Stem::Implementation::Implementation()
     38{
     39}
     40   
     41Stem::Implementation::~Implementation()
     42{
     43}
     44
    3745Stem::Stem(const Stem & o) : internal(o.internal) { }
    3846
    3947void
     
    4452
    4553Stem::Stem() : internal(0) { }
    4654
     55Stem::Stem(Stem::Implementation * p) : internal(p) { }
     56
    4757Stem::Stem(const std::string &language) : internal(0) {
    4858    if (language.empty()) return;
    4959    switch (language[0]) {
  • xapian-core/languages/compiler/generator.c

     
    15251525        if (q->type == t_routine && q->routine_called_from_among) {
    15261526            q->among_func_count = ++among_func_count;
    15271527            g->V[0] = q;
    1528             w(g, "static int t~V0(Xapian::Stem::Internal * this_ptr) {~N"
     1528            w(g, "static int t~V0(Xapian::Stem::Implementation * this_ptr) {~N"
    15291529                 "    return (static_cast<Xapian::~S0 *>(this_ptr))->~V0();~N"
    15301530                 "}~N"
    15311531                 "~N");
  • xapian-bindings/csharp/Makefile.am

     
    4141        QueryParser.cs \
    4242        Remote.cs \
    4343        RSet.cs \
     44        SWIGTYPE_p_Xapian__Stem__Implementation.cs \
    4445        SWIGTYPE_p_std__string.cs \
    4546        SWIGTYPE_p_std__vectorT_std__string_t.cs \
    4647        SWIGTYPE_p_std__vectorT_Xapian__Query_t.cs \