Ticket #448: xapian-core-stem-implementation-2.patch
File xapian-core-stem-implementation-2.patch, 13.4 KB (added by , 15 years ago) |
---|
-
xapian-core/include/xapian/stem.h
31 31 /// Class representing a stemming algorithm. 32 32 class XAPIAN_VISIBILITY_DEFAULT Stem { 33 33 public: 34 /// @private @internal Class representing the stemmer internals. 35 class Internal; 34 35 #ifdef SWIG 36 /// @private @internal Class representing a stemming algorithm implementation. 37 struct Implementation; 38 #else 39 /// Class representing a stemming algorithm implementation. 40 struct Implementation : public Xapian::Internal::RefCntBase 41 { 42 /// Perform initialisation common to all stemmers. 43 Implementation(); 44 45 /// Perform cleanup common to all stemmers. 46 virtual ~Implementation(); 47 48 /// Stem the specified word. 49 virtual std::string operator()(const std::string & word) = 0; 50 51 /// Return string describing this object. 52 virtual const char * get_description() const = 0; 53 }; 54 #endif 55 36 56 /// @private @internal Reference counted internals. 37 Xapian::Internal::RefCntPtr<Internal> internal; 57 Xapian::Internal::RefCntPtr<Implementation> internal; 38 58 39 59 /// Copy constructor. 40 60 Stem(const Stem & o); … … 48 68 */ 49 69 Stem(); 50 70 71 /** Construct a Xapian::Stem object with user-provided stemming algorithm. 72 * 73 * User could create a subclass of Xapian::Stem::Implementation, and wrap 74 * it in a Xapian::Stem object to pass to the Xapian API. 75 */ 76 Stem(Implementation * p); 77 51 78 /** Construct a Xapian::Stem object for a particular language. 52 79 * 53 80 * @param language Either the English name for the language -
xapian-core/languages/Makefile.mk
56 56 $(CC_FOR_BUILD) -o languages/snowball -DDISABLE_JAVA `for f in $(snowball_sources) ; do test -f $$f && echo $$f || echo $(srcdir)/$$f ; done` 57 57 58 58 .sbl.cc: 59 languages/snowball $< -o `echo $@|sed 's!\.cc$$!!'` -c++ -u -n InternalStem`echo $<|sed 's!.*/\(.\).*!\1!'|tr a-z A-Z``echo $<|sed 's!.*/.!!;s!\.sbl!!'` -p Stem::Internal 59 languages/snowball $< -o `echo $@|sed 's!\.cc$$!!'` -c++ -u -n InternalStem`echo $<|sed 's!.*/\(.\).*!\1!'|tr a-z A-Z``echo $<|sed 's!.*/.!!;s!\.sbl!!'` -p SnowballStemImplementation 60 60 61 61 .sbl.h: 62 languages/snowball $< -o `echo $@|sed 's!\.h$$!!'` -c++ -u -n InternalStem`echo $<|sed 's!.*/\(.\).*!\1!'|tr a-z A-Z``echo $<|sed 's!.*/.!!;s!\.sbl!!'` -p Stem::Internal 62 languages/snowball $< -o `echo $@|sed 's!\.h$$!!'` -c++ -u -n InternalStem`echo $<|sed 's!.*/\(.\).*!\1!'|tr a-z A-Z``echo $<|sed 's!.*/.!!;s!\.sbl!!'` -p SnowballStemImplementation 63 63 64 64 languages/allsnowballheaders.h: languages/generate-allsnowballheaders languages/Makefile.mk 65 65 languages/generate-allsnowballheaders $(snowball_built_sources) -
xapian-core/languages/steminternal.cc
128 128 } 129 129 130 130 namespace Xapian { 131 132 Stem::Internal::~Internal() 131 132 SnowballStemImplementation::~SnowballStemImplementation() 133 133 { 134 134 lose_s(p); 135 135 } 136 136 137 137 string 138 Stem::Internal::operator()(const string & word) 138 SnowballStemImplementation::operator()(const string & word) 139 139 { 140 140 const symbol * s = reinterpret_cast<const symbol *>(word.data()); 141 141 replace_s(0, l, word.size(), s); … … 149 149 150 150 /* Code for character groupings: utf8 cases */ 151 151 152 int Stem::Internal::get_utf8(int * slot) { 152 int SnowballStemImplementation::get_utf8(int * slot) { 153 153 int b0, b1; 154 154 int tmp = c; 155 155 if (tmp >= l) return 0; … … 164 164 * slot = (b0 & 0xF) << 12 | (b1 & 0x3F) << 6 | (p[tmp] & 0x3F); return 3; 165 165 } 166 166 167 int Stem::Internal::get_b_utf8(int * slot) { 167 int SnowballStemImplementation::get_b_utf8(int * slot) { 168 168 int b0, b1; 169 169 int tmp = c; 170 170 if (tmp <= lb) return 0; … … 179 179 * slot = (p[tmp] & 0xF) << 12 | (b1 & 0x3F) << 6 | (b0 & 0x3F); return 3; 180 180 } 181 181 182 int Stem::Internal::in_grouping_U(const unsigned char * s, int min, int max, int repeat) { 182 int 183 SnowballStemImplementation::in_grouping_U(const unsigned char * s, int min, 184 int max, int repeat) { 183 185 do { 184 186 int ch; 185 187 int w = get_utf8(&ch); 186 188 if (!w) return -1; 187 if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) 189 if (ch > max || (ch -= min) < 0 || 190 (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) 188 191 return w; 189 192 c += w; 190 193 } while (repeat); 191 194 return 0; 192 195 } 193 196 194 int Stem::Internal::in_grouping_b_U(const unsigned char * s, int min, int max, int repeat) { 197 int 198 SnowballStemImplementation::in_grouping_b_U(const unsigned char * s, int min, 199 int max, int repeat) { 195 200 do { 196 201 int ch; 197 202 int w = get_b_utf8(&ch); 198 203 if (!w) return -1; 199 if (ch > max || (ch -= min) < 0 || 
(s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) 204 if (ch > max || (ch -= min) < 0 || 205 (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) 200 206 return w; 201 207 c -= w; 202 208 } while (repeat); 203 209 return 0; 204 210 } 205 211 206 int Stem::Internal::out_grouping_U(const unsigned char * s, int min, int max, int repeat) { 212 int 213 SnowballStemImplementation::out_grouping_U(const unsigned char * s, int min, 214 int max, int repeat) { 207 215 do { 208 216 int ch; 209 217 int w = get_utf8(&ch); 210 218 if (!w) return -1; 211 if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)) 212 /* FIXME: try adding this so gopast in generated code is simpler: if (repeat == 2) c += w; */ return w; 219 if (!(ch > max || (ch -= min) < 0 || 220 (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)) 221 /* FIXME: try adding this so gopast in generated code is simpler: 222 if (repeat == 2) c += w; */ return w; 213 223 c += w; 214 224 } while (repeat); 215 225 return 0; 216 226 } 217 227 218 int Stem::Internal::out_grouping_b_U(const unsigned char * s, int min, int max, int repeat) { 228 int 229 SnowballStemImplementation::out_grouping_b_U(const unsigned char * s, int min, 230 int max, int repeat) { 219 231 do { 220 232 int ch; 221 233 int w = get_b_utf8(&ch); 222 234 if (!w) return -1; 223 if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)) 235 if (!(ch > max || (ch -= min) < 0 || 236 (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)) 224 237 return w; 225 238 c -= w; 226 239 } while (repeat); 227 240 return 0; 228 241 } 229 242 230 int Stem::Internal::eq_s(int s_size, const symbol * s) { 243 int SnowballStemImplementation::eq_s(int s_size, const symbol * s) { 231 244 if (l - c < s_size || memcmp(p + c, s, s_size * sizeof(symbol)) != 0) 232 245 return 0; 233 246 c += s_size; 234 247 return 1; 235 248 } 236 249 237 int Stem::Internal::eq_s_b(int s_size, const symbol * s) { 238 if (c - lb < s_size || memcmp(p + c - s_size, s, s_size * sizeof(symbol)) != 0) 250 
int SnowballStemImplementation::eq_s_b(int s_size, const symbol * s) { 251 if (c - lb < s_size || 252 memcmp(p + c - s_size, s, s_size * sizeof(symbol)) != 0) 239 253 return 0; 240 254 c -= s_size; 241 255 return 1; 242 256 } 243 257 244 258 int 245 Stem::Internal::find_among(const symbol * pool, const struct among * v, 246 int v_size, const unsigned char * fnum, 247 const among_function * f) 259 SnowballStemImplementation::find_among(const symbol * pool, 260 const struct among * v, int v_size, 261 const unsigned char * fnum, 262 const among_function * f) 248 263 { 249 264 int i = 0; 250 265 int j = v_size; … … 300 315 301 316 /* find_among_b is for backwards processing. Same comments apply */ 302 317 int 303 Stem::Internal::find_among_b(const symbol * pool, const struct among * v, 304 int v_size, const unsigned char * fnum, 305 const among_function * f) 318 SnowballStemImplementation::find_among_b(const symbol * pool, 319 const struct among * v, int v_size, 320 const unsigned char * fnum, 321 const among_function * f) 306 322 { 307 323 int i = 0; 308 324 int j = v_size; … … 352 368 } 353 369 354 370 int 355 Stem::Internal::replace_s(int c_bra, int c_ket, int s_size, const symbol * s) 371 SnowballStemImplementation::replace_s(int c_bra, int c_ket, int s_size, 372 const symbol * s) 356 373 { 357 374 int adjustment; 358 375 int len; … … 378 395 return adjustment; 379 396 } 380 397 381 int Stem::Internal::slice_check() { 398 int SnowballStemImplementation::slice_check() { 382 399 Assert(p); 383 400 if (bra < 0 || bra > ket || ket > l) { 384 401 #if 0 … … 390 407 return 0; 391 408 } 392 409 393 int Stem::Internal::slice_from_s(int s_size, const symbol * s) { 410 int SnowballStemImplementation::slice_from_s(int s_size, const symbol * s) { 394 411 if (slice_check()) return -1; 395 412 replace_s(bra, ket, s_size, s); 396 413 return 0; 397 414 } 398 415 399 void Stem::Internal::insert_s(int c_bra, int c_ket, int s_size, const symbol * s) { 416 void 417 
SnowballStemImplementation::insert_s(int c_bra, int c_ket, int s_size, 418 const symbol * s) { 400 419 int adjustment = replace_s(c_bra, c_ket, s_size, s); 401 420 if (c_bra <= bra) bra += adjustment; 402 421 if (c_bra <= ket) ket += adjustment; 403 422 } 404 423 405 symbol * Stem::Internal::slice_to(symbol * v) { 424 symbol * SnowballStemImplementation::slice_to(symbol * v) { 406 425 if (slice_check()) return NULL; 407 426 { 408 427 int len = ket - bra; … … 415 434 return v; 416 435 } 417 436 418 symbol * Stem::Internal::assign_to(symbol * v) { 437 symbol * SnowballStemImplementation::assign_to(symbol * v) { 419 438 int len = l; 420 439 if (CAPACITY(v) < len) { 421 440 v = increase_size(v, len); … … 426 445 } 427 446 428 447 #if 0 429 void Stem::Internal::debug(int number, int line_count) { 448 void SnowballStemImplementation::debug(int number, int line_count) { 430 449 int i; 431 450 int limit = SIZE(p); 432 451 /*if (number >= 0) printf("%3d (line %4d): '", number, line_count);*/ -
xapian-core/languages/steminternal.h
27 27 #include <cstdlib> 28 28 #include <string> 29 29 30 // FIXME: we might want to make Stem::Internal a virtual base class and have 31 // Stem::Internal::Snowball to allow for non-Snowball stemmers... 32 33 30 typedef unsigned char symbol; 34 31 35 32 #define HEAD (2*sizeof(int)) … … 65 62 reinterpret_cast<int *>(void_p)[-2] = n; 66 63 } 67 64 68 typedef int (*among_function)(Xapian::Stem::Internal*); 65 typedef int (*among_function)(Xapian::Stem::Implementation *); 69 66 70 67 struct among { 71 68 int s_size; /* length of search string (in symbols) */ … … 84 81 85 82 namespace Xapian { 86 83 87 class Stem::Internal : public Xapian::Internal::RefCntBase{ 84 class SnowballStemImplementation : public Stem::Implementation { 88 85 int slice_check(); 89 86 90 87 protected: … … 129 126 130 127 public: 131 128 /// Perform initialisation common to all Snowball stemmers. 132 Internal() : p(create_s()), c(0), l(0), lb(0), bra(0), ket(0) { } 129 SnowballStemImplementation() : p(create_s()), 130 c(0), l(0), lb(0), bra(0), ket(0) { } 133 131 134 132 /// Perform cleanup common to all Snowball stemmers. 135 virtual ~Internal(); 133 virtual ~SnowballStemImplementation(); 136 134 137 135 /// Stem the specified word. 138 std::string operator()(const std::string & word); 136 virtual std::string operator()(const std::string & word); 139 137 140 138 /// Virtual method implemented by the subclass to actually do the work. 141 139 virtual int stem() = 0; 142 143 /// Return string describing this object. 144 virtual const char * get_description() const = 0; 145 140 }; 146 141 147 142 } -
xapian-core/languages/stem.cc
34 34 35 35 namespace Xapian { 36 36 37 Stem::Implementation::Implementation() 38 { 39 } 40 41 Stem::Implementation::~Implementation() 42 { 43 } 44 37 45 Stem::Stem(const Stem & o) : internal(o.internal) { } 38 46 39 47 void … … 44 52 45 53 Stem::Stem() : internal(0) { } 46 54 55 Stem::Stem(Stem::Implementation * p) : internal(p) { } 56 47 57 Stem::Stem(const std::string &language) : internal(0) { 48 58 if (language.empty()) return; 49 59 switch (language[0]) { -
xapian-core/languages/compiler/generator.c
1525 1525 if (q->type == t_routine && q->routine_called_from_among) { 1526 1526 q->among_func_count = ++among_func_count; 1527 1527 g->V[0] = q; 1528 w(g, "static int t~V0(Xapian::Stem::Internal* this_ptr) {~N" 1528 w(g, "static int t~V0(Xapian::Stem::Implementation * this_ptr) {~N" 1529 1529 " return (static_cast<Xapian::~S0 *>(this_ptr))->~V0();~N" 1530 1530 "}~N" 1531 1531 "~N"); -
xapian-bindings/csharp/Makefile.am
41 41 QueryParser.cs \ 42 42 Remote.cs \ 43 43 RSet.cs \ 44 SWIGTYPE_p_Xapian__Stem__Implementation.cs \ 44 45 SWIGTYPE_p_std__string.cs \ 45 46 SWIGTYPE_p_std__vectorT_std__string_t.cs \ 46 47 SWIGTYPE_p_std__vectorT_Xapian__Query_t.cs \