Ticket #448: xapian-core-stem-implementation.patch
File xapian-core-stem-implementation.patch, 12.6 KB (added by , 14 years ago) |
---|
-
xapian-core/include/xapian/stem.h
32 32 class XAPIAN_VISIBILITY_DEFAULT Stem { 33 33 public: 34 34 /// @private @internal Class representing the stemmer internals. 35 class Internal; 35 class Implementation; 36 37 /// @private @internal Class representing the Snowball stemmer internals. 38 class SnowballImplementation; 39 36 40 /// @private @internal Reference counted internals. 37 Xapian::Internal::RefCntPtr<Internal> internal; 41 Xapian::Internal::RefCntPtr<Implementation> internal; 38 42 39 43 /// Copy constructor. 40 44 Stem(const Stem & o); … … 48 52 */ 49 53 Stem(); 50 54 55 /** Construct a Xapian::Stem object with user-provided stemming algorithm. 56 * 57 * User could create a subclass of Xapian::Stem::Implementation, and wrap 58 * it in a Xapian::Stem object to pass to the Xapian API. 59 */ 60 Stem(Implementation * p); 61 51 62 /** Construct a Xapian::Stem object for a particular language. 52 63 * 53 64 * @param language Either the English name for the language -
xapian-core/languages/Makefile.mk
56 56 $(CC_FOR_BUILD) -o languages/snowball -DDISABLE_JAVA `for f in $(snowball_sources) ; do test -f $$f && echo $$f || echo $(srcdir)/$$f ; done` 57 57 58 58 .sbl.cc: 59 languages/snowball $< -o `echo $@|sed 's!\.cc$$!!'` -c++ -u -n InternalStem`echo $<|sed 's!.*/\(.\).*!\1!'|tr a-z A-Z``echo $<|sed 's!.*/.!!;s!\.sbl!!'` -p Stem::Internal 59 languages/snowball $< -o `echo $@|sed 's!\.cc$$!!'` -c++ -u -n InternalStem`echo $<|sed 's!.*/\(.\).*!\1!'|tr a-z A-Z``echo $<|sed 's!.*/.!!;s!\.sbl!!'` -p Stem::SnowballImplementation 60 60 61 61 .sbl.h: 62 languages/snowball $< -o `echo $@|sed 's!\.h$$!!'` -c++ -u -n InternalStem`echo $<|sed 's!.*/\(.\).*!\1!'|tr a-z A-Z``echo $<|sed 's!.*/.!!;s!\.sbl!!'` -p Stem::Internal 62 languages/snowball $< -o `echo $@|sed 's!\.h$$!!'` -c++ -u -n InternalStem`echo $<|sed 's!.*/\(.\).*!\1!'|tr a-z A-Z``echo $<|sed 's!.*/.!!;s!\.sbl!!'` -p Stem::SnowballImplementation 63 63 64 64 languages/allsnowballheaders.h: languages/generate-allsnowballheaders languages/Makefile.mk 65 65 languages/generate-allsnowballheaders $(snowball_built_sources) -
xapian-core/languages/steminternal.cc
129 129 130 130 namespace Xapian { 131 131 132 Stem::Internal::~Internal() 132 Stem::Implementation::Implementation() 133 133 { 134 } 135 136 Stem::Implementation::~Implementation() 137 { 138 } 139 140 Stem::SnowballImplementation::~SnowballImplementation() 141 { 134 142 lose_s(p); 135 143 } 136 144 137 145 string 138 Stem::Internal::operator()(const string & word) 146 Stem::SnowballImplementation::operator()(const string & word) 139 147 { 140 148 const symbol * s = reinterpret_cast<const symbol *>(word.data()); 141 149 replace_s(0, l, word.size(), s); … … 149 157 150 158 /* Code for character groupings: utf8 cases */ 151 159 152 int Stem::Internal::get_utf8(int * slot) { 160 int Stem::SnowballImplementation::get_utf8(int * slot) { 153 161 int b0, b1; 154 162 int tmp = c; 155 163 if (tmp >= l) return 0; … … 164 172 * slot = (b0 & 0xF) << 12 | (b1 & 0x3F) << 6 | (p[tmp] & 0x3F); return 3; 165 173 } 166 174 167 int Stem::Internal::get_b_utf8(int * slot) { 175 int Stem::SnowballImplementation::get_b_utf8(int * slot) { 168 176 int b0, b1; 169 177 int tmp = c; 170 178 if (tmp <= lb) return 0; … … 179 187 * slot = (p[tmp] & 0xF) << 12 | (b1 & 0x3F) << 6 | (b0 & 0x3F); return 3; 180 188 } 181 189 182 int Stem::Internal::in_grouping_U(const unsigned char * s, int min, int max, int repeat) { 190 int 191 Stem::SnowballImplementation::in_grouping_U(const unsigned char * s, int min, 192 int max, int repeat) { 183 193 do { 184 194 int ch; 185 195 int w = get_utf8(&ch); 186 196 if (!w) return -1; 187 if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) 197 if (ch > max || (ch -= min) < 0 || 198 (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) 188 199 return w; 189 200 c += w; 190 201 } while (repeat); 191 202 return 0; 192 203 } 193 204 194 int Stem::Internal::in_grouping_b_U(const unsigned char * s, int min, int max, int repeat) { 205 int 206 Stem::SnowballImplementation::in_grouping_b_U(const unsigned char * s, int min, 207 int max, int repeat) { 195 208 do {
196 209 int ch; 197 210 int w = get_b_utf8(&ch); 198 211 if (!w) return -1; 199 if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) 212 if (ch > max || (ch -= min) < 0 || 213 (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) 200 214 return w; 201 215 c -= w; 202 216 } while (repeat); 203 217 return 0; 204 218 } 205 219 206 int Stem::Internal::out_grouping_U(const unsigned char * s, int min, int max, int repeat) { 220 int 221 Stem::SnowballImplementation::out_grouping_U(const unsigned char * s, int min, 222 int max, int repeat) { 207 223 do { 208 224 int ch; 209 225 int w = get_utf8(&ch); 210 226 if (!w) return -1; 211 if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)) 212 /* FIXME: try adding this so gopast in generated code is simpler: if (repeat == 2) c += w; */ return w; 227 if (!(ch > max || (ch -= min) < 0 || 228 (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)) 229 /* FIXME: try adding this so gopast in generated code is simpler: 230 if (repeat == 2) c += w; */ return w; 213 231 c += w; 214 232 } while (repeat); 215 233 return 0; 216 234 } 217 235 218 int Stem::Internal::out_grouping_b_U(const unsigned char * s, int min, int max, int repeat) { 236 int 237 Stem::SnowballImplementation::out_grouping_b_U(const unsigned char * s, int min, 238 int max, int repeat) { 219 239 do { 220 240 int ch; 221 241 int w = get_b_utf8(&ch); 222 242 if (!w) return -1; 223 if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)) 243 if (!(ch > max || (ch -= min) < 0 || 244 (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)) 224 245 return w; 225 246 c -= w; 226 247 } while (repeat); 227 248 return 0; 228 249 } 229 250 230 int Stem::Internal::eq_s(int s_size, const symbol * s) { 251 int Stem::SnowballImplementation::eq_s(int s_size, const symbol * s) { 231 252 if (l - c < s_size || memcmp(p + c, s, s_size * sizeof(symbol)) != 0) 232 253 return 0; 233 254 c += s_size; 234 255 return 1; 235 256 } 236 257 237 int 
Stem::Internal::eq_s_b(int s_size, const symbol * s) { 238 if (c - lb < s_size || memcmp(p + c - s_size, s, s_size * sizeof(symbol)) != 0) 258 int Stem::SnowballImplementation::eq_s_b(int s_size, const symbol * s) { 259 if (c - lb < s_size || 260 memcmp(p + c - s_size, s, s_size * sizeof(symbol)) != 0) 239 261 return 0; 240 262 c -= s_size; 241 263 return 1; 242 264 } 243 265 244 266 int 245 Stem::Internal::find_among(const symbol * pool, const struct among * v, 246 int v_size, const unsigned char * fnum, 247 const among_function * f) 267 Stem::SnowballImplementation::find_among(const symbol * pool, 268 const struct among * v, int v_size, 269 const unsigned char * fnum, 270 const among_function * f) 248 271 { 249 272 int i = 0; 250 273 int j = v_size; … … 300 323 301 324 /* find_among_b is for backwards processing. Same comments apply */ 302 325 int 303 Stem::Internal::find_among_b(const symbol * pool, const struct among * v, 304 int v_size, const unsigned char * fnum, 305 const among_function * f) 326 Stem::SnowballImplementation::find_among_b(const symbol * pool, 327 const struct among * v, int v_size, 328 const unsigned char * fnum, 329 const among_function * f) 306 330 { 307 331 int i = 0; 308 332 int j = v_size; … … 352 376 } 353 377 354 378 int 355 Stem::Internal::replace_s(int c_bra, int c_ket, int s_size, const symbol * s) 379 Stem::SnowballImplementation::replace_s(int c_bra, int c_ket, int s_size, 380 const symbol * s) 356 381 { 357 382 int adjustment; 358 383 int len; … … 378 403 return adjustment; 379 404 } 380 405 381 int Stem::Internal::slice_check() { 406 int Stem::SnowballImplementation::slice_check() { 382 407 Assert(p); 383 408 if (bra < 0 || bra > ket || ket > l) { 384 409 #if 0 … … 390 415 return 0; 391 416 } 392 417 393 int Stem::Internal::slice_from_s(int s_size, const symbol * s) { 418 int Stem::SnowballImplementation::slice_from_s(int s_size, const symbol * s) { 394 419 if (slice_check()) return -1; 395 420 replace_s(bra, ket, s_size, s);
396 421 return 0; 397 422 } 398 423 399 void Stem::Internal::insert_s(int c_bra, int c_ket, int s_size, const symbol * s) { 424 void 425 Stem::SnowballImplementation::insert_s(int c_bra, int c_ket, int s_size, 426 const symbol * s) { 400 427 int adjustment = replace_s(c_bra, c_ket, s_size, s); 401 428 if (c_bra <= bra) bra += adjustment; 402 429 if (c_bra <= ket) ket += adjustment; 403 430 } 404 431 405 symbol * Stem::Internal::slice_to(symbol * v) { 432 symbol * Stem::SnowballImplementation::slice_to(symbol * v) { 406 433 if (slice_check()) return NULL; 407 434 { 408 435 int len = ket - bra; … … 415 442 return v; 416 443 } 417 444 418 symbol * Stem::Internal::assign_to(symbol * v) { 445 symbol * Stem::SnowballImplementation::assign_to(symbol * v) { 419 446 int len = l; 420 447 if (CAPACITY(v) < len) { 421 448 v = increase_size(v, len); … … 426 453 } 427 454 428 455 #if 0 429 void Stem::Internal::debug(int number, int line_count) { 456 void Stem::SnowballImplementation::debug(int number, int line_count) { 430 457 int i; 431 458 int limit = SIZE(p); 432 459 /*if (number >= 0) printf("%3d (line %4d): '", number, line_count);*/ -
xapian-core/languages/steminternal.h
65 65 reinterpret_cast<int *>(void_p)[-2] = n; 66 66 } 67 67 68 typedef int (*among_function)(Xapian::Stem::Internal*); 68 typedef int (*among_function)(Xapian::Stem::Implementation *); 69 69 70 70 struct among { 71 71 int s_size; /* length of search string (in symbols) */ … … 84 84 85 85 namespace Xapian { 86 86 87 class Stem::Internal : public Xapian::Internal::RefCntBase { 87 /// Class representing a stemming algorithm implementation. 88 struct Stem::Implementation : public Xapian::Internal::RefCntBase 89 { 90 /// Perform initialisation common to all stemmers. 91 Implementation(); 92 93 /// Perform cleanup common to all stemmers. 94 virtual ~Implementation(); 95 96 /// Stem the specified word. 97 virtual std::string operator()(const std::string & word) = 0; 98 99 /// Return string describing this object. 100 virtual const char * get_description() const = 0; 101 }; 102 103 class Stem::SnowballImplementation : public Stem::Implementation { 88 104 int slice_check(); 89 105 90 106 protected: … … 129 145 130 146 public: 131 147 /// Perform initialisation common to all Snowball stemmers. 132 Internal() : p(create_s()), c(0), l(0), lb(0), bra(0), ket(0) { } 148 SnowballImplementation() : p(create_s()), 149 c(0), l(0), lb(0), bra(0), ket(0) { } 133 150 134 151 /// Perform cleanup common to all Snowball stemmers. 135 virtual ~Internal(); 152 virtual ~SnowballImplementation(); 136 153 137 154 /// Stem the specified word. 138 std::string operator()(const std::string & word); 155 virtual std::string operator()(const std::string & word); 139 156 140 157 /// Virtual method implemented by the subclass to actually do the work. 141 158 virtual int stem() = 0; 142 143 /// Return string describing this object. 144 virtual const char * get_description() const = 0; 145 159 }; 146 160 147 161 } -
xapian-core/languages/stem.cc
44 44 45 45 Stem::Stem() : internal(0) { } 46 46 47 Stem::Stem(Stem::Implementation * p) : internal(p) { } 48 47 49 Stem::Stem(const std::string &language) : internal(0) { 48 50 if (language.empty()) return; 49 51 switch (language[0]) { -
xapian-core/languages/compiler/generator.c
1525 1525 if (q->type == t_routine && q->routine_called_from_among) { 1526 1526 q->among_func_count = ++among_func_count; 1527 1527 g->V[0] = q; 1528 w(g, "static int t~V0(Xapian::Stem::Internal* this_ptr) {~N" 1528 w(g, "static int t~V0(Xapian::Stem::Implementation * this_ptr) {~N" 1529 1529 " return (static_cast<Xapian::~S0 *>(this_ptr))->~V0();~N" 1530 1530 "}~N" 1531 1531 "~N");