Ticket #448: xapian-core-stem-implementation-4.r14281.patch
File xapian-core-stem-implementation-4.r14281.patch, 13.9 KB (added by , 15 years ago) |
---|
-
xapian-core/include/xapian/stem.h
28 28 29 29 namespace Xapian { 30 30 31 /// Class representing a stemming algorithm implementation. 32 struct StemImplementation : public Xapian::Internal::RefCntBase 33 { 34 /// Perform cleanup common to all stemmers. 35 virtual ~StemImplementation() { } 36 37 /// Stem the specified word. 38 virtual std::string operator()(const std::string & word) = 0; 39 40 /// Return string describing this object. 41 virtual const std::string get_description() const = 0; 42 }; 43 31 44 /// Class representing a stemming algorithm. 32 45 class XAPIAN_VISIBILITY_DEFAULT Stem { 33 46 public: 34 /// @private @internal Class representing the stemmer internals. 35 class Internal; 36 47 /// @private @internal Reference counted internals. 37 Xapian::Internal::RefCntPtr<Internal> internal; 48 Xapian::Internal::RefCntPtr<StemImplementation> internal; 38 49 39 50 /// Copy constructor. 40 51 Stem(const Stem & o); … … 48 59 */ 49 60 Stem(); 50 61 62 /** Construct a Xapian::Stem object with user-provided stemming algorithm. 63 * 64 * User could create a subclass of Xapian::StemImplementation, and wrap 65 * it in a Xapian::Stem object to pass to the Xapian API. 66 */ 67 Stem(StemImplementation * p); 68 51 69 /** Construct a Xapian::Stem object for a particular language. 52 70 * 53 71 * @param language Either the English name for the language -
xapian-core/languages/Makefile.mk
56 56 $(CC_FOR_BUILD) -o languages/snowball -DDISABLE_JAVA `for f in $(snowball_sources) ; do test -f $$f && echo $$f || echo $(srcdir)/$$f ; done` 57 57 58 58 .sbl.cc: 59 languages/snowball $< -o `echo $@|sed 's!\.cc$$!!'` -c++ -u -n InternalStem`echo $<|sed 's!.*/\(.\).*!\1!'|tr a-z A-Z``echo $<|sed 's!.*/.!!;s!\.sbl!!'` -p Stem::Internal 59 languages/snowball $< -o `echo $@|sed 's!\.cc$$!!'` -c++ -u -n InternalStem`echo $<|sed 's!.*/\(.\).*!\1!'|tr a-z A-Z``echo $<|sed 's!.*/.!!;s!\.sbl!!'` -p SnowballStemImplementation 60 60 61 61 .sbl.h: 62 languages/snowball $< -o `echo $@|sed 's!\.h$$!!'` -c++ -u -n InternalStem`echo $<|sed 's!.*/\(.\).*!\1!'|tr a-z A-Z``echo $<|sed 's!.*/.!!;s!\.sbl!!'` -p Stem::Internal 62 languages/snowball $< -o `echo $@|sed 's!\.h$$!!'` -c++ -u -n InternalStem`echo $<|sed 's!.*/\(.\).*!\1!'|tr a-z A-Z``echo $<|sed 's!.*/.!!;s!\.sbl!!'` -p SnowballStemImplementation 63 63 64 64 languages/allsnowballheaders.h: languages/generate-allsnowballheaders languages/Makefile.mk 65 65 languages/generate-allsnowballheaders $(snowball_built_sources) -
xapian-core/languages/steminternal.cc
128 128 } 129 129 130 130 namespace Xapian { 131 132 Stem::Internal::~Internal() 131 132 SnowballStemImplementation::~SnowballStemImplementation() 133 133 { 134 134 lose_s(p); 135 135 } 136 136 137 137 string 138 Stem::Internal::operator()(const string & word) 138 SnowballStemImplementation::operator()(const string & word) 139 139 { 140 140 const symbol * s = reinterpret_cast<const symbol *>(word.data()); 141 141 replace_s(0, l, word.size(), s); … … 149 149 150 150 /* Code for character groupings: utf8 cases */ 151 151 152 int Stem::Internal::get_utf8(int * slot) { 152 int SnowballStemImplementation::get_utf8(int * slot) { 153 153 int b0, b1; 154 154 int tmp = c; 155 155 if (tmp >= l) return 0; … … 164 164 * slot = (b0 & 0xF) << 12 | (b1 & 0x3F) << 6 | (p[tmp] & 0x3F); return 3; 165 165 } 166 166 167 int Stem::Internal::get_b_utf8(int * slot) { 167 int SnowballStemImplementation::get_b_utf8(int * slot) { 168 168 int b0, b1; 169 169 int tmp = c; 170 170 if (tmp <= lb) return 0; … … 179 179 * slot = (p[tmp] & 0xF) << 12 | (b1 & 0x3F) << 6 | (b0 & 0x3F); return 3; 180 180 } 181 181 182 int Stem::Internal::in_grouping_U(const unsigned char * s, int min, int max, int repeat) { 182 int 183 SnowballStemImplementation::in_grouping_U(const unsigned char * s, int min, 184 int max, int repeat) { 183 185 do { 184 186 int ch; 185 187 int w = get_utf8(&ch); 186 188 if (!w) return -1; 187 if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) 189 if (ch > max || (ch -= min) < 0 || 190 (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) 188 191 return w; 189 192 c += w; 190 193 } while (repeat); 191 194 return 0; 192 195 } 193 196 194 int Stem::Internal::in_grouping_b_U(const unsigned char * s, int min, int max, int repeat) { 197 int 198 SnowballStemImplementation::in_grouping_b_U(const unsigned char * s, int min, 199 int max, int repeat) { 195 200 do { 196 201 int ch; 197 202 int w = get_b_utf8(&ch); … … 203 208 return 0; 204 209 } 205 210 206 int 
Stem::Internal::out_grouping_U(const unsigned char * s, int min, int max, int repeat) { 211 int 212 SnowballStemImplementation::out_grouping_U(const unsigned char * s, int min, 213 int max, int repeat) { 207 214 do { 208 215 int ch; 209 216 int w = get_utf8(&ch); … … 215 222 return 0; 216 223 } 217 224 218 int Stem::Internal::out_grouping_b_U(const unsigned char * s, int min, int max, int repeat) { 225 int 226 SnowballStemImplementation::out_grouping_b_U(const unsigned char * s, int min, 227 int max, int repeat) { 219 228 do { 220 229 int ch; 221 230 int w = get_b_utf8(&ch); … … 227 236 return 0; 228 237 } 229 238 230 int Stem::Internal::eq_s(int s_size, const symbol * s) { 239 int SnowballStemImplementation::eq_s(int s_size, const symbol * s) { 231 240 if (l - c < s_size || memcmp(p + c, s, s_size * sizeof(symbol)) != 0) 232 241 return 0; 233 242 c += s_size; 234 243 return 1; 235 244 } 236 245 237 int Stem::Internal::eq_s_b(int s_size, const symbol * s) { 246 int SnowballStemImplementation::eq_s_b(int s_size, const symbol * s) { 238 247 if (c - lb < s_size || memcmp(p + c - s_size, s, s_size * sizeof(symbol)) != 0) 239 248 return 0; 240 249 c -= s_size; … … 242 251 } 243 252 244 253 int 245 Stem::Internal::find_among(const symbol * pool, const struct among * v, 246 int v_size, const unsigned char * fnum, 247 const among_function * f) 254 SnowballStemImplementation::find_among(const symbol * pool, 255 const struct among * v, int v_size, 256 const unsigned char * fnum, 257 const among_function * f) 248 258 { 249 259 int i = 0; 250 260 int j = v_size; … … 300 310 301 311 /* find_among_b is for backwards processing. 
Same comments apply */ 302 312 int 303 Stem::Internal::find_among_b(const symbol * pool, const struct among * v, 304 int v_size, const unsigned char * fnum, 305 const among_function * f) 313 SnowballStemImplementation::find_among_b(const symbol * pool, 314 const struct among * v, int v_size, 315 const unsigned char * fnum, 316 const among_function * f) 306 317 { 307 318 int i = 0; 308 319 int j = v_size; … … 352 363 } 353 364 354 365 int 355 Stem::Internal::replace_s(int c_bra, int c_ket, int s_size, const symbol * s) 366 SnowballStemImplementation::replace_s(int c_bra, int c_ket, int s_size, 367 const symbol * s) 356 368 { 357 369 int adjustment; 358 370 int len; … … 378 390 return adjustment; 379 391 } 380 392 381 int Stem::Internal::slice_check() { 393 int SnowballStemImplementation::slice_check() { 382 394 Assert(p); 383 395 if (bra < 0 || bra > ket || ket > l) { 384 396 #if 0 … … 390 402 return 0; 391 403 } 392 404 393 int Stem::Internal::slice_from_s(int s_size, const symbol * s) { 405 int SnowballStemImplementation::slice_from_s(int s_size, const symbol * s) { 394 406 if (slice_check()) return -1; 395 407 replace_s(bra, ket, s_size, s); 396 408 return 0; 397 409 } 398 410 399 void Stem::Internal::insert_s(int c_bra, int c_ket, int s_size, const symbol * s) { 411 void 412 SnowballStemImplementation::insert_s(int c_bra, int c_ket, int s_size, 413 const symbol * s) { 400 414 int adjustment = replace_s(c_bra, c_ket, s_size, s); 401 415 if (c_bra <= bra) bra += adjustment; 402 416 if (c_bra <= ket) ket += adjustment; 403 417 } 404 418 405 symbol * Stem::Internal::slice_to(symbol * v) { 419 symbol * SnowballStemImplementation::slice_to(symbol * v) { 406 420 if (slice_check()) return NULL; 407 421 { 408 422 int len = ket - bra; … … 415 429 return v; 416 430 } 417 431 418 symbol * Stem::Internal::assign_to(symbol * v) { 432 symbol * SnowballStemImplementation::assign_to(symbol * v) { 419 433 int len = l; 420 434 if (CAPACITY(v) < len) { 421 435 v = increase_size(v, 
len); … … 426 440 } 427 441 428 442 #if 0 429 void Stem::Internal::debug(int number, int line_count) { 443 void SnowballStemImplementation::debug(int number, int line_count) { 430 444 int i; 431 445 int limit = SIZE(p); 432 446 /*if (number >= 0) printf("%3d (line %4d): '", number, line_count);*/ -
xapian-core/languages/steminternal.h
27 27 #include <cstdlib> 28 28 #include <string> 29 29 30 // FIXME: we might want to make Stem::Internal a virtual base class and have 31 // Stem::Internal::Snowball to allow for non-Snowball stemmers... 32 33 30 typedef unsigned char symbol; 34 31 35 32 #define HEAD (2*sizeof(int)) … … 65 62 reinterpret_cast<int *>(void_p)[-2] = n; 66 63 } 67 64 68 typedef int (*among_function)(Xapian::Stem::Internal *); 65 typedef int (*among_function)(Xapian::StemImplementation *); 69 66 70 67 struct among { 71 68 int s_size; /* length of search string (in symbols) */ … … 84 81 85 82 namespace Xapian { 86 83 87 class Stem::Internal : public Xapian::Internal::RefCntBase { 84 class SnowballStemImplementation : public StemImplementation { 88 85 int slice_check(); 89 86 90 87 protected: … … 129 126 130 127 public: 131 128 /// Perform initialisation common to all Snowball stemmers. 132 Internal() : p(create_s()), c(0), l(0), lb(0), bra(0), ket(0) { } 129 SnowballStemImplementation() : p(create_s()), 130 c(0), l(0), lb(0), bra(0), ket(0) { } 133 131 134 132 /// Perform cleanup common to all Snowball stemmers. 135 virtual ~Internal(); 133 virtual ~SnowballStemImplementation(); 136 134 137 135 /// Stem the specified word. 138 std::string operator()(const std::string & word); 136 virtual std::string operator()(const std::string & word); 139 137 140 138 /// Virtual method implemented by the subclass to actually do the work. 141 139 virtual int stem() = 0; 142 143 /// Return string describing this object. 144 virtual const char * get_description() const = 0; 145 140 }; 146 141 147 142 } -
xapian-core/languages/stem.cc
44 44 45 45 Stem::Stem() : internal(0) { } 46 46 47 Stem::Stem(StemImplementation * p) : internal(p) { } 48 47 49 Stem::Stem(const std::string &language) : internal(0) { 48 50 if (language.empty()) return; 49 51 switch (language[0]) { -
xapian-core/languages/compiler/generator.c
1525 1525 if (q->type == t_routine && q->routine_called_from_among) { 1526 1526 q->among_func_count = ++among_func_count; 1527 1527 g->V[0] = q; 1528 w(g, "static int t~V0(Xapian::Stem::Internal * this_ptr) {~N" 1528 w(g, "static int t~V0(Xapian::StemImplementation * this_ptr) {~N" 1529 1529 " return (static_cast<Xapian::~S0 *>(this_ptr))->~V0();~N" 1530 1530 "}~N" 1531 1531 "~N"); … … 1664 1664 w(g, "}~N"); 1665 1665 1666 1666 w(g, "~N" 1667 "const char *~N" 1667 "const std::string~N" 1668 1668 "Xapian::~S0::get_description() const~N" 1669 1669 "{~N" 1670 1670 " return \"~S2\";~N" … … 1739 1739 } 1740 1740 } 1741 1741 1742 w(g, " const char *get_description() const;~N" 1742 w(g, " const std::string get_description() const;~N" 1743 1743 "};~N" 1744 1744 "~N" 1745 1745 "}~N"); -
xapian-bindings/csharp/Makefile.am
41 41 QueryParser.cs \ 42 42 Remote.cs \ 43 43 RSet.cs \ 44 RefCntBase.cs \ 44 45 SWIGTYPE_p_std__string.cs \ 45 46 SWIGTYPE_p_std__vectorT_std__string_t.cs \ 46 47 SWIGTYPE_p_std__vectorT_Xapian__Query_t.cs \ … … 48 49 SimpleStopper.cs \ 49 50 Sorter.cs \ 50 51 Stem.cs \ 52 StemImplementation.cs \ 51 53 Stopper.cs \ 52 54 StringValueRangeProcessor.cs \ 53 55 TermGenerator.cs \ -
xapian-bindings/python/generate-python-exceptions
23 23 my @allclasses = (@baseclasses, @classes); 24 24 25 25 my @directorclasses = qw( 26 MatchDecider ExpandDecider Stopper ValueRangeProcessor Sorter KeyMaker 26 MatchDecider ExpandDecider Stopper ValueRangeProcessor Sorter KeyMaker StemImplementation 27 27 ); 28 28 29 29 my @posting_source_virtual_methods = qw( … … 190 190 SWIG_fail; 191 191 } 192 192 } 193 %exception Xapian::StemImplementation::get_description { 194 try { 195 $action 196 } catch (...) { 197 Xapian::SetPythonException(); 198 SWIG_fail; 199 } 200 } 193 201 194 202 /* If a Python error is raised by a call to a director function, the following 195 203 * code should cause a C++ exception to be thrown. -
xapian-bindings/xapian.i
768 768 %ignore Xapian::QueryParser::QueryParser(const QueryParser &); 769 769 %include <xapian/queryparser.h> 770 770 771 namespace Xapian { 772 namespace Internal { 773 struct RefCntBase { 774 }; 775 } 776 } 777 %feature("director") Xapian::StemImplementation; 778 771 779 %ignore Xapian::Stem::internal; 772 780 %ignore Xapian::Stem::operator=; 773 781 %ignore Xapian::Stem::Stem();