1 | Index: queryparser/termgenerator_internal.h
|
---|
2 | ===================================================================
|
---|
3 | --- queryparser/termgenerator_internal.h (revision 10133)
|
---|
4 | +++ queryparser/termgenerator_internal.h (working copy)
|
---|
5 | @@ -33,7 +33,7 @@
|
---|
6 |
|
---|
7 | class TermGenerator::Internal : public Xapian::Internal::RefCntBase {
|
---|
8 | friend class TermGenerator;
|
---|
9 | - Stem stemmer;
|
---|
10 | + Xapian::Internal::RefCntPtr<Xapian::BaseStem> stemmer;
|
---|
11 | const Stopper * stopper;
|
---|
12 | Document doc;
|
---|
13 | termcount termpos;
|
---|
14 | Index: queryparser/termgenerator.cc
|
---|
15 | ===================================================================
|
---|
16 | --- queryparser/termgenerator.cc (revision 10133)
|
---|
17 | +++ queryparser/termgenerator.cc (working copy)
|
---|
18 | @@ -46,7 +46,7 @@
|
---|
19 | void
|
---|
20 | TermGenerator::set_stemmer(const Xapian::Stem & stemmer)
|
---|
21 | {
|
---|
22 | - internal->stemmer = stemmer;
|
---|
23 | + internal->stemmer = stemmer.internal;
|
---|
24 | }
|
---|
25 |
|
---|
26 | void
|
---|
27 | @@ -122,7 +122,7 @@
|
---|
28 | string s("Xapian::TermGenerator(");
|
---|
29 | if (internal.get()) {
|
---|
30 | s += "stem=";
|
---|
31 | - s += internal->stemmer.get_description();
|
---|
32 | + s += internal->stemmer->get_description();
|
---|
33 | if (internal->stopper) {
|
---|
34 | s += ", stopper set";
|
---|
35 | }
|
---|
36 | Index: queryparser/queryparser.lemony
|
---|
37 | ===================================================================
|
---|
38 | --- queryparser/queryparser.lemony (revision 10133)
|
---|
39 | +++ queryparser/queryparser.lemony (working copy)
|
---|
40 | @@ -182,7 +182,7 @@
|
---|
41 | : qpi(qpi_), error(NULL), flags(flags_) { }
|
---|
42 |
|
---|
43 | string stem_term(const string &term) {
|
---|
44 | - return qpi->stemmer(term);
|
---|
45 | + return (*qpi->stemmer)(term);
|
---|
46 | }
|
---|
47 |
|
---|
48 | void add_to_stoplist(const Term * term) {
|
---|
49 | @@ -907,7 +907,7 @@
|
---|
50 | // when used with positional operators".
|
---|
51 | stem_strategy stem_term = stem_action;
|
---|
52 | if (stem_term != STEM_NONE) {
|
---|
53 | - if (!stemmer.internal.get()) {
|
---|
54 | + if (!stemmer.get()) {
|
---|
55 | // No stemmer is set.
|
---|
56 | stem_term = STEM_NONE;
|
---|
57 | } else if (stem_term == STEM_SOME) {
|
---|
58 | Index: queryparser/queryparser_internal.h
|
---|
59 | ===================================================================
|
---|
60 | --- queryparser/queryparser_internal.h (revision 10133)
|
---|
61 | +++ queryparser/queryparser_internal.h (working copy)
|
---|
62 | @@ -57,7 +57,7 @@
|
---|
63 | class QueryParser::Internal : public Xapian::Internal::RefCntBase {
|
---|
64 | friend class QueryParser;
|
---|
65 | friend class ::State;
|
---|
66 | - Stem stemmer;
|
---|
67 | + Xapian::Internal::RefCntPtr<Xapian::BaseStem> stemmer;
|
---|
68 | stem_strategy stem_action;
|
---|
69 | const Stopper * stopper;
|
---|
70 | Query::op default_op;
|
---|
71 | Index: queryparser/queryparser.cc
|
---|
72 | ===================================================================
|
---|
73 | --- queryparser/queryparser.cc (revision 10133)
|
---|
74 | +++ queryparser/queryparser.cc (working copy)
|
---|
75 | @@ -74,7 +74,7 @@
|
---|
76 | void
|
---|
77 | QueryParser::set_stemmer(const Xapian::Stem & stemmer)
|
---|
78 | {
|
---|
79 | - internal->stemmer = stemmer;
|
---|
80 | + internal->stemmer = stemmer.internal;
|
---|
81 | }
|
---|
82 |
|
---|
83 | void
|
---|
84 | Index: queryparser/termgenerator_internal.cc
|
---|
85 | ===================================================================
|
---|
86 | --- queryparser/termgenerator_internal.cc (revision 10133)
|
---|
87 | +++ queryparser/termgenerator_internal.cc (working copy)
|
---|
88 | @@ -206,7 +206,7 @@
|
---|
89 | }
|
---|
90 | if ((flags & FLAG_SPELLING) && prefix.empty()) db.add_spelling(term);
|
---|
91 |
|
---|
92 | - if (!stemmer.internal.get()) continue;
|
---|
93 | + if (!stemmer.get()) continue;
|
---|
94 |
|
---|
95 | if (stop_mode == STOPWORDS_INDEX_UNSTEMMED_ONLY && (*stopper)(term))
|
---|
96 | continue;
|
---|
97 | @@ -218,7 +218,7 @@
|
---|
98 | // Add stemmed form without positional information.
|
---|
99 | string stem("Z");
|
---|
100 | stem += prefix;
|
---|
101 | - stem += stemmer(term);
|
---|
102 | + stem += (*stemmer)(term);
|
---|
103 | doc.add_term(stem, weight);
|
---|
104 | }
|
---|
105 | }
|
---|
106 | Index: tests/nocompile_stem1.cc
|
---|
107 | ===================================================================
|
---|
108 | --- tests/nocompile_stem1.cc (revision 0)
|
---|
109 | +++ tests/nocompile_stem1.cc (revision 0)
|
---|
110 | @@ -0,0 +1,29 @@
|
---|
111 | +/** @file nocompile_stem1.cc
|
---|
112 | + * @brief Check that it's not possible to create an instance of one of
|
---|
113 | + * the built-in BaseStem subclasses on the stack.
|
---|
114 | + */
|
---|
115 | +/* Copyright 2008 Lemur Consulting Ltd
|
---|
116 | + *
|
---|
117 | + * This program is free software; you can redistribute it and/or
|
---|
118 | + * modify it under the terms of the GNU General Public License as
|
---|
119 | + * published by the Free Software Foundation; either version 2 of the
|
---|
120 | + * License, or (at your option) any later version.
|
---|
121 | + *
|
---|
122 | + * This program is distributed in the hope that it will be useful,
|
---|
123 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
124 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
125 | + * GNU General Public License for more details.
|
---|
126 | + *
|
---|
127 | + * You should have received a copy of the GNU General Public License
|
---|
128 | + * along with this program; if not, write to the Free Software
|
---|
129 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
|
---|
130 | + * USA
|
---|
131 | + */
|
---|
132 | +
|
---|
133 | +#include <config.h>
|
---|
134 | +#include <xapian.h>
|
---|
135 | +
|
---|
136 | +int main()
|
---|
137 | +{
|
---|
138 | + Xapian::SnowballStem stem("en");
|
---|
139 | +}
|
---|
140 |
|
---|
141 | Property changes on: tests/nocompile_stem1.cc
|
---|
142 | ___________________________________________________________________
|
---|
143 | Name: svn:eol-style
|
---|
144 | + native
|
---|
145 |
|
---|
146 | Index: tests/nocompile_stem2.cc
|
---|
147 | ===================================================================
|
---|
148 | --- tests/nocompile_stem2.cc (revision 0)
|
---|
149 | +++ tests/nocompile_stem2.cc (revision 0)
|
---|
150 | @@ -0,0 +1,42 @@
|
---|
151 | +/** @file nocompile_stem2.cc
|
---|
152 | + * @brief Check that it's not possible to create an instance of a
|
---|
153 | + * SnowballStem subclass on the stack.
|
---|
154 | + */
|
---|
155 | +/* Copyright 2008 Lemur Consulting Ltd
|
---|
156 | + *
|
---|
157 | + * This program is free software; you can redistribute it and/or
|
---|
158 | + * modify it under the terms of the GNU General Public License as
|
---|
159 | + * published by the Free Software Foundation; either version 2 of the
|
---|
160 | + * License, or (at your option) any later version.
|
---|
161 | + *
|
---|
162 | + * This program is distributed in the hope that it will be useful,
|
---|
163 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
164 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
165 | + * GNU General Public License for more details.
|
---|
166 | + *
|
---|
167 | + * You should have received a copy of the GNU General Public License
|
---|
168 | + * along with this program; if not, write to the Free Software
|
---|
169 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
|
---|
170 | + * USA
|
---|
171 | + */
|
---|
172 | +
|
---|
173 | +#include <config.h>
|
---|
174 | +#include <xapian.h>
|
---|
175 | +
|
---|
176 | +class SnowballStemSubclass : public Xapian::SnowballStem {
|
---|
177 | + protected:
|
---|
178 | + // All user derived subclasses have to make their destructor
|
---|
179 | + // non-public, so that the class can't be instantiated on the stack.
|
---|
180 | + ~SnowballStemSubclass() {}
|
---|
181 | + public:
|
---|
182 | + SnowballStemSubclass(const std::string & language)
|
---|
183 | + : Xapian::SnowballStem(language) {}
|
---|
184 | +
|
---|
185 | + std::string operator()(const std::string &word) const { return "!2" + Xapian::SnowballStem::operator()(word); }
|
---|
186 | + std::string get_description() const { return "StemSubclass"; }
|
---|
187 | +};
|
---|
188 | +
|
---|
189 | +int main()
|
---|
190 | +{
|
---|
191 | + SnowballStemSubclass stem("en");
|
---|
192 | +}
|
---|
193 |
|
---|
194 | Property changes on: tests/nocompile_stem2.cc
|
---|
195 | ___________________________________________________________________
|
---|
196 | Name: svn:eol-style
|
---|
197 | + native
|
---|
198 |
|
---|
199 | Index: tests/nocompiletest.in
|
---|
200 | ===================================================================
|
---|
201 | --- tests/nocompiletest.in (revision 0)
|
---|
202 | +++ tests/nocompiletest.in (revision 0)
|
---|
203 | @@ -0,0 +1,61 @@
|
---|
204 | +#!/bin/sh
|
---|
205 | +# @configure_input@
|
---|
206 | +#
|
---|
207 | +# Copyright (C) 2002,2003,2004,2005,2006,2007 Olly Betts
|
---|
208 | +#
|
---|
209 | +# This program is free software; you can redistribute it and/or
|
---|
210 | +# modify it under the terms of the GNU General Public License as
|
---|
211 | +# published by the Free Software Foundation; either version 2 of the
|
---|
212 | +# License, or (at your option) any later version.
|
---|
213 | +#
|
---|
214 | +# This program is distributed in the hope that it will be useful,
|
---|
215 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
216 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
217 | +# GNU General Public License for more details.
|
---|
218 | +#
|
---|
219 | +# You should have received a copy of the GNU General Public License
|
---|
220 | +# along with this program; if not, write to the Free Software
|
---|
221 | +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
|
---|
222 | +# USA
|
---|
223 | +
|
---|
224 | +# Test that various programs don't compile.
|
---|
225 | +
|
---|
226 | +EXEEXT="@EXEEXT@"
|
---|
227 | +SRCDIR="@srcdir@"
|
---|
228 | +out="nocompiletest_stdout.tmp"
|
---|
229 | +err="nocompiletest_stderr.tmp"
|
---|
230 | +
|
---|
231 | +TESTS="stem1 stem2"
|
---|
232 | +exitstatus=0
|
---|
233 | +failures=0
|
---|
234 | +
|
---|
235 | +echo "Attempting to build valid test program"
|
---|
236 | +# First, check that building "nocompile_pass" works, to ensure that the
|
---|
237 | +# platform actually allows anything to compile.
|
---|
238 | +rm -f "nocompile_pass${EXEEXT}"
|
---|
239 | +make "nocompile_pass${EXEEXT}" >"$out" 2>"$err"
|
---|
240 | +if [ $? -ne 0 ] ; then
|
---|
241 | + cat < "$out"
|
---|
242 | + echo
|
---|
243 | + echo "Failed to compile valid test program, so unable to test for compilation"
|
---|
244 | + echo "problems with invalid test programs."
|
---|
245 | + echo
|
---|
246 | + cat < "$err"
|
---|
247 | + rm -f $out $err
|
---|
248 | + exit 1
|
---|
249 | +fi
|
---|
250 | +
|
---|
251 | +# Now, try compiling each of the test programs, checking that compilation fails.
|
---|
252 | +for test in $TESTS
|
---|
253 | +do
|
---|
254 | + echo "Testing build of nocompile_${test}"
|
---|
255 | + rm -f "nocompile_${test}${EXEEXT}"
|
---|
256 | + make "nocompile_${test}${EXEEXT}" >"$out" 2>"$err"
|
---|
257 | + if [ $? -eq 0 ] ; then
|
---|
258 | + echo "FAIL: Compilation of invalid test program succeeded."
|
---|
259 | + exitstatus=1
|
---|
260 | + fi
|
---|
261 | +done
|
---|
262 | +
|
---|
263 | +rm -f $out $err
|
---|
264 | +exit $exitstatus
|
---|
265 | Index: tests/nocompile_pass.cc
|
---|
266 | ===================================================================
|
---|
267 | --- tests/nocompile_pass.cc (revision 0)
|
---|
268 | +++ tests/nocompile_pass.cc (revision 0)
|
---|
269 | @@ -0,0 +1,29 @@
|
---|
270 | +/** @file nocompile_pass.cc
|
---|
271 | + * @brief Simple program using Xapian which should compile and link correctly.
|
---|
272 | + */
|
---|
273 | +/* Copyright 2008 Lemur Consulting Ltd
|
---|
274 | + *
|
---|
275 | + * This program is free software; you can redistribute it and/or
|
---|
276 | + * modify it under the terms of the GNU General Public License as
|
---|
277 | + * published by the Free Software Foundation; either version 2 of the
|
---|
278 | + * License, or (at your option) any later version.
|
---|
279 | + *
|
---|
280 | + * This program is distributed in the hope that it will be useful,
|
---|
281 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
282 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
283 | + * GNU General Public License for more details.
|
---|
284 | + *
|
---|
285 | + * You should have received a copy of the GNU General Public License
|
---|
286 | + * along with this program; if not, write to the Free Software
|
---|
287 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
|
---|
288 | + * USA
|
---|
289 | + */
|
---|
290 | +
|
---|
291 | +#include <config.h>
|
---|
292 | +#include <xapian.h>
|
---|
293 | +
|
---|
294 | +int main()
|
---|
295 | +{
|
---|
296 | + Xapian::Stem stem("en");
|
---|
297 | + stem("hello");
|
---|
298 | +}
|
---|
299 |
|
---|
300 | Property changes on: tests/nocompile_pass.cc
|
---|
301 | ___________________________________________________________________
|
---|
302 | Name: svn:eol-style
|
---|
303 | + native
|
---|
304 |
|
---|
305 | Index: tests/api_nodb.cc
|
---|
306 | ===================================================================
|
---|
307 | --- tests/api_nodb.cc (revision 10133)
|
---|
308 | +++ tests/api_nodb.cc (working copy)
|
---|
309 | @@ -284,6 +284,37 @@
|
---|
310 | return true;
|
---|
311 | }
|
---|
312 |
|
---|
313 | +class StemSubclass : public Xapian::BaseStem {
|
---|
314 | + private:
|
---|
315 | + virtual ~StemSubclass() {}
|
---|
316 | + public:
|
---|
317 | + std::string operator()(const std::string &word) const { return "!" + word; }
|
---|
318 | + std::string get_description() const { return "StemSubclass"; }
|
---|
319 | +};
|
---|
320 | +
|
---|
321 | +class SnowballStemSubclass : public Xapian::SnowballStem {
|
---|
322 | + public:
|
---|
323 | + SnowballStemSubclass(const std::string & language)
|
---|
324 | + : Xapian::SnowballStem(language) {}
|
---|
325 | +
|
---|
326 | + std::string operator()(const std::string &word) const { return "!2" + Xapian::SnowballStem::operator()(word); }
|
---|
327 | + std::string get_description() const { return "StemSubclass"; }
|
---|
328 | +};
|
---|
329 | +
|
---|
330 | +// Test subclassing of stem.
|
---|
331 | +DEFINE_TESTCASE(stemsubclass1, !backend) {
|
---|
332 | + Xapian::Stem stemmer(new StemSubclass());
|
---|
333 | + TEST_EQUAL(stemmer("foods"), "!foods");
|
---|
334 | + TEST_EQUAL(stemmer("food"), "!food");
|
---|
335 | + TEST_EQUAL(stemmer(""), "!");
|
---|
336 | +
|
---|
337 | + Xapian::Stem stemmer2(new SnowballStemSubclass("en"));
|
---|
338 | + TEST_EQUAL(stemmer2("foods"), "!2food");
|
---|
339 | + TEST_EQUAL(stemmer2("food"), "!2food");
|
---|
340 | + TEST_EQUAL(stemmer2(""), "!2");
|
---|
341 | + return true;
|
---|
342 | +}
|
---|
343 | +
|
---|
344 | // Some simple tests of the built in weighting schemes.
|
---|
345 | DEFINE_TESTCASE(weight1, !backend) {
|
---|
346 | Xapian::Weight * wt;
|
---|
347 | Index: tests/Makefile.am
|
---|
348 | ===================================================================
|
---|
349 | --- tests/Makefile.am (revision 10133)
|
---|
350 | +++ tests/Makefile.am (working copy)
|
---|
351 | @@ -17,11 +17,13 @@
|
---|
352 |
|
---|
353 | check-none: apitest$(EXEEXT) \
|
---|
354 | internaltest$(EXEEXT) \
|
---|
355 | + nocompiletest \
|
---|
356 | stemtest$(EXEEXT) \
|
---|
357 | queryparsertest$(EXEEXT) \
|
---|
358 | termgentest$(EXEEXT)
|
---|
359 | $(TESTS_ENVIRONMENT) ./apitest$(EXEEXT) -b none
|
---|
360 | $(TESTS_ENVIRONMENT) ./internaltest$(EXEEXT)
|
---|
361 | + $(TESTS_ENVIRONMENT) ./nocompiletest
|
---|
362 | $(TESTS_ENVIRONMENT) ./stemtest$(EXEEXT)
|
---|
363 | $(TESTS_ENVIRONMENT) ./queryparsertest$(EXEEXT)
|
---|
364 | $(TESTS_ENVIRONMENT) ./termgentest$(EXEEXT)
|
---|
365 | @@ -62,9 +64,18 @@
|
---|
366 | INCLUDES += -I$(top_srcdir)/backends/quartz
|
---|
367 | endif
|
---|
368 |
|
---|
369 | +nocompile_programs = \
|
---|
370 | + nocompile_pass \
|
---|
371 | + nocompile_stem1 \
|
---|
372 | + nocompile_stem2
|
---|
373 | +
|
---|
374 | ## Test programs to be run
|
---|
375 | -TESTS = $(quartz_tests) \
|
---|
376 | - apitest$(EXEEXT) internaltest$(EXEEXT) stemtest$(EXEEXT) \
|
---|
377 | +TESTS = \
|
---|
378 | + nocompiletest \
|
---|
379 | + $(quartz_tests) \
|
---|
380 | + apitest$(EXEEXT) \
|
---|
381 | + internaltest$(EXEEXT) \
|
---|
382 | + stemtest$(EXEEXT) \
|
---|
383 | queryparsertest$(EXEEXT) \
|
---|
384 | termgentest$(EXEEXT)
|
---|
385 |
|
---|
386 | @@ -72,9 +83,13 @@
|
---|
387 | check_PROGRAMS = $(quartz_check_programs) \
|
---|
388 | apitest internaltest stemtest queryparsertest termgentest
|
---|
389 |
|
---|
390 | -# Make sure runtest is up to date before running tests
|
---|
391 | -check_SCRIPTS = runtest
|
---|
392 | +## We want to have rules for the nocompile programs, but not try building them
|
---|
393 | +## ourselves (nocompiletest calls back to the makefile to build them).
|
---|
394 | +EXTRA_PROGRAMS = $(nocompile_programs)
|
---|
395 |
|
---|
396 | +# We include the test scripts here to make sure they are up to date before running tests
|
---|
397 | +check_SCRIPTS = nocompiletest runtest
|
---|
398 | +
|
---|
399 | ## Sources:
|
---|
400 |
|
---|
401 | noinst_HEADERS = apitest.h
|
---|
402 | @@ -113,6 +128,13 @@
|
---|
403 | termgentest_LDFLAGS = -no-install $(ldflags)
|
---|
404 | termgentest_LDADD = ../libgetopt.la ../libxapian.la
|
---|
405 |
|
---|
406 | +nocompile_pass_SOURCES = nocompile_pass.cc
|
---|
407 | +nocompile_pass_LDADD = ../libxapian.la
|
---|
408 | +nocompile_stem1_SOURCES = nocompile_stem1.cc
|
---|
409 | +nocompile_stem1_LDADD = ../libxapian.la
|
---|
410 | +nocompile_stem2_SOURCES = nocompile_stem2.cc
|
---|
411 | +nocompile_stem2_LDADD = ../libxapian.la
|
---|
412 | +
|
---|
413 | BUILT_SOURCES =
|
---|
414 |
|
---|
415 | if MAINTAINER_MODE
|
---|
416 | @@ -150,6 +172,7 @@
|
---|
417 |
|
---|
418 | ## Distribute test data:
|
---|
419 | EXTRA_DIST +=\
|
---|
420 | + nocompiletest.in \
|
---|
421 | runtest.in \
|
---|
422 | valgrind.supp \
|
---|
423 | testdata/apitest_onedoc.txt \
|
---|
424 | Index: configure.ac
|
---|
425 | ===================================================================
|
---|
426 | --- configure.ac (revision 10133)
|
---|
427 | +++ configure.ac (working copy)
|
---|
428 | @@ -1114,6 +1114,7 @@
|
---|
429 | docs/doxygen_full_conf
|
---|
430 | xapian-core.spec
|
---|
431 | ])
|
---|
432 | +AC_CONFIG_FILES([tests/nocompiletest], [chmod +x tests/nocompiletest])
|
---|
433 | AC_CONFIG_FILES([tests/runtest], [chmod +x tests/runtest])
|
---|
434 | AC_CONFIG_FILES([tests/runsrv], [chmod +x tests/runsrv])
|
---|
435 | AC_CONFIG_FILES([xapian-config], [chmod +x xapian-config])
|
---|
436 | Index: include/xapian/stem.h
|
---|
437 | ===================================================================
|
---|
438 | --- include/xapian/stem.h (revision 10133)
|
---|
439 | +++ include/xapian/stem.h (working copy)
|
---|
440 | @@ -2,6 +2,7 @@
|
---|
441 | * \brief stemming algorithms
|
---|
442 | */
|
---|
443 | /* Copyright (C) 2005,2007 Olly Betts
|
---|
444 | + * Copyright (C) 2008 Lemur Consulting Ltd
|
---|
445 | *
|
---|
446 | * This program is free software; you can redistribute it and/or
|
---|
447 | * modify it under the terms of the GNU General Public License as
|
---|
448 | @@ -28,27 +29,62 @@
|
---|
449 |
|
---|
450 | namespace Xapian {
|
---|
451 |
|
---|
452 | -/// Class representing a stemming algorithm.
|
---|
453 | -class XAPIAN_VISIBILITY_DEFAULT Stem {
|
---|
454 | - public:
|
---|
455 | - /// @private @internal Class representing the stemmer internals.
|
---|
456 | - class Internal;
|
---|
457 | - /// @private @internal Reference counted internals.
|
---|
458 | - Xapian::Internal::RefCntPtr<Internal> internal;
|
---|
459 | +/// Base class representing a stemming algorithm.
|
---|
460 | +class XAPIAN_VISIBILITY_DEFAULT BaseStem : public Xapian::Internal::RefCntBase {
|
---|
461 | + /// No copying allowed.
|
---|
462 | + BaseStem(const BaseStem & o);
|
---|
463 |
|
---|
464 | - /// Copy constructor.
|
---|
465 | - Stem(const Stem & o);
|
---|
466 | + /// No assignment allowed.
|
---|
467 | + void operator=(const BaseStem & o);
|
---|
468 |
|
---|
469 | - /// Assignment.
|
---|
470 | - void operator=(const Stem & o);
|
---|
471 | + protected:
|
---|
472 | + /** Destructor is protected since it should only be called by subclasses
|
---|
473 | + * and RefCntPtr. Subclasses should make their destructors protected,
|
---|
474 | + * to force users to use a RefCntPtr to reference them.
|
---|
475 | + */
|
---|
476 | + virtual ~BaseStem() {}
|
---|
477 |
|
---|
478 | - /** Construct a Xapian::Stem object which doesn't change terms.
|
---|
479 | + friend class Xapian::Internal::RefCntPtr<BaseStem>;
|
---|
480 | +
|
---|
481 | + public:
|
---|
482 | + BaseStem() {}
|
---|
483 | +
|
---|
484 | + /** Stem a word.
|
---|
485 | *
|
---|
486 | - * Equivalent to Stem("none").
|
---|
487 | + * @param word a word to stem.
|
---|
488 | + * @return the stemmed form of the word.
|
---|
489 | */
|
---|
490 | - Stem();
|
---|
491 | + virtual std::string operator()(const std::string &word) const = 0;
|
---|
492 |
|
---|
493 | - /** Construct a Xapian::Stem object for a particular language.
|
---|
494 | + /// Return a string describing this object.
|
---|
495 | + virtual std::string get_description() const = 0;
|
---|
496 | +};
|
---|
497 | +
|
---|
498 | +/// Class representing one of the snowball stemming algorithms.
|
---|
499 | +class XAPIAN_VISIBILITY_DEFAULT SnowballStem : public BaseStem {
|
---|
500 | + /// No copying allowed.
|
---|
501 | + SnowballStem(const SnowballStem & o);
|
---|
502 | +
|
---|
503 | + /// No assignment allowed.
|
---|
504 | + void operator=(const SnowballStem & o);
|
---|
505 | +
|
---|
506 | + protected:
|
---|
507 | + /** Destructor is protected since it should only be called by subclasses
|
---|
508 | + * and RefCntPtr. Subclasses should make their destructors protected,
|
---|
509 | + * to force users to use a RefCntPtr to reference them.
|
---|
510 | + */
|
---|
511 | + virtual ~SnowballStem();
|
---|
512 | +
|
---|
513 | + public:
|
---|
514 | + /// @private @internal Class representing the snowball stemmer internals.
|
---|
515 | + class Internal;
|
---|
516 | +
|
---|
517 | + private:
|
---|
518 | + /// @private @internal Snowball stemmer internals.
|
---|
519 | + Internal * internal;
|
---|
520 | +
|
---|
521 | + public:
|
---|
522 | + /** Construct a Xapian::SnowballStem object for a particular language.
|
---|
523 | *
|
---|
524 | * @param language Either the English name for the language
|
---|
525 | * or the two letter ISO639 code.
|
---|
526 | @@ -56,7 +92,6 @@
|
---|
527 | * The following language names are understood (aliases follow the
|
---|
528 | * name):
|
---|
529 | *
|
---|
530 | - * - none - don't stem terms
|
---|
531 | * - danish (da)
|
---|
532 | * - dutch (nl)
|
---|
533 | * - english (en) - Martin Porter's 2002 revision of his stemmer
|
---|
534 | @@ -76,15 +111,12 @@
|
---|
535 | * @exception Xapian::InvalidArgumentError is thrown if
|
---|
536 | * language isn't recognised.
|
---|
537 | */
|
---|
538 | - explicit Stem(const std::string &language);
|
---|
539 | + explicit SnowballStem(const std::string &language);
|
---|
540 |
|
---|
541 | - /// Destructor.
|
---|
542 | - ~Stem();
|
---|
543 | -
|
---|
544 | /** Stem a word.
|
---|
545 | *
|
---|
546 | - * @param word a word to stem.
|
---|
547 | - * @return the stem
|
---|
548 | + * @param word a word to stem.
|
---|
549 | + * @return the stemmed form of the word.
|
---|
550 | */
|
---|
551 | std::string operator()(const std::string &word) const;
|
---|
552 |
|
---|
553 | @@ -104,6 +136,74 @@
|
---|
554 | static std::string get_available_languages();
|
---|
555 | };
|
---|
556 |
|
---|
557 | +/// Class wrapping a reference counted stemming algorithm.
|
---|
558 | +class XAPIAN_VISIBILITY_DEFAULT Stem {
|
---|
559 | + public:
|
---|
560 | + /// @private @internal Reference counted internals.
|
---|
561 | + Xapian::Internal::RefCntPtr<Xapian::BaseStem> internal;
|
---|
562 | +
|
---|
563 | + /// Copy constructor.
|
---|
564 | + Stem(const Stem & o) : internal(o.internal) { }
|
---|
565 | +
|
---|
566 | + /// Assignment.
|
---|
567 | + void operator=(const Stem & o) { internal = o.internal; }
|
---|
568 | +
|
---|
569 | + /** Construct a Xapian::Stem object from a pointer to a BaseStem.
|
---|
570 | + */
|
---|
571 | + Stem(Xapian::Internal::RefCntPtr<Xapian::BaseStem> internal_)
|
---|
572 | + : internal(internal_) {}
|
---|
573 | +
|
---|
574 | + /** Construct a Xapian::Stem object which doesn't change terms.
|
---|
575 | + */
|
---|
576 | + Stem() : internal(0) {}
|
---|
577 | +
|
---|
578 | + /** Construct a Xapian::Stem object for a particular language.
|
---|
579 | + *
|
---|
580 | + * This constructor is included for convenience, and is equivalent to
|
---|
581 | + * Stem(new SnowballStem(language)) - except that a language parameter of
|
---|
582 | + * "none" will produce a stemmer which leaves terms unchanged.
|
---|
583 | + *
|
---|
584 | + * See Xapian::SnowballStem for details.
|
---|
585 | + */
|
---|
586 | + explicit Stem(const std::string &language)
|
---|
587 | + {
|
---|
588 | + if (language == "none")
|
---|
589 | + internal = 0;
|
---|
590 | + else
|
---|
591 | + internal = new Xapian::SnowballStem(language);
|
---|
592 | + }
|
---|
593 | +
|
---|
594 | + /** Stem a word.
|
---|
595 | + *
|
---|
596 | + * @param word a word to stem.
|
---|
597 | + * @return the stemmed form of the word.
|
---|
598 | + */
|
---|
599 | + std::string operator()(const std::string &word) const
|
---|
600 | + {
|
---|
601 | + if (!internal.get()) return word;
|
---|
602 | + return internal->operator()(word);
|
---|
603 | + }
|
---|
604 | +
|
---|
605 | + /// Return a string describing this object.
|
---|
606 | + std::string get_description() const
|
---|
607 | + {
|
---|
608 | + if (!internal.get()) return "Xapian::Stem()";
|
---|
609 | + return "Xapian::Stem(" + internal->get_description() + ")";
|
---|
610 | + }
|
---|
611 | +
|
---|
612 | + /** Return a list of available languages.
|
---|
613 | + *
|
---|
614 | + * This is included for convenience, and is equivalent to
|
---|
615 | + * SnowballStem::get_available_languages().
|
---|
616 | + *
|
---|
617 | + * See Xapian::SnowballStem for details.
|
---|
618 | + */
|
---|
619 | + static std::string get_available_languages()
|
---|
620 | + {
|
---|
621 | + return SnowballStem::get_available_languages();
|
---|
622 | + }
|
---|
623 | +};
|
---|
624 | +
|
---|
625 | }
|
---|
626 |
|
---|
627 | #endif // XAPIAN_INCLUDED_STEM_H
|
---|
628 | Index: languages/Makefile.mk
|
---|
629 | ===================================================================
|
---|
630 | --- languages/Makefile.mk (revision 10133)
|
---|
631 | +++ languages/Makefile.mk (working copy)
|
---|
632 | @@ -56,10 +56,10 @@
|
---|
633 | $(CC_FOR_BUILD) -o languages/snowball -DDISABLE_JAVA `for f in $(snowball_sources) ; do test -f $$f && echo $$f || echo $(srcdir)/$$f ; done`
|
---|
634 |
|
---|
635 | .sbl.cc:
|
---|
636 | - languages/snowball $< -o `echo $@|sed 's!\.cc$$!!'` -c++ -u -n InternalStem`echo $<|sed 's!.*/\(.\).*!\1!'|tr a-z A-Z``echo $<|sed 's!.*/.!!;s!\.sbl!!'` -p Stem::Internal
|
---|
637 | + languages/snowball $< -o `echo $@|sed 's!\.cc$$!!'` -c++ -u -n InternalStem`echo $<|sed 's!.*/\(.\).*!\1!'|tr a-z A-Z``echo $<|sed 's!.*/.!!;s!\.sbl!!'` -p SnowballStem::Internal
|
---|
638 |
|
---|
639 | .sbl.h:
|
---|
640 | - languages/snowball $< -o `echo $@|sed 's!\.h$$!!'` -c++ -u -n InternalStem`echo $<|sed 's!.*/\(.\).*!\1!'|tr a-z A-Z``echo $<|sed 's!.*/.!!;s!\.sbl!!'` -p Stem::Internal
|
---|
641 | + languages/snowball $< -o `echo $@|sed 's!\.h$$!!'` -c++ -u -n InternalStem`echo $<|sed 's!.*/\(.\).*!\1!'|tr a-z A-Z``echo $<|sed 's!.*/.!!;s!\.sbl!!'` -p SnowballStem::Internal
|
---|
642 |
|
---|
643 | languages/allsnowballheaders.h: languages/generate-allsnowballheaders languages/Makefile.mk
|
---|
644 | languages/generate-allsnowballheaders $(snowball_built_sources)
|
---|
645 | Index: languages/steminternal.cc
|
---|
646 | ===================================================================
|
---|
647 | --- languages/steminternal.cc (revision 10133)
|
---|
648 | +++ languages/steminternal.cc (working copy)
|
---|
649 | @@ -131,18 +131,18 @@
|
---|
650 |
|
---|
651 | namespace Xapian {
|
---|
652 |
|
---|
653 | -Stem::Internal::Internal()
|
---|
654 | +SnowballStem::Internal::Internal()
|
---|
655 | : p(create_s()), c(0), l(0), lb(0), bra(0), ket(0)
|
---|
656 | {
|
---|
657 | }
|
---|
658 |
|
---|
659 | -Stem::Internal::~Internal()
|
---|
660 | +SnowballStem::Internal::~Internal()
|
---|
661 | {
|
---|
662 | lose_s(p);
|
---|
663 | }
|
---|
664 |
|
---|
665 | string
|
---|
666 | -Stem::Internal::operator()(const string & word)
|
---|
667 | +SnowballStem::Internal::operator()(const string & word)
|
---|
668 | {
|
---|
669 | const symbol * s = reinterpret_cast<const symbol *>(word.data());
|
---|
670 | replace_s(0, l, word.size(), s);
|
---|
671 | @@ -156,7 +156,7 @@
|
---|
672 |
|
---|
673 | /* Code for character groupings: utf8 cases */
|
---|
674 |
|
---|
675 | -int Stem::Internal::get_utf8(int * slot) {
|
---|
676 | +int SnowballStem::Internal::get_utf8(int * slot) {
|
---|
677 | int b0, b1;
|
---|
678 | int tmp = c;
|
---|
679 | if (tmp >= l) return 0;
|
---|
680 | @@ -171,7 +171,7 @@
|
---|
681 | * slot = (b0 & 0xF) << 12 | (b1 & 0x3F) << 6 | (p[tmp] & 0x3F); return 3;
|
---|
682 | }
|
---|
683 |
|
---|
684 | -int Stem::Internal::get_b_utf8(int * slot) {
|
---|
685 | +int SnowballStem::Internal::get_b_utf8(int * slot) {
|
---|
686 | int b0, b1;
|
---|
687 | int tmp = c;
|
---|
688 | if (tmp <= lb) return 0;
|
---|
689 | @@ -186,7 +186,7 @@
|
---|
690 | * slot = (p[tmp] & 0xF) << 12 | (b1 & 0x3F) << 6 | (b0 & 0x3F); return 3;
|
---|
691 | }
|
---|
692 |
|
---|
693 | -int Stem::Internal::in_grouping_U(const unsigned char * s, int min, int max, int repeat) {
|
---|
694 | +int SnowballStem::Internal::in_grouping_U(const unsigned char * s, int min, int max, int repeat) {
|
---|
695 | do {
|
---|
696 | int ch;
|
---|
697 | int w = get_utf8(&ch);
|
---|
698 | @@ -198,7 +198,7 @@
|
---|
699 | return 0;
|
---|
700 | }
|
---|
701 |
|
---|
702 | -int Stem::Internal::in_grouping_b_U(const unsigned char * s, int min, int max, int repeat) {
|
---|
703 | +int SnowballStem::Internal::in_grouping_b_U(const unsigned char * s, int min, int max, int repeat) {
|
---|
704 | do {
|
---|
705 | int ch;
|
---|
706 | int w = get_b_utf8(&ch);
|
---|
707 | @@ -210,7 +210,7 @@
|
---|
708 | return 0;
|
---|
709 | }
|
---|
710 |
|
---|
711 | -int Stem::Internal::out_grouping_U(const unsigned char * s, int min, int max, int repeat) {
|
---|
712 | +int SnowballStem::Internal::out_grouping_U(const unsigned char * s, int min, int max, int repeat) {
|
---|
713 | do {
|
---|
714 | int ch;
|
---|
715 | int w = get_utf8(&ch);
|
---|
716 | @@ -222,7 +222,7 @@
|
---|
717 | return 0;
|
---|
718 | }
|
---|
719 |
|
---|
720 | -int Stem::Internal::out_grouping_b_U(const unsigned char * s, int min, int max, int repeat) {
|
---|
721 | +int SnowballStem::Internal::out_grouping_b_U(const unsigned char * s, int min, int max, int repeat) {
|
---|
722 | do {
|
---|
723 | int ch;
|
---|
724 | int w = get_b_utf8(&ch);
|
---|
725 | @@ -234,21 +234,21 @@
|
---|
726 | return 0;
|
---|
727 | }
|
---|
728 |
|
---|
729 | -int Stem::Internal::eq_s(int s_size, const symbol * s) {
|
---|
730 | +int SnowballStem::Internal::eq_s(int s_size, const symbol * s) {
|
---|
731 | if (l - c < s_size || memcmp(p + c, s, s_size * sizeof(symbol)) != 0)
|
---|
732 | return 0;
|
---|
733 | c += s_size;
|
---|
734 | return 1;
|
---|
735 | }
|
---|
736 |
|
---|
737 | -int Stem::Internal::eq_s_b(int s_size, const symbol * s) {
|
---|
738 | +int SnowballStem::Internal::eq_s_b(int s_size, const symbol * s) {
|
---|
739 | if (c - lb < s_size || memcmp(p + c - s_size, s, s_size * sizeof(symbol)) != 0)
|
---|
740 | return 0;
|
---|
741 | c -= s_size;
|
---|
742 | return 1;
|
---|
743 | }
|
---|
744 |
|
---|
745 | -int Stem::Internal::find_among(const struct among * v, int v_size, const unsigned char * fnum, const among_function * f) {
|
---|
746 | +int SnowballStem::Internal::find_among(const struct among * v, int v_size, const unsigned char * fnum, const among_function * f) {
|
---|
747 | int i = 0;
|
---|
748 | int j = v_size;
|
---|
749 |
|
---|
750 | @@ -302,7 +302,7 @@
|
---|
751 | }
|
---|
752 |
|
---|
753 | /* find_among_b is for backwards processing. Same comments apply */
|
---|
754 | -int Stem::Internal::find_among_b(const struct among * v, int v_size, const unsigned char * fnum, const among_function * f) {
|
---|
755 | +int SnowballStem::Internal::find_among_b(const struct among * v, int v_size, const unsigned char * fnum, const among_function * f) {
|
---|
756 | int i = 0;
|
---|
757 | int j = v_size;
|
---|
758 |
|
---|
759 | @@ -351,7 +351,7 @@
|
---|
760 | }
|
---|
761 |
|
---|
762 | int
|
---|
763 | -Stem::Internal::replace_s(int c_bra, int c_ket, int s_size, const symbol * s)
|
---|
764 | +SnowballStem::Internal::replace_s(int c_bra, int c_ket, int s_size, const symbol * s)
|
---|
765 | {
|
---|
766 | int adjustment;
|
---|
767 | int len;
|
---|
768 | @@ -377,7 +377,7 @@
|
---|
769 | return adjustment;
|
---|
770 | }
|
---|
771 |
|
---|
772 | -int Stem::Internal::slice_check() {
|
---|
773 | +int SnowballStem::Internal::slice_check() {
|
---|
774 | Assert(p);
|
---|
775 | if (bra < 0 || bra > ket || ket > l) {
|
---|
776 | #if 0
|
---|
777 | @@ -389,19 +389,19 @@
|
---|
778 | return 0;
|
---|
779 | }
|
---|
780 |
|
---|
781 | -int Stem::Internal::slice_from_s(int s_size, const symbol * s) {
|
---|
782 | +int SnowballStem::Internal::slice_from_s(int s_size, const symbol * s) {
|
---|
783 | if (slice_check()) return -1;
|
---|
784 | replace_s(bra, ket, s_size, s);
|
---|
785 | return 0;
|
---|
786 | }
|
---|
787 |
|
---|
788 | -void Stem::Internal::insert_s(int c_bra, int c_ket, int s_size, const symbol * s) {
|
---|
789 | +void SnowballStem::Internal::insert_s(int c_bra, int c_ket, int s_size, const symbol * s) {
|
---|
790 | int adjustment = replace_s(c_bra, c_ket, s_size, s);
|
---|
791 | if (c_bra <= bra) bra += adjustment;
|
---|
792 | if (c_bra <= ket) ket += adjustment;
|
---|
793 | }
|
---|
794 |
|
---|
795 | -symbol * Stem::Internal::slice_to(symbol * v) {
|
---|
796 | +symbol * SnowballStem::Internal::slice_to(symbol * v) {
|
---|
797 | if (slice_check()) return NULL;
|
---|
798 | {
|
---|
799 | int len = ket - bra;
|
---|
800 | @@ -414,7 +414,7 @@
|
---|
801 | return v;
|
---|
802 | }
|
---|
803 |
|
---|
804 | -symbol * Stem::Internal::assign_to(symbol * v) {
|
---|
805 | +symbol * SnowballStem::Internal::assign_to(symbol * v) {
|
---|
806 | int len = l;
|
---|
807 | if (CAPACITY(v) < len) {
|
---|
808 | v = increase_size(v, len);
|
---|
809 | @@ -425,7 +425,7 @@
|
---|
810 | }
|
---|
811 |
|
---|
812 | #if 0
|
---|
813 | -void Stem::Internal::debug(int number, int line_count) {
|
---|
814 | +void SnowballStem::Internal::debug(int number, int line_count) {
|
---|
815 | int i;
|
---|
816 | int limit = SIZE(p);
|
---|
817 | /*if (number >= 0) printf("%3d (line %4d): '", number, line_count);*/
|
---|
818 | Index: languages/steminternal.h
|
---|
819 | ===================================================================
|
---|
820 | --- languages/steminternal.h (revision 10133)
|
---|
821 | +++ languages/steminternal.h (working copy)
|
---|
822 | @@ -41,7 +41,7 @@
|
---|
823 | #define CAPACITY(P) ((const int *)(const void *)(P))[-2]
|
---|
824 | #define SET_CAPACITY(P, N) ((int *)(void *)(P))[-2] = N
|
---|
825 |
|
---|
826 | -typedef int (*among_function)(Xapian::Stem::Internal *);
|
---|
827 | +typedef int (*among_function)(Xapian::SnowballStem::Internal *);
|
---|
828 |
|
---|
829 | struct among {
|
---|
830 | int s_size; /* length of search string (in symbols) */
|
---|
831 | @@ -60,7 +60,7 @@
|
---|
832 |
|
---|
833 | namespace Xapian {
|
---|
834 |
|
---|
835 | -class Stem::Internal : public Xapian::Internal::RefCntBase {
|
---|
836 | +class SnowballStem::Internal {
|
---|
837 | int slice_check();
|
---|
838 |
|
---|
839 | protected:
|
---|
840 | Index: languages/stem.cc
|
---|
841 | ===================================================================
|
---|
842 | --- languages/stem.cc (revision 10133)
|
---|
843 | +++ languages/stem.cc (working copy)
|
---|
844 | @@ -33,17 +33,9 @@
|
---|
845 |
|
---|
846 | namespace Xapian {
|
---|
847 |
|
---|
848 | -Stem::Stem(const Stem & o) : internal(o.internal) { }
|
---|
849 | -
|
---|
850 | -void
|
---|
851 | -Stem::operator=(const Stem & o)
|
---|
852 | +SnowballStem::SnowballStem(const std::string &language)
|
---|
853 | + : internal(0)
|
---|
854 | {
|
---|
855 | - internal = o.internal;
|
---|
856 | -}
|
---|
857 | -
|
---|
858 | -Stem::Stem() : internal(0) { }
|
---|
859 | -
|
---|
860 | -Stem::Stem(const std::string &language) : internal(0) {
|
---|
861 | if (language.empty()) return;
|
---|
862 | switch (language[0]) {
|
---|
863 | case 'd':
|
---|
864 | @@ -119,9 +111,6 @@
|
---|
865 | internal = new InternalStemNorwegian;
|
---|
866 | return;
|
---|
867 | }
|
---|
868 | - if (language == "none") {
|
---|
869 | - return;
|
---|
870 | - }
|
---|
871 | break;
|
---|
872 | case 'p':
|
---|
873 | if (language == "pt" || language == "portuguese") {
|
---|
874 | @@ -163,20 +152,23 @@
|
---|
875 | throw Xapian::InvalidArgumentError("Language code " + language + " unknown");
|
---|
876 | }
|
---|
877 |
|
---|
878 | -Stem::~Stem() { }
|
---|
879 | +SnowballStem::~SnowballStem()
|
---|
880 | +{
|
---|
881 | + delete internal;
|
---|
882 | +}
|
---|
883 |
|
---|
884 | string
|
---|
885 | -Stem::operator()(const std::string &word) const
|
---|
886 | +SnowballStem::operator()(const std::string &word) const
|
---|
887 | {
|
---|
888 | - if (!internal.get() || word.empty()) return word;
|
---|
889 | + if (!internal || word.empty()) return word; // internal stays null for an empty language
|
---|
890 | return internal->operator()(word);
|
---|
891 | }
|
---|
892 |
|
---|
893 | string
|
---|
894 | -Stem::get_description() const
|
---|
895 | +SnowballStem::get_description() const
|
---|
896 | {
|
---|
897 | - string desc = "Xapian::Stem(";
|
---|
898 | - if (internal.get()) {
|
---|
899 | + string desc = "Xapian::SnowballStem(";
|
---|
900 | + if (internal) {
|
---|
901 | desc += internal->get_description();
|
---|
902 | desc += ')';
|
---|
903 | } else {
|
---|
904 | @@ -186,7 +178,7 @@
|
---|
905 | }
|
---|
906 |
|
---|
907 | string
|
---|
908 | -Stem::get_available_languages()
|
---|
909 | +SnowballStem::get_available_languages()
|
---|
910 | {
|
---|
911 | return LANGSTRING;
|
---|
912 | }
|
---|
913 | Index: languages/compiler/generator.c
|
---|
914 | ===================================================================
|
---|
915 | --- languages/compiler/generator.c (revision 10133)
|
---|
916 | +++ languages/compiler/generator.c (working copy)
|
---|
917 | @@ -1481,7 +1481,7 @@
|
---|
918 | if (q->type == t_routine && q->routine_called_from_among) {
|
---|
919 | q->among_func_count = ++among_func_count;
|
---|
920 | g->V[0] = q;
|
---|
921 | - w(g, "static int t~V0(Xapian::Stem::Internal * this_ptr) {~N"
|
---|
922 | + w(g, "static int t~V0(Xapian::SnowballStem::Internal * this_ptr) {~N"
|
---|
923 | " return (static_cast<Xapian::~S0 *>(this_ptr))->~V0();~N"
|
---|
924 | "}~N"
|
---|
925 | "~N");
|
---|