Ticket #282: xapian-omega-1.0.7a-from-ticket-285-and-cleaned-up-updated.patch
File xapian-omega-1.0.7a-from-ticket-285-and-cleaned-up-updated.patch, 133.7 KB (added by , 15 years ago) |
---|
-
xapian-omega-1.0.7a/ChangeLog
diff -u xapian-omega-1.0.7a/ChangeLog.orig
old new 1526 1526 1527 1527 * configure.ac: Check for strftime. 1528 1528 1529 2006-08-22 09:30:12 Reini Urban <reinhard.urban@avl.com> 1530 1531 omega-0.9.6c: 1532 * omega.m4: New file with macro XO_OMEGA_WITH 1533 * configure.ac: Cleaner interface via omega.m4: XO_OMEGA_WITH. 1534 Added HAVE_PDFTOTEXT, HAVE_PDFINFO 1535 * omindex.cc: Fix wrong timestamp comparison in cache logic 1536 * scriptindex.cc: Support TEXTCAT for language autodetection. 1537 Add lastmod and size records and values. 1538 * configfile.h: Add common VALUE_* keys for indexers and omega 1539 * ppt2text.in, excel2text.in, outlook2text.in: New scripts 1540 * scriptindex.1: updated by help2man 1541 1542 2006-08-18 15:13:32 Reini Urban <reinhard.urban@avl.com> 1543 1544 omega-0.9.6b: 1545 * omindex.cc: last_mod as value. Add HAVE_UNRAR, HAVE_MSGCONVERT, 1546 HAVE_READPST, HAVE_CATDOC checks. 1547 Add options --verbose, --silent 1548 * configure.ac: Add HAVE_CATDOC 1549 1550 2006-08-17 18:06:26 Reini Urban <reinhard.urban@avl.com> 1551 1552 omega-0.9.6a: 1553 * omindex.cc: Added last_mod check, cache_dir, libtextcat, 1554 cached virtual directories (zip,msg,pst,...). 1555 New options: -c/--nocleanup, -i/--ignore-time. 1556 Add MS-Office mimetypes (word, excel, powerpoint, outlook). 1557 Consistently log stderr to /var/log/omega/omindex-error.log. 1558 * configure.ac: Add HAVE_TEXTCAT, HAVE_UNRAR, HAVE_MSGCONVERT, 1559 HAVE_READPST, HAVE_CATDOC 1560 * commonhelp.cc: Update stemmer help with HAVE_TEXTCAT (lang autodetection) 1561 * configfile.cc: New cache_dir 1562 * Makefile.am: Prepare for omindex_test. Link omindex against configfile. 1563 * langclass, langclass.conf: New file and directory 1564 * omindex.1: updated by help2man 1565 1529 1566 Sun Jul 09 01:40:09 BST 2006 Olly Betts <olly@survex.com> 1530 1567 1531 1568 * docs/omegascript.txt: Note that (by design) an omegascript template -
xapian-omega-1.0.7a/Makefile.am
diff -u xapian-omega-1.0.7a/Makefile.am.orig
old new 55 55 images/score-8.png \ 56 56 images/score-9.png \ 57 57 xapian-omega.spec 58 omega.m4 \ 59 \$(wildcard langclass/*.lm) langclass.conf 58 60 59 61 AM_CPPFLAGS = -DCONFIGFILE_SYSTEM=\"$(sysconfdir)/omega.conf\" 60 62 61 63 pkglibbindir = $(pkglibdir)/bin 62 64 pkglibbin_PROGRAMS = omega 63 65 bin_PROGRAMS = omindex scriptindex 66 dist_libexec_SCRIPTS = outlook2text ppt2text excel2text mimeexplode msgconvert.pl 64 67 dist_bin_SCRIPTS = dbi2omega htdig2omega mbox2omega 65 68 66 69 check_PROGRAMS = htmlparsetest md5test utf8converttest 67 70 TESTS = htmlparsetest$(EXEEXT) md5test$(EXEEXT) utf8converttest$(EXEEXT) 68 71 72 check: check-am omindex_CHECK 73 74 omindex_CHECK: omindex$(EXEEXT) 75 rm -rf $(srcdir)/data 76 mkdir $(srcdir)/data 77 ./omindex --verbose -c --db $(srcdir)/data --url / $(srcdir)/test 78 ./omindex --verbose -p -c --db $(srcdir)/data --url / $(srcdir)/test 79 delve -d -r 1 $(srcdir)/data 80 delve -d -r 5 $(srcdir)/data 81 69 82 omegadatadir = $(datadir)/omega 70 83 dist_omegadata_DATA = htdig2omega.script mbox2omega.script 71 84 … … 92 105 common/safewindows.h\ 93 106 common/stringutils.h 94 107 95 AM_LDFLAGS = $(ICONV_LDFLAGS)108 AM_LDFLAGS = -no-undefined $(ICONV_LDFLAGS) 96 109 97 110 omega_SOURCES = omega.cc query.cc cgiparam.cc utils.cc configfile.cc date.cc\ 98 111 cdb_init.cc cdb_find.cc cdb_hash.cc cdb_unpack.cc loadfile.cc\ 99 112 utf8convert.cc datematchdecider.cc 100 113 omega_LDADD = $(XAPIAN_LIBS) # -lpcre … … 102 115 md5wrap.cc xmlparse.cc metaxmlparse.cc utf8convert.cc sample.cc diritor.cc\ 103 runfilter.cc freemem.cc common/msvc_dirent.cc 116 runfilter.cc freemem.cc common/msvc_dirent.cc configfile.cc 104 117 if NEED_MKDTEMP 105 118 omindex_SOURCES += portability/mkdtemp.cc 106 119 endif 107 omindex_LDADD = $(XAPIAN_LIBS) 120 omindex_LDADD = $(XAPIAN_LIBS) $(TEXTCAT_LIB) 108 121 109 122 scriptindex_SOURCES = scriptindex.cc myhtmlparse.cc htmlparse.cc\ 110 common/getopt.cc commonhelp.cc utils.cc hashterm.cc loadfile.cc\123 
common/getopt.cc commonhelp.cc configfile.cc utils.cc hashterm.cc loadfile.cc\ 111 124 common/safe.cc common/stringutils.cc utf8convert.cc utf8truncate.cc 112 scriptindex_LDADD = $(XAPIAN_LIBS) 125 scriptindex_LDADD = $(XAPIAN_LIBS) $(TEXTCAT_LIB) 113 126 114 127 htmlparsetest_SOURCES = htmlparsetest.cc myhtmlparse.cc htmlparse.cc\ 115 128 utf8convert.cc … … 127 140 MAINTAINERCLEANFILES = $(dist_man_MANS) 128 141 endif 129 142 143 CLEANFILES = $(dist_libexec_SCRIPTS) $(dist_bin_SCRIPTS) 144 145 omega.conf: $(srcdir)/omega.conf.in Makefile 146 cat $(srcdir)/omega.conf.in | \ 147 sed "s,\@localstatedir\@,$(localstatedir),;" > $@ 148 ppt2text: $(srcdir)/ppt2text.in Makefile 149 cat $(srcdir)/ppt2text.in | \ 150 sed "s,\@CATPPT\@,$(CATPPT),;s,\@SED\@,$(SED),;" > $@ 151 chmod 0755 $@ 152 outlook2text: $(srcdir)/outlook2text.in mimeexplode Makefile 153 cat $(srcdir)/outlook2text.in | \ 154 sed "s,\@MSGCONVERT\@,$(MSGCONVERT),;s,\@MIMEEXPLODE\@,$(pkglibbindir)/mimeexplode,;" > $@ 155 chmod 0755 $@ 156 excel2text: $(srcdir)/excel2text.in Makefile 157 cat $(srcdir)/excel2text.in | \ 158 sed "s,\@XLS2CSV\@,$(XLS2CSV),;s,\@SED\@,$(SED),;" > $@ 159 chmod 0755 $@ 160 161 130 162 if DOCUMENTATION_RULES 131 CLEANFILES = $(dist_man_MANS)163 CLEANFILES += $(dist_man_MANS) 132 164 133 165 omindex.1: omindex$(EXEEXT) makemanpage 134 166 ./makemanpage ./omindex $(srcdir)/omindex.cc omindex.1 -
xapian-omega-1.0.7a/acinclude.m4
diff -u xapian-omega-1.0.7a/acinclude.m4.orig
old new 1 m4_include(omega.m4) -
xapian-omega-1.0.7a/configfile.cc
diff -u xapian-omega-1.0.7a/configfile.cc.orig
old new 24 24 25 25 #include <fstream> 26 26 #include <iostream> 27 using std::ifstream; 27 28 28 29 #include <sys/types.h> 29 30 #include "safesysstat.h" … … 42 43 string template_dir = "/var/lib/omega/templates/"; 43 44 string log_dir = "/var/log/omega/"; 44 45 string cdb_dir = "/var/lib/omega/cdb/"; 46 string cache_dir = "/var/lib/omega/cache/"; 45 47 46 48 /** Return true if the file fname exists. 47 49 */ … … 64 66 } 65 67 66 68 while (in) { 67 char line[1024]; 68 in.getline(line, sizeof(line)); 69 70 char *p = line; 71 while (isspace((unsigned char)*p)) ++p; 72 if (!*p || *p == '#') continue; // Ignore blank line and comments 73 74 char *q = p; 75 while (*q && !isspace((unsigned char)*q)) ++q; 76 string name(p, q - p); 77 78 p = q; 79 while (isspace((unsigned char)*p)) ++p; 80 q = p; 81 while (*q && !isspace((unsigned char)*q)) ++q; 82 string value(p, q - p); 83 84 while (*q && isspace((unsigned char)*q)) ++q; 85 if (value.empty() || *q) { 86 throw string("Bad line in configuration file `") + cfile + "'"; 87 } 88 69 string name, value; 70 in >> name >> value; 71 if (value[value.length()-1] != '/') value += "/"; 72 //std::cout << "...read conf: "<<name<<"="<<value << std::endl; 89 73 if (name == "database_dir") { 90 database_dir = value + "/";74 database_dir = value; 91 75 } else if (name == "template_dir") { 92 template_dir = value + "/";76 template_dir = value; 93 77 } else if (name == "log_dir") { 94 log_dir = value + "/";78 log_dir = value; 95 79 } else if (name == "cdb_dir") { 96 cdb_dir = value + "/"; 80 cdb_dir = value; 81 } else if (name == "cache_dir") { 82 cache_dir = value; 97 83 } 98 84 } 99 85 -
xapian-omega-1.0.7a/configfile.h
diff -u xapian-omega-1.0.7a/configfile.h.orig
old new 30 30 extern string template_dir; 31 31 extern string log_dir; 32 32 extern string cdb_dir; 33 extern string cache_dir; 33 34 34 35 void read_config_file(); 35 36 -
xapian-omega-1.0.7a/configure.ac
diff -u xapian-omega-1.0.7a/configure.ac.orig
old new 1 1 dnl Process this file with autoconf to produce a configure script. 2 2 3 AC_INIT(xapian-omega, 1.0.7 )dnl FIXME:bugreport addr as third argument3 AC_INIT(xapian-omega, 1.0.7a, reinhard.urban@avl.com) dnl bugreport addr as third argument 4 4 dnl See HACKING document for details of the reasons for required versions. 5 5 AC_PREREQ([2.59]) 6 6 AM_INIT_AUTOMAKE([1.9 -Wportability tar-ustar]) … … 259 259 dnl Check processor endianness. 260 260 AC_C_BIGENDIAN 261 261 262 dnl hardcode sed path in the pkglibbin_SCRIPTS 263 AC_PATH_PROG([SED], [sed]) 264 AC_DEFINE_UNQUOTED([SED],"$SED", [sed path]) 265 AC_SUBST([SED]) 266 262 267 if test x"$enable_documentation" = xyes; then 263 268 dnl Check for perl. (Needed to make man pages from "--help" output). 264 269 AC_PATH_PROG(PERL, perl, []) … … 274 279 test -z "$RST2HTML" && AC_MSG_ERROR([rst2html is required to build documentation]) 275 280 fi 276 281 282 AC_ARG_WITH([textcat], 283 [ --without-textcat don't use libtextcat for automatic language classification]) 284 if test xno != x$with_textcat; then 285 AC_MSG_CHECKING(for libtextcat) 286 textcat_candidates="$with_textcat $HOME/local/lib* /usr*/local/lib* /usr/lib*" 287 for lib in $textcat_candidates; do 288 if test -f $lib/libtextcat.la; then 289 TEXTCAT_LIB="-L$lib -ltextcat" 290 break 291 elif test -f $lib/libtextcat.a; then 292 TEXTCAT_LIB="$lib/libtextcat.a" 293 break 294 fi 295 done 296 if test -z "$TEXTCAT_LIB"; then 297 AC_MSG_RESULT(not found) 298 else 299 AC_MSG_RESULT($lib) 300 AC_DEFINE([HAVE_TEXTCAT],1, [Define to 1 if you have libtextcat for automatic language classification]) 301 AC_SUBST(TEXTCAT_LIB) 302 have_textcat=yes 303 fi 304 fi 305 306 dnl Check for unrar (default: disabled). Pending patent issues. 
307 AC_ARG_WITH([unrar], 308 [ --with-unrar use unrar and extract .rar files], 309 [if test xyes = x$with_unrar; then 310 AC_PATH_PROG([UNRAR], [unrar]) 311 test -n "$UNRAR" && ( 312 AC_DEFINE([HAVE_UNRAR],1, [Define to 1 if you have unrar to extract .rar archives]) 313 AC_DEFINE_UNQUOTED([UNRAR],"$UNRAR",[unrar path]) 314 ) 315 fi]) 316 317 XO_OMEGA_WITH([unzip],[extract .zip archives]) 318 319 AC_ARG_WITH([msgconvert], 320 [ --without-msgconvert use msgconvert.pl and extract MS-Outlook .msg files]) 321 if test xno != x$with_msgconvert; then 322 AC_PATH_PROGS([MSGCONVERT], [msgconvert msgconvert.pl]) 323 test -n "$MSGCONVERT" && ( 324 AC_DEFINE([HAVE_MSGCONVERT],1, [Use msgconvert.pl and extract MS-Outlook .msg files]) 325 dnl AC_DEFINE_UNQUOTED([MSGCONVERT],"$MSGCONVERT",[msgconvert.pl path]) 326 AC_SUBST([MSGCONVERT]) 327 ) 328 fi 329 330 XO_OMEGA_WITH([readpst],[extract MS-Outlook .pst files]) 331 XO_OMEGA_WITH([catdoc], [extract MS-Excel .xls and MS-Powerpoint .ppt files], 332 [ 333 AC_PATH_PROG([CATPPT], [catppt]) 334 AC_SUBST([CATPPT]) 335 AC_PATH_PROG([XLS2CSV], [xls2csv]) 336 AC_SUBST([XLS2CSV]) 337 ]) 338 339 XO_OMEGA_WITH([pdftotext],[extract Adobe .pdf files], 340 [ 341 AC_PATH_PROG([PDFINFO], [pdfinfo]) 342 AC_DEFINE_UNQUOTED([PDFINFO],"$PDFINFO",[pdfinfo path]) 343 ]) 344 XO_OMEGA_WITH([pstotext], [extract Postscript .ps files]) 345 XO_OMEGA_WITH([antiword], [extract MS-Word .doc files]) 346 XO_OMEGA_WITH([wpd2text], [extract wordperfect files]) 347 XO_OMEGA_WITH([wps2text], [extract MS-works files]) 348 XO_OMEGA_WITH([unrtf], [extract RTF files]) 349 XO_OMEGA_WITH([pod2text], [extract from perl scripts]) 350 XO_OMEGA_WITH([catdvi], [extract from TeX dvi]) 351 XO_OMEGA_WITH([djvutxt], [extract from DjVu]) 352 277 353 dnl Disabled XML stuff as we don't currently build omindex-config 278 354 dnl dnl Check for libxml or libxml2. We do this by looking for xml-config. 279 355 dnl AC_PATH_PROG(XML_CONFIG_PATH, xml2-config) -
xapian-omega-1.0.7a/excel2text.in
diff -u xapian-omega-1.0.7a/excel2text.in.orig
old new 1 #! /bin/sh 2 # strip numbers, to stdout 3 @XLS2CSV@ -q0 "$1" | @SED@ -re's/[0123456789.]+,//g' -
xapian-omega-1.0.7a/langclass.conf
diff -u xapian-omega-1.0.7a/langclass.conf.orig
old new 1 # A config file for the language models 2 # provided with Gertjan van Noords language guesser 3 # (http://odur.let.rug.nl/~vannoord/TextCat/) 4 # 5 # Notes: 6 # - Putting the most probable languages at the top of the list 7 # improves performance, because this will raise the threshold for 8 # likely candidates more quickly. 9 # 10 /var/lib/omega/langclass/english.lm english 11 /var/lib/omega/langclass/german.lm german 12 /var/lib/omega/langclass/french.lm french 13 /var/lib/omega/langclass/danish.lm danish 14 /var/lib/omega/langclass/dutch.lm dutch 15 /var/lib/omega/langclass/finnish.lm finnish 16 /var/lib/omega/langclass/italian.lm italian 17 /var/lib/omega/langclass/norwegian.lm norwegian 18 /var/lib/omega/langclass/portuguese.lm portuguese 19 /var/lib/omega/langclass/russian-iso8859_5.lm russian 20 /var/lib/omega/langclass/russian-koi8_r.lm russian 21 /var/lib/omega/langclass/russian-windows1251.lm russian 22 /var/lib/omega/langclass/spanish.lm spanish 23 /var/lib/omega/langclass/swedish.lm swedish -
xapian-omega-1.0.7a/langclass/danish.lm
diff -u xapian-omega-1.0.7a/langclass/danish.lm.orig
old new 1 _ 21274 2 e 9291 3 r 5307 4 n 4733 5 i 3976 6 t 3948 7 s 3751 8 a 3296 9 l 3063 10 d 3025 11 o 2868 12 g 2471 13 er 2164 14 k 2002 15 m 1680 16 e_ 1655 17 en 1613 18 f 1507 19 de 1484 20 r_ 1379 21 v 1245 22 u 1176 23 t_ 1081 24 n_ 1032 25 er_ 992 26 b 942 27 . 870 28 ge 868 29 ._ 831 30 re 816 31 h 816 32 et 813 33 te 813 34 p 806 35 in 788 36 or 775 37 _s 753 38 _a 749 39 en_ 712 40 _e 691 41 ti 689 42 an 687 43 , 681 44 ,_ 677 45 _f 655 46 _d 645 47 el 642 48 ng 635 49 nd 634 50 g_ 634 51 se 615 52 le 615 53 st 607 54 s_ 601 55 _o 572 56 ne 560 57 li 537 58 et_ 524 59 es 521 60 _i 512 61 ri 511 62 sk 510 63 _de 498 64 å 497 65 ar 475 66 ed 473 67 ig 463 68 at 452 69 _m 446 70 is 443 71 fo 441 72 æ 441 73 ve 438 74 _k 434 75 ø 432 76 der 429 77 ke 428 78 ing 427 79 og 426 80 _b 412 81 me 408 82 il 407 83 for 405 84 ns 394 85 y 389 86 _h 380 87 _t 374 88 on 371 89 d_ 370 90 al 362 91 be 359 92 _fo 351 93 af 336 94 de_ 335 95 _og 333 96 _p 332 97 og_ 325 98 om 325 99 _for 324 100 _og_ 313 101 l_ 308 102 nge 302 103 i_ 295 104 _v 294 105 c 289 106 ter 283 107 ll 280 108 ni 278 109 nde 278 110 rs 277 111 _af 277 112 un 275 113 ra 271 114 ko 271 115 den 270 116 _i_ 268 117 id 265 118 til 265 119 j 265 120 vi 264 121 D 260 122 ere 256 123 ma 255 124 si 253 125 f_ 252 126 af_ 238 127 _af_ 235 128 ik 235 129 m_ 234 130 å_ 232 131 _ti 227 132 _D 226 133 _u 226 134 _er 225 135 nt 224 136 _en 224 137 ls 221 138 es_ 216 139 lig 216 140 ger 216 141 re_ 210 142 ag 210 143 _me 207 144 at_ 204 145 lle 200 146 ge_ 200 147 _til 200 148 ige 199 149 _er_ 199 150 der_ 199 151 em 199 152 ds 197 153 r. 
195 154 io 195 155 r._ 195 156 ud 193 157 _at 192 158 _at_ 191 159 ta 190 160 els 190 161 _l 190 162 ha 190 163 il_ 189 164 or_ 189 165 ke_ 186 166 rt 185 167 gen 184 168 ka 183 169 - 180 170 rk 180 171 ning 178 172 ol 178 173 nin 178 174 la 177 175 ld 175 176 De 175 177 it 173 178 ede 172 179 ed_ 171 180 _ko 171 181 lse 171 182 ek 168 183 else 167 184 inge 167 185 på 167 186 ng_ 167 187 _på 167 188 iv 166 189 ør 166 190 so 165 191 he 165 192 ens 165 193 ske 165 194 ind 164 195 til_ 163 196 rn 163 197 ide 162 198 ev 162 199 den_ 162 200 to 162 201 sen 160 202 _be 160 203 sa 160 204 bl 158 205 _g 158 206 an_ 157 207 det 156 208 om_ 156 209 ru 156 210 va 155 211 _til_ 155 212 ste 154 213 rd 153 214 _på_ 152 215 k_ 152 216 på_ 152 217 di 152 218 kr 152 219 K 151 220 _De 149 221 for_ 148 222 te_ 148 223 kon 148 224 ver 147 225 mm 146 226 am 146 227 _en_ 145 228 _r 145 229 ne_ 144 230 ing_ 144 231 tr 143 232 le_ 142 233 del 142 234 _in 142 235 gt 140 236 _st 138 237 S 138 238 eg 138 239 gs 138 240 tt 138 241 r, 137 242 ser 137 243 r,_ 137 244 er. 137 245 ro 137 246 er._ 137 247 _for_ 136 248 ent 136 249 kt 136 250 eri 135 251 ur 134 252 lin 134 253 B 133 254 A 133 255 sti 133 256 ner 133 257 da 133 258 ris 132 259 ion 132 260 _K 131 261 ern 131 262 ers 130 263 ist 130 264 ær 130 265 ige_ 130 266 _si 130 267 tte 129 268 E 128 269 _n 128 270 nn 127 271 _B 126 272 _ha 126 273 _. 126 274 rne 125 275 H 125 276 _ud 125 277 rin 124 278 na 124 279 und 124 280 ft 124 281 _der 124 282 ku 123 283 _A 122 284 ler 120 285 and 120 286 end 120 287 ns_ 120 288 rg 119 289 op 119 290 er,_ 119 291 er, 119 292 ar_ 118 293 P 118 294 _S 117 295 _H 117 296 _._ 116 297 ov 116 298 erne 115 299 tio 115 300 med 115 301 tion 115 302 _E 115 303 _P 115 304 det_ 114 305 pr 114 306 e. 
113 307 ter_ 113 308 : 113 309 kk 113 310 e._ 113 311 e,_ 113 312 e, 113 313 od 113 314 kke 113 315 ten 113 316 ling 113 317 :_ 112 318 mi 112 319 eli 112 320 lo 111 321 som 111 322 _den 111 323 rb 110 324 se_ 110 325 ell 110 326 sid 110 327 nne 109 328 fi 108 329 lt 107 330 v_ 107 331 _de_ 107 332 ark 106 333 lige 106 334 ngen 106 335 ie 105 336 _med 105 337 _der_ 105 338 ring 105 339 a_ 105 340 _vi 104 341 -_ 104 342 ys 103 343 gel 103 344 _so 103 345 ia 103 346 ive 102 347 ej 101 348 ati 101 349 ren 101 350 _det 101 351 side 101 352 ske_ 101 353 br 100 354 gi 100 355 F 100 356 M 100 357 ul 99 358 isk 99 359 men 99 360 n,_ 99 361 age 99 362 fr 99 363 n, 99 364 tu 98 365 ts 98 366 _ma 98 367 nder 98 368 ot 97 369 dt 97 370 R 97 371 med_ 96 372 ho 96 373 ans 95 374 _kon 95 375 pe 95 376 ce 94 377 gr 93 378 mme 92 379 ret 92 380 lige_ 92 381 mu 91 382 _med_ 91 383 hv 91 384 væ 91 385 Det 91 386 ens_ 91 387 kl 91 388 _M 90 389 T 90 390 ingen 90 391 rm 90 392 ill 89 393 elle 89 394 ef 89 395 ene 89 396 nds 89 397 ove 89 398 som_ 89 399 C 88 400 _den_ 88 -
xapian-omega-1.0.7a/langclass/dutch.lm
diff -u xapian-omega-1.0.7a/langclass/dutch.lm.orig
old new 1 _ 20104 2 e 9848 3 n 5323 4 a 3733 5 t 3683 6 i 3490 7 r 3195 8 d 2876 9 o 2845 10 n_ 2443 11 en 2439 12 s 2195 13 e_ 1842 14 l 1837 15 g 1522 16 en_ 1500 17 de 1489 18 er 1388 19 t_ 1377 20 v 1253 21 u 1217 22 k 1204 23 _d 1136 24 h 1102 25 m 1084 26 an 939 27 te 875 28 j 857 29 in 810 30 _v 793 31 r_ 751 32 de_ 742 33 ee 737 34 p 732 35 et 718 36 ge 716 37 aa 708 38 b 703 39 _e 686 40 st 669 41 z 668 42 ie 662 43 _de 655 44 w 631 45 c 611 46 . 604 47 s_ 582 48 _de_ 576 49 _h 572 50 el 570 51 ij 564 52 ._ 554 53 et_ 531 54 an_ 522 55 he 505 56 _o 497 57 nd 478 58 _i 475 59 ar 459 60 _m 451 61 re 442 62 ve 441 63 ' 428 64 or 424 65 ng 421 66 at 418 67 _s 415 68 oo 403 69 _z 401 70 le 395 71 _b 394 72 _a 391 73 _he 386 74 va 385 75 er_ 381 76 me 372 77 _w 368 78 f 361 79 on 351 80 _t 351 81 _va 345 82 _g 342 83 di 342 84 nt 340 85 , 335 86 g_ 335 87 ,_ 334 88 van 327 89 ch 326 90 is 326 91 ing 325 92 be 325 93 ni 320 94 it 317 95 een 316 96 _van 315 97 al 310 98 den 309 99 ti 309 100 van_ 307 101 oe 302 102 ke 302 103 _van_ 299 104 aar 299 105 d_ 295 106 we 293 107 da 292 108 tu 290 109 _ee 290 110 ud 287 111 een_ 286 112 li 284 113 es 282 114 _st 281 115 ver 281 116 ten 281 117 ri 275 118 nde 275 119 der 274 120 _in 270 121 k_ 268 122 vo 267 123 het 266 124 oor 264 125 _het 262 126 het_ 262 127 _het_ 259 128 _een 258 129 l_ 258 130 ze 257 131 _n 254 132 ro 248 133 gen 243 134 _een_ 241 135 at_ 240 136 op 238 137 n. 238 138 _en 237 139 rs 237 140 _da 235 141 stu 232 142 in_ 230 143 _be 229 144 _ge 228 145 _k 226 146 rd 226 147 tud 220 148 _en_ 220 149 n._ 217 150 te_ 209 151 ei 208 152 ent 206 153 _me 203 154 la 202 155 ek 202 156 ed 201 157 ra 200 158 stud 200 159 en. 
200 160 ie_ 197 161 ste 196 162 _vo 195 163 _in_ 193 164 _stu 191 165 zi 191 166 om 189 167 ui 189 168 en._ 186 169 ten_ 185 170 _stud 185 171 ude 184 172 die 183 173 ns 183 174 _j 181 175 D 179 176 aan 179 177 se 179 178 ma 178 179 _ve 176 180 ne 174 181 _p 174 182 eg 173 183 p_ 172 184 ar_ 172 185 aar_ 171 186 _te 170 187 ng_ 169 188 _we 169 189 '' 167 190 _D 165 191 ers 164 192 _op 163 193 dat 161 194 dat_ 160 195 ig 160 196 ere 159 197 eer 158 198 _zi 158 199 voor 156 200 voo 156 201 nge 155 202 nder 151 203 nte 151 204 or_ 150 205 ta 150 206 je 149 207 ing_ 148 208 ll 148 209 _ver 147 210 jk 146 211 oor_ 146 212 _dat 145 213 ijk 145 214 ren 145 215 is_ 145 216 _dat_ 144 217 _l 144 218 and 144 219 lij 143 220 ter 143 221 na 142 222 uden 139 223 tude 138 224 _voor 136 225 _voo 136 226 ond 136 227 ken 135 228 cht 135 229 _al 135 230 ht 135 231 wa 134 232 ho 133 233 em 133 234 den_ 133 235 pe 132 236 sc 132 237 un 131 238 ur 131 239 _di 130 240 gen_ 130 241 zo 129 242 rt 129 243 ev 128 244 mo 128 245 lijk 127 246 _is 126 247 stude 124 248 ha 123 249 to 122 250 el_ 121 251 og 121 252 op_ 121 253 sch 120 254 ol 120 255 ente 119 256 _u 118 257 pr 118 258 end 118 259 mi 117 260 iet 116 261 _aa 116 262 eli 115 263 dent 115 264 ijn 115 265 jn 115 266 ou 115 267 men 114 268 _' 114 269 tie 113 270 _is_ 113 271 nie 113 272 tr 112 273 ak 112 274 id 112 275 udent 111 276 tuden 111 277 uit 110 278 _te_ 109 279 aan_ 109 280 ld 109 281 S 108 282 _aan 108 283 ede 108 284 ja 107 285 nten 107 286 it_ 107 287 je_ 107 288 ts 107 289 erd 106 290 est 106 291 E 105 292 _op_ 105 293 ad 104 294 al_ 104 295 _ze 104 296 _on 104 297 rk 104 298 lle 103 299 ens 103 300 gel 103 301 m_ 103 302 len 103 303 _r 102 304 ec 102 305 inge 102 306 met 102 307 _met 101 308 si 100 309 die_ 100 310 us 100 311 onde 99 312 _ni 99 313 De 99 314 eu 99 315 dente 99 316 enten 99 317 ic 99 318 _met_ 98 319 f_ 98 320 met_ 98 321 no 97 322 ko 96 323 voor_ 96 324 rde 96 325 H 96 326 ngen 95 327 lo 95 328 ot 95 329 
as 94 330 zij 93 331 _nie 92 332 vi 92 333 eb 92 334 _De 92 335 _zij 91 336 ep 91 337 wi 91 338 _zo 91 339 kt 91 340 ege 91 341 G 91 342 bi 90 343 j_ 90 344 ij_ 90 345 ze_ 90 346 do 90 347 lan 89 348 ov 89 349 udi 89 350 ord 89 351 onder 89 352 V 88 353 elij 88 354 _wa 88 355 elijk 88 356 ef 88 357 _die 87 358 ag 86 359 erk 86 360 eren 86 361 R 85 362 ik 85 363 _ma 85 364 gr 85 365 am 85 366 _mo 84 367 ul 84 368 nn 83 369 eve 83 370 De_ 83 371 maa 83 372 ingen 83 373 wo 83 374 _'' 83 375 O 83 376 tudi 82 377 I 82 378 nt_ 82 379 tudie 81 380 ven 81 381 udie 81 382 nten_ 81 383 _die_ 81 384 jaa 80 385 ka 80 386 eke 80 387 ite 80 388 a_ 80 389 _je 80 390 ac 80 391 jaar 80 392 _je_ 79 393 _H 79 394 _zijn 79 395 zijn 79 396 n, 78 397 nen 78 398 N 78 399 n,_ 78 400 ijn_ 77 -
xapian-omega-1.0.7a/langclass/english.lm
diff -u xapian-omega-1.0.7a/langclass/english.lm.orig
old new 1 _ 20326 2 e 6617 3 t 4843 4 o 3834 5 n 3653 6 i 3602 7 a 3433 8 s 2945 9 r 2921 10 h 2507 11 e_ 2000 12 d 1816 13 _t 1785 14 c 1639 15 l 1635 16 th 1535 17 he 1351 18 _th 1333 19 u 1309 20 f 1253 21 m 1175 22 p 1151 23 _a 1145 24 the 1142 25 _the 1060 26 s_ 978 27 er 968 28 _o 967 29 he_ 928 30 d_ 888 31 t_ 885 32 the_ 844 33 _the_ 843 34 on 842 35 in 817 36 y 783 37 n_ 773 38 b 761 39 re 754 40 , 734 41 ,_ 732 42 an 732 43 g 728 44 w 718 45 _i 707 46 en 676 47 f_ 599 48 y_ 595 49 of 594 50 _of 592 51 es 589 52 ti 587 53 v 580 54 _of_ 575 55 of_ 575 56 nd 568 57 at 549 58 r_ 540 59 _w 534 60 it 522 61 ed 496 62 _p 494 63 nt 485 64 _c 462 65 o_ 457 66 io 450 67 _an 439 68 te 432 69 or 425 70 _b 418 71 nd_ 407 72 to 406 73 st 402 74 is 401 75 _s 396 76 _in 389 77 ion 385 78 and 385 79 de 384 80 ve 382 81 ha 375 82 ar 366 83 _m 361 84 and_ 360 85 _and 360 86 _and_ 358 87 se 353 88 _to 347 89 me 346 90 to_ 344 91 ed_ 339 92 . 330 93 be 329 94 _f 329 95 ._ 329 96 _to_ 320 97 co 317 98 ic 316 99 ns 308 100 al 307 101 le 304 102 ou 304 103 ce 293 104 ent 279 105 l_ 278 106 _co 277 107 tio 275 108 on_ 274 109 _d 274 110 tion 268 111 ri 266 112 _e 264 113 ng 253 114 hi 251 115 er_ 249 116 ea 246 117 as 245 118 _be 242 119 pe 242 120 h_ 234 121 _r 232 122 ec 227 123 ch 223 124 ro 222 125 ct 220 126 _h 219 127 pr 217 128 in_ 217 129 ne 214 130 ll 214 131 rt 213 132 s,_ 210 133 s, 210 134 li 209 135 ra 208 136 T 207 137 wh 204 138 a_ 203 139 ac 201 140 _wh 199 141 _n 196 142 ts 196 143 di 196 144 es_ 195 145 si 194 146 re_ 193 147 at_ 192 148 nc 192 149 ie 190 150 _a_ 188 151 _in_ 185 152 ing 184 153 us 182 154 _re 182 155 g_ 179 156 ng_ 178 157 op 178 158 con 177 159 tha 175 160 _l 174 161 _tha 174 162 ver 173 163 ma 173 164 ion_ 171 165 _con 171 166 ci 170 167 ons 170 168 _it 170 169 po 169 170 ere 168 171 is_ 167 172 ta 167 173 la 166 174 _pr 165 175 fo 164 176 ho 164 177 ir 162 178 ss 161 179 men 160 180 be_ 160 181 un 159 182 ty 159 183 _be_ 158 184 ing_ 157 185 
om 156 186 ot 156 187 hat 155 188 ly 155 189 _g 155 190 em 153 191 _T 151 192 rs 150 193 mo 148 194 ch_ 148 195 wi 147 196 we 147 197 ad 147 198 ts_ 145 199 res 143 200 _wi 143 201 I 143 202 hat_ 142 203 ei 141 204 ly_ 141 205 ni 140 206 os 140 207 ca 139 208 ur 139 209 A 138 210 ut 138 211 that 138 212 _that 137 213 ati 137 214 _fo 137 215 st_ 137 216 il 136 217 or_ 136 218 for 136 219 pa 136 220 ul 135 221 ate 135 222 ter 134 223 it_ 134 224 nt_ 133 225 that_ 132 226 _ha 129 227 al_ 128 228 el 128 229 as_ 127 230 ll_ 127 231 _ma 125 232 no 124 233 ment 124 234 an_ 124 235 tion_ 122 236 su 122 237 bl 122 238 _de 122 239 nce 120 240 pl 120 241 fe 119 242 tr 118 243 so 118 244 int 115 245 ov 114 246 e, 114 247 e,_ 114 248 _u 113 249 ent_ 113 250 Th 113 251 her 113 252 j 112 253 atio 112 254 ation 112 255 _Th 111 256 le_ 110 257 ai 110 258 _it_ 110 259 _on 110 260 _for 109 261 ect 109 262 k 109 263 hic 108 264 est 108 265 der 107 266 tu 107 267 na 106 268 _by_ 106 269 by_ 106 270 E 106 271 by 106 272 _by 106 273 ve_ 106 274 _di 106 275 en_ 104 276 vi 104 277 m_ 103 278 _whi 102 279 iv 102 280 whi 102 281 ns_ 102 282 _A 101 283 ich 100 284 ge 100 285 pro 99 286 ess 99 287 _whic 99 288 ers 99 289 hich 99 290 ce_ 99 291 which 99 292 whic 99 293 all 98 294 ove 98 295 _is 98 296 ich_ 97 297 ee 97 298 hich_ 97 299 n,_ 96 300 n, 96 301 im 95 302 ir_ 94 303 hei 94 304 ions 94 305 sti 94 306 se_ 94 307 per 93 308 The 93 309 _pa 93 310 heir 93 311 id 93 312 eir 93 313 eir_ 93 314 ig 93 315 heir_ 93 316 _no 93 317 ev 93 318 era 92 319 _int 92 320 ted 91 321 _The 91 322 ies 91 323 art 91 324 thei 90 325 _ar 90 326 _thei 90 327 their 90 328 _pro 90 329 et 89 330 _pe 88 331 _mo 88 332 ther 88 333 x 87 334 gh 87 335 S 87 336 _is_ 87 337 ol 87 338 ty_ 87 339 _I 86 340 nde 86 341 am 86 342 rn 86 343 nte 86 344 mp 85 345 _su 84 346 _we 84 347 par 84 348 _v 84 349 pu 82 350 his 82 351 ow 82 352 mi 82 353 go 81 354 N 81 355 ue 81 356 ple 81 357 ep 80 358 ab 80 359 ;_ 80 360 ; 80 361 ex 
80 362 ain 80 363 over 80 364 _un 79 365 q 79 366 qu 79 367 pp 79 368 ith 79 369 ry 79 370 _as 79 371 ber 79 372 ub 78 373 av 78 374 uc 78 375 s._ 77 376 s. 77 377 enc 77 378 are 77 379 iti 77 380 gr 76 381 his_ 76 382 ua 76 383 part 76 384 ff 75 385 eve 75 386 O 75 387 rea 74 388 ous 74 389 ia 74 390 The_ 73 391 ag 73 392 mb 73 393 _go 73 394 fa 72 395 on,_ 72 396 ern 72 397 t,_ 72 398 on, 72 399 t, 72 400 _me 71 -
xapian-omega-1.0.7a/langclass/finnish.lm
diff -u xapian-omega-1.0.7a/langclass/finnish.lm.orig
old new 1 _ 19984 2 a 9133 3 i 8384 4 t 7797 5 e 6481 6 n 6431 7 s 5897 8 l 4504 9 o 4163 10 u 4106 11 k 4013 12 ä 3354 13 n_ 2868 14 m 2569 15 a_ 1987 16 v 1905 17 r 1827 18 ta 1580 19 en 1553 20 is 1515 21 h 1508 22 y 1462 23 st 1390 24 in 1375 25 p 1342 26 j 1333 27 an 1139 28 si 1073 29 tt 1030 30 te 1008 31 en_ 982 32 _k 980 33 it 974 34 ll 947 35 aa 942 36 ä_ 902 37 va 878 38 el 855 39 _t 851 40 ka 846 41 i_ 835 42 . 832 43 se 818 44 li 806 45 tä 804 46 oi 767 47 ai 744 48 ._ 739 49 tu 734 50 _o 719 51 mi 715 52 al 703 53 on 684 54 d 681 55 _v 662 56 et 654 57 _j 641 58 t_ 635 59 ti 632 60 _m 628 61 _s 620 62 ja 616 63 ma 596 64 sa 595 65 la 582 66 ist 575 67 _e 565 68 to 565 69 ks 557 70 in_ 554 71 es 551 72 il 538 73 an_ 536 74 ki 527 75 , 525 76 ku 525 77 ,_ 524 78 us 520 79 as 514 80 nt 512 81 ri 495 82 ke 494 83 at 491 84 _p 485 85 le 484 86 ik 483 87 ss 477 88 ut 469 89 ö 469 90 sta 460 91 ee 459 92 uu 458 93 ol 457 94 ta_ 451 95 ne 445 96 ää 445 97 ei 443 98 uo 436 99 ko 433 100 un 430 101 lu 421 102 ii 420 103 e_ 418 104 nn 413 105 _h 412 106 ar 408 107 er 402 108 än 396 109 ja_ 386 110 im 381 111 on_ 365 112 _va 363 113 aan 354 114 _a 352 115 me 350 116 ak 345 117 ssa 331 118 na 330 119 ie 329 120 pa 327 121 _ja 326 122 ia 325 123 tä_ 322 124 _l 319 125 vi 317 126 ise 316 127 tta 315 128 de 314 129 os 312 130 lli 309 131 _ja_ 304 132 jo 295 133 vä 290 134 su 289 135 au 287 136 lis 286 137 _on 285 138 sä 284 139 uk 280 140 am 280 141 ot 280 142 ty 275 143 ett 271 144 ttä 270 145 ni 269 146 lä 267 147 ksi 264 148 nk 264 149 ht 263 150 ul 261 151 ell 261 152 sa_ 259 153 ha 257 154 sen 257 155 a. 
254 156 isi 253 157 ste 253 158 aan_ 252 159 _on_ 252 160 _ka 252 161 sk 251 162 kk 246 163 itt 245 164 ok 242 165 a._ 239 166 all 239 167 yt 239 168 mä 237 169 mu 237 170 av 237 171 _y 236 172 lla 233 173 taa 231 174 ais 231 175 een 230 176 K 230 177 lt 228 178 s_ 227 179 ast 227 180 iv 226 181 ssa_ 225 182 ra 225 183 - 223 184 kse 223 185 oit 220 186 om 220 187 T 219 188 _ku 218 189 än_ 216 190 aa_ 214 191 at_ 214 192 tel 211 193 ui 210 194 si_ 208 195 rk 207 196 sta_ 207 197 _jo 203 198 kä 202 199 _K 201 200 est 200 201 em 200 202 he 199 203 _n 199 204 vo 198 205 _ta 196 206 eh 196 207 _ol 196 208 S 196 209 nta 196 210 _ko 194 211 je 194 212 stä 194 213 är 193 214 ust 191 215 mis 191 216 ns 190 217 pu 189 218 nen 188 219 ät 188 220 toi 188 221 iin 187 222 ten 187 223 min 186 224 ista 185 225 hd 184 226 a, 184 227 a,_ 184 228 sen_ 183 229 E 182 230 lle 181 231 vat 179 232 ill 177 233 no 176 234 pä 176 235 lm 176 236 llis 175 237 n. 175 238 io 172 239 ine 171 240 n._ 170 241 pi 169 242 uks 168 243 ava 168 244 ään 166 245 nen_ 165 246 ah 165 247 _mu 164 248 tus 163 249 mm 162 250 _to 162 251 ek 160 252 int 159 253 _r 159 254 lin 158 255 oim 158 256 _T 158 257 A 158 258 imi 157 259 tö 157 260 la_ 157 261 jä 157 262 aj 156 263 yh 155 264 o_ 154 265 lo 154 266 oli 153 267 een_ 153 268 le_ 153 269 _si 153 270 g 152 271 aik 151 272 vat_ 150 273 L 149 274 ur 149 275 ti_ 149 276 sia 148 277 ite 147 278 inen 147 279 ain 146 280 sti 146 281 lla_ 146 282 ys 145 283 _mi 145 284 val 144 285 stu 144 286 äm 144 287 alli 143 288 pe 143 289 utt 142 290 et_ 141 291 _tu 141 292 eri 140 293 _E 140 294 : 140 295 nki 139 296 ir 139 297 llä 138 298 up 138 299 äi 137 300 ama 137 301 _ha 135 302 id 135 303 _se 135 304 po 134 305 inen_ 134 306 tte 133 307 nna 133 308 ten_ 132 309 or 132 310 ts 131 311 nä 131 312 yk 131 313 äs 131 314 _S 130 315 ses 130 316 ve 130 317 ess 129 318 äl 129 319 ita 129 320 lai 129 321 H 129 322 van 127 323 äk 127 324 kin 127 325 N 127 326 _te 126 327 den 126 
328 tee 126 329 P 126 330 kaa 126 331 iin_ 125 332 kun 125 333 ois 125 334 sit 125 335 oh 124 336 V 124 337 yö 124 338 äv 124 339 tav 124 340 voi 124 341 ia_ 123 342 I 123 343 oll 123 344 maa 122 345 ih 122 346 oj 122 347 rj 121 348 ro 121 349 ikk 120 350 so 120 351 oo 120 352 oimi 120 353 do 120 354 pp 119 355 M 119 356 _ei 118 357 toim 118 358 op 118 359 uut 118 360 tet 118 361 _i 118 362 _ma 117 363 vai 117 364 lä_ 116 365 u_ 116 366 sy 116 367 kau 116 368 utta 116 369 un_ 115 370 eu 115 371 ssä 115 372 tti 115 373 _sa 115 374 mp 114 375 eis 114 376 ka_ 112 377 että 112 378 taa_ 111 379 _et 111 380 hu 111 381 itu 111 382 suu 111 383 den_ 111 384 ksen 110 385 ap 110 386 _ke 110 387 uv 110 388 tam 110 389 yv 109 390 aup 109 391 stä_ 109 392 asta 109 393 äy 109 394 kan 108 395 nu 108 396 ukse 108 397 _toi 107 398 ien 107 399 hi 107 400 iss 107 -
xapian-omega-1.0.7a/langclass/french.lm
diff -u xapian-omega-1.0.7a/langclass/french.lm.orig
old new 1 _ 20800 2 e 7258 3 i 4051 4 s 4003 5 a 3972 6 n 3903 7 r 3650 8 t 3590 9 u 2968 10 o 2823 11 l 2723 12 e_ 2632 13 d 2241 14 s_ 1721 15 _d 1693 16 c 1663 17 p 1528 18 é 1320 19 m 1297 20 es 1164 21 t_ 1106 22 _l 1079 23 de 1048 24 on 959 25 _de 940 26 en 939 27 _p 852 28 nt 825 29 le 808 30 es_ 791 31 re 777 32 , 721 33 ,_ 720 34 n_ 703 35 de_ 685 36 ' 670 37 an 667 38 _de_ 645 39 v 641 40 _s 610 41 r_ 596 42 _c 594 43 er 585 44 ai 575 45 _a 558 46 _e 554 47 ou 554 48 q 549 49 qu 538 50 is 530 51 te 528 52 ti 525 53 ur 519 54 it 514 55 g 498 56 a_ 490 57 f 480 58 la 476 59 in 475 60 _le 441 61 me 436 62 nt_ 432 63 . 427 64 b 427 65 ra 423 66 io 416 67 ent 415 68 ._ 404 69 ne 395 70 ns 392 71 ion 383 72 h 381 73 ue 376 74 se 371 75 le_ 370 76 ar 370 77 ie 362 78 co 361 79 at 359 80 tr 359 81 et 349 82 pr 342 83 ce 336 84 au 328 85 u_ 321 86 il 314 87 _r 313 88 _la 304 89 un 303 90 eu 303 91 st 300 92 re_ 296 93 ro 290 94 la_ 288 95 on_ 287 96 _m 286 97 _la_ 283 98 que 281 99 _qu 280 100 _q 280 101 po 275 102 tio 273 103 tion 273 104 pa 273 105 li 271 106 _t 269 107 nc 268 108 si 266 109 _pr 265 110 ri 264 111 al 263 112 ui 262 113 _co 259 114 i_ 255 115 ta 255 116 é_ 251 117 x 247 118 em 244 119 l_ 243 120 et_ 238 121 _l' 236 122 l' 236 123 les 233 124 ns_ 233 125 ir 232 126 _le_ 228 127 ent_ 227 128 or 226 129 ré 224 130 _f 224 131 ne_ 222 132 à 221 133 ve 220 134 ch 220 135 it_ 219 136 di 219 137 oi 217 138 - 216 139 ni 215 140 à_ 215 141 les_ 215 142 d' 214 143 el 212 144 ss 212 145 _n 212 146 ut 211 147 our 210 148 des 210 149 " 208 150 ur_ 207 151 nd 207 152 er_ 206 153 ait 206 154 ion_ 204 155 rs 202 156 _en 201 157 _et 200 158 j 200 159 _d' 200 160 ll 199 161 _des 198 162 des_ 197 163 _pa 197 164 té 196 165 _et_ 195 166 _à 195 167 _à_ 195 168 om 193 169 ma 192 170 ati 190 171 _des_ 189 172 L 188 173 so 187 174 _u 185 175 è 184 176 _" 183 177 sa 182 178 _po 181 179 tre 181 180 dé 181 181 ue_ 180 182 pe 179 183 en_ 179 184 ont 178 185 _un 178 186 _L 
178 187 us 176 188 _les 176 189 _les_ 176 190 rt 176 191 is_ 173 192 _i 173 193 du 172 194 e,_ 171 195 e, 171 196 na 171 197 s, 170 198 s,_ 170 199 as 169 200 men 169 201 M 167 202 ait_ 167 203 'a 166 204 vi 162 205 ci 159 206 ant 158 207 _au 158 208 da 157 209 _M 157 210 ation 155 211 atio 155 212 con 154 213 que_ 153 214 ons 153 215 eur 151 216 est 149 217 me_ 149 218 mi 149 219 par 148 220 tion_ 148 221 _so 147 222 te_ 147 223 res 144 224 lo 144 225 ment 144 226 és 144 227 ans 143 228 _du 142 229 du_ 141 230 ux 141 231 un_ 140 232 y 138 233 pro 138 234 _du_ 136 235 _dé 136 236 ce_ 135 237 _se 134 238 _re 134 239 pl 133 240 A 132 241 ge 131 242 ic 131 243 su 130 244 x_ 129 245 ien 129 246 nce 129 247 "_ 129 248 ac 128 249 il_ 128 250 qui 128 251 _pro 127 252 no 127 253 av 126 254 _v 125 255 _o 125 256 rs_ 125 257 ans_ 124 258 eme 124 259 bl 123 260 emen 122 261 _en_ 122 262 iqu 122 263 ct 122 264 iq 122 265 lle 122 266 nn 121 267 ts 121 268 ement 121 269 ét 120 270 _"_ 120 271 ér 119 272 té_ 119 273 _ce 119 274 mp 119 275 ire 119 276 ui_ 119 277 to 118 278 he 117 279 _é 117 280 ca 117 281 _j 116 282 ec 116 283 va 116 284 _par 116 285 ée 115 286 _con 115 287 se_ 114 288 tre_ 113 289 ique 112 290 dan 111 291 éc 111 292 ha 110 293 une 110 294 P 110 295 lu 110 296 ux_ 109 297 _b 108 298 s. 108 299 pou 108 300 _pou 108 301 ier 107 302 C 107 303 ais 106 304 s._ 105 305 ain 104 306 _un_ 104 307 nte 103 308 'e 103 309 mo 103 310 mm 103 311 ment_ 102 312 une_ 102 313 com 101 314 _P 101 315 'i 101 316 _ma 100 317 do 99 318 ant_ 98 319 anc 98 320 che 97 321 ap 97 322 ont_ 97 323 _que 97 324 os 97 325 urs 96 326 _di 96 327 fi 96 328 im 96 329 pour 96 330 _pour 96 331 ê 95 332 ts_ 95 333 _g 95 334 our_ 94 335 _sa 94 336 ntr 94 337 _da 94 338 _ré 93 339 rai 93 340 rm 93 341 _qui 93 342 e. 
92 343 am 92 344 _com 91 345 uv 91 346 _C 91 347 D 91 348 qui_ 90 349 e._ 90 350 pu 89 351 _qui_ 88 352 ia 87 353 _dan 87 354 _dans 87 355 dans 87 356 ter 87 357 fo 87 358 son 87 359 dans_ 87 360 id 86 361 ag 86 362 ine 86 363 tu 85 364 ran 85 365 au_ 85 366 ol 85 367 oc 84 368 est_ 84 369 st_ 84 370 enc 84 371 F 82 372 _tr 81 373 'u 81 374 tai 81 375 ell 80 376 R 79 377 _su 79 378 S 79 379 ions 79 380 pré 79 381 sé 78 382 ab 78 383 né 77 384 _que_ 77 385 _in 77 386 _av 76 387 pour_ 76 388 fa 76 389 rr 76 390 air 75 391 _ch 75 392 _a_ 75 393 ba 74 394 _pl 74 395 gr 74 396 tt 74 397 ssi 74 398 rd 73 399 pas 73 400 bi 73 -
xapian-omega-1.0.7a/langclass/german.lm
diff -u xapian-omega-1.0.7a/langclass/german.lm.orig
old new 1 _ 31586 2 e 15008 3 n 9058 4 i 7299 5 r 6830 6 t 5662 7 s 5348 8 a 4618 9 h 4176 10 d 4011 11 er 3415 12 en 3412 13 u 3341 14 l 3266 15 n_ 2848 16 c 2636 17 ch 2460 18 g 2407 19 o 2376 20 e_ 2208 21 r_ 2128 22 m 2077 23 _d 1948 24 de 1831 25 en_ 1786 26 ei 1718 27 er_ 1570 28 in 1568 29 te 1505 30 ie 1505 31 b 1458 32 t_ 1425 33 f 1306 34 k 1176 35 ge 1144 36 s_ 1137 37 un 1113 38 , 1104 39 ,_ 1099 40 w 1099 41 z 1060 42 nd 1039 43 he 1004 44 st 989 45 _s 952 46 _de 949 47 . 909 48 _e 906 49 ne 906 50 der 880 51 ._ 847 52 be 841 53 es 829 54 ic 796 55 _a 791 56 ie_ 779 57 is 769 58 ich 763 59 an 755 60 re 749 61 di 732 62 ein 730 63 se 730 64 " 720 65 ng 709 66 _i 706 67 sc 683 68 sch 681 69 it 673 70 der_ 652 71 h_ 651 72 ch_ 642 73 S 630 74 le 609 75 p 609 76 ä 607 77 ü 603 78 au 603 79 v 602 80 che 599 81 _w 596 82 d_ 585 83 die 576 84 _di 572 85 m_ 562 86 _die 559 87 el 548 88 _S 540 89 _der 529 90 li 527 91 _der_ 523 92 si 515 93 al 514 94 ns 507 95 on 501 96 or 495 97 ti 490 98 ten 487 99 ht 486 100 die_ 485 101 _die_ 483 102 D 479 103 rt 478 104 nd_ 476 105 _u 470 106 nt 468 107 A 466 108 in_ 464 109 den 461 110 cht 447 111 und 443 112 me 440 113 _z 429 114 ung 426 115 ll 423 116 _un 421 117 _ei 419 118 _n 415 119 hr 412 120 ine 412 121 _A 408 122 _ein 405 123 ar 404 124 ra 403 125 _v 400 126 _g 400 127 as 395 128 zu 392 129 et 389 130 em 385 131 _D 380 132 eine 376 133 gen 376 134 g_ 376 135 da 368 136 we 366 137 K 365 138 lt 360 139 B 354 140 _" 353 141 nde 349 142 ni 347 143 und_ 345 144 E 345 145 ur 345 146 _m 342 147 ri 341 148 ha 340 149 eh 339 150 ten_ 338 151 es_ 336 152 _K 336 153 _und 335 154 ig 335 155 _b 335 156 hen 334 157 _und_ 332 158 _au 329 159 _B 327 160 _da 325 161 _zu 324 162 _in 322 163 at 321 164 us 318 165 wi 307 166 n, 305 167 n,_ 304 168 nn 304 169 te_ 301 170 eit 301 171 _h 300 172 ter 299 173 M 298 174 n. 
295 175 ß 294 176 ng_ 289 177 sche 289 178 - 283 179 rs 282 180 den_ 282 181 _si 280 182 G 280 183 im 278 184 _ge 277 185 chen 276 186 rd 273 187 _E 273 188 n._ 270 189 icht 270 190 rn 268 191 uf 267 192 isch 264 193 isc 264 194 nen 263 195 _in_ 262 196 _M 260 197 _er 257 198 ich_ 255 199 ac 253 200 lic 252 201 _G 252 202 ber 252 203 la 251 204 vo 251 205 eb 250 206 ke 249 207 F 248 208 as_ 248 209 hen_ 248 210 ach 245 211 en, 244 212 ung_ 243 213 lich 243 214 ste 243 215 en,_ 243 216 _k 241 217 ben 241 218 _f 241 219 en. 241 220 _be 239 221 it_ 239 222 L 238 223 _se 237 224 mi 236 225 ve 236 226 na 236 227 on_ 236 228 P 235 229 ss 234 230 ist 234 231 ö 234 232 ht_ 233 233 ru 233 234 st_ 229 235 _F 229 236 ts 227 237 ab 226 238 W 226 239 ol 225 240 _eine 225 241 hi 225 242 so 224 243 em_ 223 244 "_ 223 245 ren 222 246 en._ 221 247 chen_ 221 248 R 221 249 ta 221 250 ere 220 251 ische 219 252 ers 218 253 ert 217 254 _P 217 255 tr 217 256 ed 215 257 ze 215 258 eg 215 259 ens 215 260 ür 213 261 ah 212 262 _vo 212 263 ne_ 211 264 cht_ 210 265 uc 209 266 _wi 209 267 nge 208 268 lle 208 269 fe 207 270 _L 207 271 ver 206 272 hl 205 273 V 204 274 ma 203 275 wa 203 276 auf 201 277 H 198 278 _W 195 279 T 195 280 nte 193 281 uch 193 282 l_ 192 283 sei 192 284 nen_ 190 285 u_ 189 286 _den 189 287 _al 189 288 _V 188 289 t. 
188 290 lte 187 291 ut 186 292 ent 184 293 sich 183 294 sic 183 295 il 183 296 ier 182 297 am 181 298 gen_ 180 299 sen 179 300 fü 178 301 um 178 302 t._ 177 303 f_ 174 304 he_ 174 305 ner 174 306 nst 174 307 ls 174 308 _sei 173 309 ro 173 310 ir 173 311 ebe 173 312 mm 173 313 ag 172 314 ern 169 315 t,_ 169 316 t, 169 317 eu 169 318 ft 168 319 icht_ 167 320 hre 167 321 Be 166 322 nz 165 323 nder 165 324 _T 164 325 _den_ 164 326 iche 163 327 tt 163 328 zu_ 162 329 and 162 330 J 161 331 rde 160 332 rei 160 333 _we 159 334 _H 159 335 ige 159 336 _Be 158 337 rte 157 338 hei 156 339 das 155 340 aus 155 341 che_ 154 342 _das 154 343 _zu_ 154 344 tz 154 345 _ni 153 346 das_ 153 347 _R 153 348 N 153 349 des 153 350 _ve 153 351 _J 152 352 I 152 353 _das_ 152 354 men 151 355 _so 151 356 _ver 151 357 _auf 150 358 ine_ 150 359 _ha 150 360 rg 149 361 ind 148 362 eben 148 363 kt 147 364 mit 147 365 _an 147 366 her 146 367 Ge 146 368 Sc 145 369 _sich 145 370 U 145 371 Sch 145 372 _sic 145 373 end 145 374 Di 144 375 abe 143 376 ck 143 377 sse 142 378 ür_ 142 379 ell 142 380 ik 141 381 o_ 141 382 nic 141 383 nich 141 384 sa 141 385 _fü 140 386 hn 140 387 zi 140 388 no 140 389 nicht 140 390 im_ 139 391 von_ 139 392 von 139 393 _nic 139 394 _nich 139 395 eine_ 139 396 oc 138 397 wei 138 398 io 138 399 schen 138 400 gt 138 -
xapian-omega-1.0.7a/langclass/italian.lm
diff -u xapian-omega-1.0.7a/langclass/italian.lm.orig
old new 1 _ 25028 2 a 7570 3 e 6477 4 i 5481 5 o 5104 6 l 3905 7 n 3866 8 r 3502 9 t 2934 10 c 2862 11 s 2862 12 a_ 2504 13 e_ 2404 14 d 2004 15 i_ 1749 16 o_ 1679 17 u 1650 18 v 1611 19 p 1561 20 m 1414 21 _c 1325 22 , 1192 23 ,_ 1192 24 _s 1190 25 _d 1094 26 g 1067 27 an 925 28 er 915 29 _a 914 30 _p 895 31 la 858 32 _l 830 33 re 799 34 ar 769 35 h 762 36 no 753 37 co 726 38 va 698 39 _e 657 40 n_ 656 41 on 656 42 ra 653 43 to 651 44 f 638 45 di 638 46 _i 634 47 ch 634 48 ll 633 49 l_ 624 50 la_ 598 51 ta 593 52 el 576 53 in 567 54 _m 558 55 en 529 56 b 528 57 ri 525 58 _co 523 59 _n 523 60 _di 522 61 li 513 62 av 507 63 al 501 64 le 494 65 ia 492 66 se 484 67 ol 479 68 _f 477 69 or 477 70 te 469 71 _e_ 467 72 ve 454 73 at 449 74 de 447 75 . 443 76 ne 429 77 va_ 428 78 ca 426 79 ._ 422 80 tt 422 81 re_ 415 82 nt 415 83 io 411 84 _v 407 85 pe 405 86 z 392 87 to_ 391 88 _ch 389 89 na 384 90 si 384 91 ' 383 92 he 382 93 no_ 379 94 ci 374 95 _la 373 96 ro 371 97 _g 370 98 st 368 99 cc 366 100 he_ 362 101 di_ 362 102 ma 358 103 ev 354 104 che 354 105 es 352 106 me 352 107 pa 351 108 _t 349 109 ti 348 110 _di_ 347 111 ss 345 112 che_ 344 113 a,_ 337 114 a, 337 115 nd 335 116 o, 333 117 o,_ 333 118 ell 330 119 gl 323 120 sa 322 121 il 322 122 gli 321 123 da 318 124 as 318 125 do 314 126 _che 308 127 _che_ 306 128 eva 306 129 _la_ 300 130 lla 298 131 le_ 293 132 un 291 133 _pe 290 134 _de 288 135 q 283 136 qu 283 137 ava 280 138 po 277 139 on_ 275 140 r_ 273 141 li_ 273 142 _b 269 143 _il 268 144 _il_ 268 145 il_ 268 146 lo 267 147 om 263 148 e, 263 149 e,_ 263 150 ni 258 151 tr 258 152 so 255 153 ra_ 253 154 os 251 155 _in 249 156 _u 248 157 per 244 158 are 243 159 et 243 160 _se 240 161 ano 239 162 si_ 238 163 _ca 238 164 _qu 238 165 lla_ 238 166 _q 238 167 _a_ 236 168 ac 236 169 _r 234 170 ic 233 171 _no 232 172 ie 227 173 fa 227 174 hi 226 175 del 225 176 ua 222 177 _per 218 178 ce 218 179 _ma 216 180 sc 216 181 _del 215 182 mi 212 183 _un 208 184 chi 206 185 era 205 
186 i, 205 187 i,_ 205 188 su 203 189 and 202 190 vo 202 191 _fa 201 192 eva_ 200 193 ano_ 199 194 gli_ 197 195 non 196 196 pi 196 197 vi 195 198 er_ 195 199 _al 194 200 se_ 193 201 _ne 192 202 _non 191 203 am 190 204 is 187 205 ava_ 187 206 _non_ 186 207 non_ 186 208 in_ 185 209 ent 185 210 _si 184 211 _pa 184 212 com 183 213 ! 182 214 _le 182 215 _su 181 216 uo 181 217 el_ 180 218 !_ 180 219 l' 178 220 ue 177 221 te_ 177 222 _com 177 223 are_ 176 224 pr 176 225 _in_ 176 226 van 172 227 mo 172 228 ta_ 171 229 gn 167 230 ere 166 231 na_ 166 232 tto 163 233 it 161 234 _per_ 161 235 per_ 161 236 é 161 237 all 160 238 ess 159 239 ut 159 240 col 158 241 acc 157 242 gi 155 243 lo_ 154 244 oc 154 245 vano 153 246 io_ 153 247 _av 151 248 ndo 151 249 é_ 151 250 ato 149 251 ave 148 252 _st 147 253 me_ 147 254 'a 146 255 ia_ 144 256 con 143 257 mp 143 258 fi 142 259 ett 142 260 _si_ 141 261 _pi 140 262 era_ 140 263 ti_ 140 264 ó 140 265 vano_ 140 266 _gl 139 267 qua 139 268 ella 139 269 sta 138 270 ome 137 271 S 137 272 _gli 137 273 _S 137 274 ad 136 275 _ve 134 276 ant 134 277 ne_ 134 278 ó_ 133 279 sp 133 280 do_ 133 281 _po 132 282 ro_ 132 283 ov 132 284 _le_ 131 285 ella_ 130 286 sse 129 287 _con 128 288 ir 128 289 _vi 128 290 ig 127 291 _gli_ 127 292 _ave 127 293 vev 127 294 un_ 126 295 ot 126 296 veva 125 297 dell 125 298 que 125 299 a. 125 300 _o 125 301 a._ 124 302 tu 124 303 cia 123 304 za 123 305 _que 123 306 _da 121 307 par 121 308 _pr 120 309 cch 120 310 _dell 120 311 eg 119 312 _sa 119 313 o._ 119 314 o. 
119 315 _col 118 316 lt 118 317 _un_ 118 318 rt 118 319 ur 117 320 _vo 117 321 _me 117 322 ome_ 117 323 L 116 324 ap 116 325 _L 116 326 zi 116 327 nto 116 328 og 115 329 _an 115 330 _so 115 331 em 114 332 ag 114 333 be 111 334 ni_ 111 335 im 110 336 cchi 110 337 ver 110 338 lle 109 339 nz 109 340 cci 109 341 _ri 109 342 nc 108 343 _er 108 344 come_ 107 345 come 107 346 aveva 107 347 ui 107 348 avev 107 349 tto_ 107 350 _come 106 351 ed 106 352 P 105 353 man 105 354 _P 105 355 rs 105 356 occ 104 357 ndo_ 103 358 ato_ 103 359 _qua 103 360 _era 103 361 ari 102 362 ba 100 363 _mo 100 364 nel 100 365 id 99 366 men 98 367 _fi 98 368 _all 98 369 rr 97 370 _do 97 371 _avev 97 372 att 97 373 l'a 96 374 ei 96 375 zz 96 376 ; 96 377 vol 95 378 pp 95 379 tra 95 380 ;_ 95 381 ere_ 94 382 lle_ 94 383 nda 94 384 utt 94 385 est 93 386 _nel 93 387 ul 92 388 ola 92 389 iv 92 390 ando 90 391 ale 90 392 lu 90 393 rn 90 394 e. 89 395 e._ 89 396 ll' 89 397 tta 88 398 nte 87 399 _l' 87 400 uel 87 -
xapian-omega-1.0.7a/langclass/norwegian.lm
diff -u xapian-omega-1.0.7a/langclass/norwegian.lm.orig
old new 1 _ 22970 2 e 6833 3 n 4206 4 r 3516 5 t 3112 6 a 2587 7 s 2440 8 i 2112 9 l 1901 10 o 1900 11 n_ 1875 12 r_ 1761 13 k 1713 14 g 1630 15 en 1615 16 m 1508 17 e_ 1450 18 d 1444 19 er 1436 20 h 1306 21 t_ 1300 22 _h 1180 23 _s 1148 24 er_ 1105 25 v 982 26 en_ 976 27 an 919 28 . 901 29 ._ 791 30 _. 781 31 et 770 32 g_ 762 33 _._ 726 34 å 725 35 u 719 36 f 709 37 p 702 38 ha 682 39 _ha 672 40 de 657 41 te 651 42 _e 621 43 et_ 614 44 re 581 45 ne 565 46 _o 554 47 an_ 544 48 ke 534 49 _, 522 50 ,_ 522 51 , 522 52 _,_ 522 53 _f 519 54 _m 515 55 or 503 56 _d 483 57 _i 480 58 å_ 479 59 se 476 60 m_ 469 61 nn 454 62 b 449 63 me 441 64 ø 434 65 _a 413 66 st 404 67 _t 398 68 og 380 69 _v 377 70 _og 366 71 ar 364 72 el 364 73 le 361 74 i_ 356 75 om 353 76 og_ 351 77 _og_ 351 78 li 350 79 _k 346 80 _de 339 81 ge 339 82 han 337 83 y 333 84 _han 332 85 ve 330 86 kk 323 87 in 311 88 _b 307 89 fo 301 90 j 301 91 il 298 92 _H 291 93 H 291 94 han_ 288 95 _han_ 288 96 for 287 97 ik 281 98 l_ 278 99 kke 277 100 tt 276 101 ti 270 102 ne_ 270 103 d_ 269 104 ed 269 105 om_ 268 106 nne 266 107 _me 264 108 ng 257 109 _er 257 110 _fo 256 111 eg 256 112 _se 256 113 _g 256 114 un 255 115 ig 255 116 sk 253 117 _er_ 252 118 _p 252 119 _for 250 120 ke_ 249 121 _n 238 122 _l 233 123 al 232 124 ør 222 125 s_ 221 126 ar_ 215 127 at 214 128 _en 211 129 he 211 130 pe 209 131 _i_ 208 132 am 200 133 es 200 134 si 200 135 enn 197 136 det 195 137 or_ 193 138 vi 190 139 ns 189 140 ikk 188 141 det_ 185 142 so 185 143 un_ 183 144 il_ 181 145 nd 181 146 te_ 181 147 "_ 180 148 " 180 149 _"_ 180 150 _" 180 151 em 179 152 _ti 176 153 kke_ 176 154 lig 174 155 ten 174 156 Ha 173 157 _Ha 173 158 re_ 172 159 ikke 168 160 je 165 161 Han 165 162 ter 165 163 _Han 165 164 eg_ 164 165 på 164 166 _på 163 167 _si 163 168 _å 163 169 _Han_ 162 170 Han_ 162 171 på_ 162 172 _på_ 161 173 til 160 174 som 160 175 _so 159 176 den 159 177 _det 157 178 ed_ 155 179 ll 155 180 _ik 155 181 rt 155 182 som_ 153 183 ra 152 184 a_ 
152 185 har 152 186 nt 152 187 de_ 152 188 tr 151 189 v_ 151 190 _har 151 191 ka 151 192 ig_ 150 193 _som 150 194 for_ 150 195 _som_ 150 196 _en_ 149 197 hu 149 198 _ikk 148 199 _ham 148 200 ham 148 201 ste 148 202 _det_ 148 203 _ikke 148 204 enne 148 205 ikke_ 148 206 har_ 147 207 nge 147 208 D 147 209 _har_ 147 210 _D 147 211 am_ 147 212 ere 147 213 ham_ 146 214 _ham_ 146 215 it 145 216 _he 144 217 _til 144 218 av 143 219 va 140 220 men 140 221 år 140 222 _ve 140 223 _hu 139 224 ta 139 225 pen 137 226 sp 137 227 _st 135 228 tte 135 229 la 135 230 _E 133 231 E 133 232 den_ 130 233 is 130 234 til_ 128 235 _r 128 236 tt_ 128 237 år_ 127 238 k_ 124 239 _å_ 124 240 ri 124 241 _til_ 124 242 at_ 123 243 ene 123 244 seg 123 245 _av 123 246 med 122 247 _vi 122 248 _seg 122 249 seg_ 121 250 _seg_ 121 251 _for_ 120 252 nne_ 120 253 ut 120 254 _u 119 255 mm 119 256 mme 119 257 De 118 258 _De 118 259 _at 118 260 _hun 117 261 hun 117 262 ko 117 263 be 116 264 _at_ 115 265 ter_ 115 266 pen_ 114 267 ker 113 268 hun_ 113 269 _hun_ 113 270 on 111 271 lig_ 111 272 .. 
110 273 hen 107 274 _med 107 275 rs 106 276 ser 106 277 med_ 105 278 _men 104 279 _hen 104 280 _sk 104 281 _med_ 104 282 ak 103 283 ans 103 284 ker_ 102 285 av_ 101 286 _ka 101 287 no 100 288 ver 100 289 ler 99 290 J 99 291 spe 99 292 ten_ 99 293 _J 99 294 ene_ 98 295 ld 98 296 hv 98 297 _av_ 98 298 ger 97 299 ni 96 300 gen 96 301 ie 95 302 ser_ 94 303 _et 94 304 spen 94 305 _hv 94 306 men_ 93 307 Espe 92 308 Es 92 309 _Esp 92 310 _Es 92 311 _Espe 92 312 Esp 92 313 _al 92 314 Espen 92 315 lle 89 316 rem 89 317 id 89 318 fø 89 319 ei 88 320 inn 88 321 rd 88 322 enne_ 88 323 _henn 87 324 henne 87 325 henn 87 326 kt 86 327 spen_ 86 328 _om 86 329 ler_ 86 330 da 86 331 ett 86 332 itt 86 333 bl 85 334 to 85 335 _Je 84 336 ger_ 84 337 Je 84 338 æ 84 339 ma 83 340 ing 83 341 ær 83 342 ns_ 83 343 eli 82 344 ang 82 345 _be 82 346 så 82 347 _den 82 348 pp 81 349 rk 81 350 dr 81 351 oe 81 352 ss 81 353 _fø 80 354 ek 80 355 le_ 79 356 _no 79 357 kj 78 358 elig 78 359 nes 78 360 nn_ 77 361 nk 77 362 fr 77 363 sl 77 364 my 77 365 kan 77 366 så_ 76 367 as 76 368 _om_ 76 369 _kan 75 370 _ko 75 371 _bl 73 372 Hu 73 373 nen 73 374 _Hu 73 375 eng 73 376 gj 73 377 rt_ 72 378 ge_ 72 379 ba 72 380 lv 71 381 rer 71 382 nde 71 383 ls 70 384 lo 70 385 ga 70 386 _noe 70 387 ro 70 388 _den_ 70 389 _ut 70 390 noe 70 391 Hun 69 392 Hun_ 69 393 _in 69 394 _Hun 69 395 _Hun_ 69 396 ren 68 397 øre 68 398 ør_ 68 399 sen 68 400 sa 67 -
xapian-omega-1.0.7a/langclass/portuguese.lm
diff -u xapian-omega-1.0.7a/langclass/portuguese.lm.orig
old new 1 _ 35328 2 a 10423 3 e 10132 4 o 8919 5 s 6795 6 r 6033 7 i 5443 8 n 4588 9 d 4531 10 t 4217 11 m 3476 12 u 3404 13 o_ 3240 14 a_ 3029 15 e_ 2879 16 c 2756 17 s_ 2461 18 _d 2379 19 l 2307 20 p 2242 21 _a 1753 22 de 1751 23 , 1660 24 ,_ 1658 25 _e 1454 26 es 1447 27 os 1412 28 ra 1343 29 _p 1328 30 nt 1302 31 _de 1248 32 do 1215 33 en 1176 34 re 1150 35 as 1123 36 v 1115 37 m_ 1113 38 de_ 1096 39 er 1082 40 g 1053 41 _c 1047 42 da 1008 43 co 986 44 os_ 975 45 te 974 46 ar 950 47 or 943 48 q 938 49 qu 938 50 _s 908 51 ta 902 52 _de_ 901 53 _o 858 54 se 841 55 ue 831 56 to 799 57 ad 777 58 . 761 59 que 752 60 em 751 61 an 748 62 f 746 63 r_ 745 64 b 732 65 st 718 66 is 716 67 al 712 68 _qu 706 69 _q 706 70 in 701 71 as_ 696 72 ã 695 73 do_ 685 74 ent 678 75 ão 677 76 _n 671 77 _co 660 78 _a_ 654 79 _m 646 80 on 645 81 ç 624 82 ri 623 83 _que 619 84 ma 602 85 po 581 86 ia 580 87 ão_ 575 88 ._ 573 89 na 572 90 me 564 91 ro 554 92 _t 544 93 pa 533 94 da_ 528 95 h 523 96 ue_ 515 97 ca 511 98 que_ 509 99 nte 503 100 no 499 101 tr 498 102 am 496 103 em_ 491 104 _que_ 487 105 _se 485 106 om 471 107 io 460 108 _do 459 109 ti 448 110 ci 445 111 _da 444 112 nd 442 113 ei 435 114 ra_ 435 115 pr 427 116 _r 423 117 _e_ 420 118 _f 420 119 ss 412 120 es_ 412 121 el 407 122 id 406 123 _o_ 399 124 _pa 390 125 um 379 126 pe 378 127 _po 376 128 la 374 129 ir 371 130 á 371 131 ic 362 132 di 362 133 li 359 134 é 359 135 _re 353 136 ve 353 137 mo 350 138 s, 349 139 s,_ 349 140 ou 347 141 com 340 142 sa 338 143 si 338 144 men 337 145 rt 331 146 _i 330 147 con 330 148 o, 327 149 _da_ 326 150 o,_ 326 151 se_ 325 152 _com 325 153 ado 323 154 to_ 322 155 ai 322 156 it 320 157 A 319 158 ec 316 159 dos 316 160 _em 312 161 ção 310 162 aç 310 163 çã 310 164 ara 305 165 so 299 166 tu 299 167 res 297 168 im 296 169 _pr 295 170 mi 293 171 ua 292 172 nto 291 173 ment 290 174 í 290 175 par 288 176 _do_ 287 177 ce 286 178 est 286 179 u_ 284 180 ente 284 181 S 278 182 l_ 278 183 _u 278 184 " 276 
185 ni 276 186 z 274 187 sta 273 188 nc 272 189 _em_ 270 190 P 269 191 ção_ 267 192 _v 267 193 at 267 194 dos_ 266 195 _es 262 196 « 259 197 _« 259 198 te_ 258 199 » 257 200 va 255 201 le 252 202 ur 252 203 _um 252 204 vi 251 205 _par 250 206 a, 247 207 a,_ 247 208 _con 247 209 ant 242 210 lo 240 211 ia_ 240 212 gu 237 213 ar_ 235 214 ac 235 215 e,_ 234 216 e, 234 217 no_ 232 218 eg 232 219 il 232 220 ns 232 221 er_ 231 222 _ma 230 223 por 230 224 _in 228 225 _l 226 226 ó 225 227 ont 224 228 _no 223 229 _P 222 230 tra 220 231 E 219 232 ida 218 233 is_ 217 234 ol 216 235 açã 215 236 ter 215 237 ação 215 238 _A 211 239 un 211 240 - 210 241 _te 210 242 or_ 209 243 ma_ 208 244 _pe 208 245 ara_ 208 246 C 206 247 ist 202 248 para 202 249 nta 201 250 ais 201 251 ut 198 252 nte_ 198 253 j 197 254 dad 196 255 _na 195 256 am_ 195 257 ade 193 258 ica 191 259 x 190 260 al_ 189 261 O 188 262 des 187 263 _para 187 264 ada 187 265 nh 186 266 _se_ 186 267 mp 185 268 ndo 184 269 R 183 270 _por 181 271 ação_ 181 272 para_ 179 273 eir 177 274 ui 177 275 vo 177 276 ou_ 177 277 ta_ 177 278 M 176 279 ria 175 280 tos 175 281 rr 174 282 D 174 283 io_ 174 284 br 174 285 _di 173 286 õ 173 287 õe 173 288 fo 173 289 I 172 290 ões 172 291 _C 171 292 mo_ 171 293 ov 170 294 pro 169 295 _os_ 169 296 _os 169 297 das 167 298 iv 166 299 uma 165 300 gr 165 301 su 164 302 fi 164 303 um_ 162 304 na_ 162 305 ga 162 306 ais_ 161 307 _S 161 308 lh 159 309 ort 159 310 cia 158 311 .. 
157 312 _est 156 313 cont 156 314 ig 155 315 á_ 154 316 ran 154 317 ça 154 318 om_ 153 319 _en 152 320 dade 152 321 _as 152 322 ho 152 323 ntr 151 324 nto_ 151 325 fe 150 326 N 149 327 das_ 149 328 uma_ 149 329 ess 149 330 é_ 148 331 ndo_ 147 332 ob 147 333 »_ 147 334 ul 146 335 ente_ 146 336 go 146 337 ento 144 338 ver 144 339 _des 144 340 gi 144 341 ha 142 342 cu 142 343 idad 142 344 av 141 345 ões_ 141 346 _pro 141 347 ura 141 348 ap 139 349 _com_ 139 350 _ca 139 351 com_ 139 352 ao 139 353 ne 138 354 od 138 355 _" 137 356 _M 137 357 pre 137 358 ras 136 359 _me 136 360 _ao 136 361 _no_ 134 362 oc 134 363 str 133 364 tes 133 365 _b 133 366 and 133 367 _g 133 368 ro_ 133 369 omo 133 370 _dos 132 371 _fo 132 372 _dos_ 132 373 rn 132 374 mento 131 375 ito 131 376 ev 131 377 rio 130 378 ass 130 379 eu 130 380 be 128 381 os, 128 382 os,_ 128 383 sp 127 384 _uma 127 385 ep 126 386 tad 125 387 s. 125 388 _uma_ 125 389 _E 125 390 idade 124 391 _um_ 124 392 nã 124 393 não 124 394 ct 123 395 ram 123 396 ado_ 123 397 ela 123 398 omo_ 121 399 iz 121 400 _an 121 -
xapian-omega-1.0.7a/langclass/russian-iso8859_5.lm
diff -u xapian-omega-1.0.7a/langclass/russian-iso8859_5.lm.orig
old new 1 _ 76249 2 Þ 19732 3 Õ 16714 4 Ð 14389 5 Ø 13942 6 â 13160 7 Ý 12444 8 á 9867 9 à 8461 10 Ò 7895 11 Û 7330 12 Ú 6498 13 Ü 5935 14 . 5725 15 ã 5287 16 Ô 5019 17 ß 4877 18 ï 4083 19 , 3899 20 ,_ 3878 21 ë 3656 22 ì 3376 23 Ø_ 3167 24 _ß 3144 25 Õ_ 3135 26 Þ_ 3098 27 - 3019 28 × 2983 29 _Ò 2952 30 ._ 2930 31 _á 2919 32 ç 2887 33 Ó 2876 34 Ñ 2797 35 áâ 2672 36 _Ý 2631 37 âÞ 2585 38 .. 2407 39 _Ø 2316 40 ÝÞ 2300 41 -_ 2294 42 Ù 2281 43 Ð_ 2249 44 ÝÐ 2057 45 ï_ 2029 46 ÞÒ 1981 47 ÝØ 1950 48 _â 1944 49 å 1874 50 ÕÝ 1856 51 _Þ 1774 52 ... 1744 53 àÐ 1709 54 ÝÕ 1685 55 ßÞ 1636 56 _- 1625 57 ÚÞ 1616 58 âÕ 1595 59 àÞ 1584 60 _Ú 1558 61 _-_ 1531 62 Ù_ 1521 63 Õâ 1518 64 Ö 1509 65 _Ø_ 1454 66 âÐ 1433 67 ÐÝ 1419 68 Õà 1396 69 Þâ 1389 70 ì_ 1381 71 ÓÞ 1375 72 ÐÛ 1370 73 _ßÞ 1364 74 àÕ 1345 75 ÚÐ 1338 76 ßà 1337 77 ÒÐ 1329 78 âØ 1306 79 ÛØ 1300 80 _Ô 1297 81 _Ü 1290 82 Õá 1284 83 ÒÞ 1271 84 çÕ 1256 85 Þà 1245 86 .... 1232 87 âì 1219 88 Þá 1212 89 è 1192 90 î 1187 91 Ò_ 1168 92 ..... 1167 93 ÞÝ 1147 94 ÐÚ 1144 95 æ 1109 96 ÞÓ 1101 97 ÛÞ 1099 98 â_ 1093 99 àØ 1076 100 Ü_ 1074 101 _ßà 1063 102 ÞÛ 1062 103 Ûì 1045 104 _ÝÕ 1034 105 ÕÛ 1029 106 _Ñ 1026 107 ØÝ 1000 108 ÞÔ 998 109 ÞÜ 996 110 ÜÕ 993 111 ë_ 975 112 ÒÕ 968 113 áÚ 968 114 _ÝÐ 966 115 ÔÕ 946 116 Ýë 943 117 _à 931 118 _ç 923 119 Ðâ 913 120 ÕÜ 910 121 ã_ 907 122 ×Ð 898 123 å_ 898 124 ÛÕ 889 125 âÞ_ 881 126 Øâ 878 127 ÞÙ 875 128 áÕ 862 129 _Ò_ 833 130 _× 816 131 ÚØ 816 132 âà 807 133 " 786 134 âì_ 769 135 ÕÔ 767 136 ÝÕ_ 762 137 áï 754 138 ÛÐ 748 139 ÞÑ 747 140 ÜÞ 741 141 ÔÐ 730 142 ÓÞ_ 715 143 Ú_ 713 144 Øá 711 145 _ã 702 146 ÞÙ_ 697 147 ØÛ 694 148 ÜÐ 692 149 ÝÝ 687 150 ÔÞ 662 151 Ðá 660 152 ÐÜ 656 153 Øï 649 154 ÖÕ 646 155 Ð× 638 156 áÞ 629 157 ÝÐ_ 619 158 _Ó 615 159 ÜØ 612 160 _Ð 610 161 í 609 162 Òá 608 163 .._ 605 164 Òë 604 165 ÞÓÞ 604 166 ØÜ 604 167 _ÚÞ 600 168 ÐÒ 597 169 áÛ 594 170 ØÕ 593 171 _ÝÕ_ 589 172 _Õ 587 173 _âÕ 583 174 âã 583 175 Øç 583 176 àã 575 177 Þáâ 571 178 é 571 179 Û_ 570 180 _í 569 181 _ï 559 182 
° 556 183 ÕÝØ 544 184 Ø× 540 185 ÕÚ 536 186 ÞÒÐ 533 187 ä 525 188 : 519 189 ² 513 190 ÐÝØ 511 191 _Òá 510 192 ØÙ 510 193 ½ 508 194 _ÚÐ 508 195 ! 503 196 ? 501 197 ÔØ 498 198 ÛØ_ 489 199 ¿ 488 200 ßàÞ 486 201 _àÐ 485 202 áØ 484 203 Øà 484 204 _áâ 484 205 ìÝ 484 206 ÛìÝ 484 207 :_ 484 208 áï_ 480 209 _×Ð 477 210 ÑÞ 470 211 _Û 469 212 ..._ 465 213 Ñë 464 214 Øå 464 215 ¸ 462 216 ÕÓ 461 217 âÒ 459 218 ÝØï 458 219 ïâ 454 220 çâ 454 221 íâ 445 222 çÕá 442 223 _âÞ 442 224 Øï_ 441 225 ØÚ 440 226 ÒØ 437 227 ÐÚ_ 436 228 áâÐ 436 229 ÞÓÞ_ 435 230 _² 434 231 ØçÕ 433 232 æØ 431 233 çâÞ 431 234 ëå 429 235 _íâ 429 236 Á 425 237 _çâ 424 238 _° 424 239 î_ 423 240 ßÕ 422 241 Ýï 422 242 _çâÞ 422 243 Ûï 419 244 ÒáÕ 418 245 Ôã 418 246 ÕáÚ 415 247 Ýâ 413 248 ÚÐÚ 411 249 áâÞ 411 250 _Òë 409 251 Ýã 408 252 âá 406 253 Ý_ 406 254 _¿ 401 255 ¼ 401 256 ÝÞ_ 399 257 _ßàÞ 398 258 _. 397 259 àÞÒ 396 260 æÕ 396 261 Úâ 394 262 Õáâ 394 263 _" 393 264 èÕ 393 265 á_ 392 266 _Þâ 392 267 ¾ 391 268 _ÝÐ_ 391 269 àÕÔ 391 270 çÕáÚ 390 271 Þ- 390 272 ÒÐÝ 388 273 Ð, 385 274 ÐÔ 384 275 Ðï 384 276 _ï_ 383 277 Ð,_ 383 278 âë 383 279 ?_ 383 280 _ÞÑ 380 281 _ÒáÕ 380 282 _âÐ 378 283 _ÚÐÚ 376 284 åÞ 375 285 âÐÚ 375 286 ÐÛì 374 287 ÞÖ 373 288 ëå_ 372 289 ÞÒÞ 372 290 ØÒ 371 291 _ÒÞ 369 292 Üã 369 293 _½ 369 294 ÕÙ 368 295 ßàÕ 368 296 ×Ý 366 297 ØçÕá 365 298 ßØ 365 299 ÕÓÞ 362 300 _áÞ 360 301 ÞÕ 360 302 !_ 360 303 Ðâì 360 304 áâØ 358 305 Øå_ 358 306 âÝ 358 307 ÜÕÝ 358 308 âáï 356 309 ØÕ_ 356 310 ØçÕáÚ 354 311 ÑÕ 352 312 _Ñë 352 313 áâà 349 314 Úã 349 315 _ÜÞ 348 316 Õâ_ 348 317  346 318 _ÜÕ 344 319 ÕÒ 344 320 ßàØ 343 321 çØ 342 322 ÜÝ 341 323 ÝØï_ 339 324 Ðà 338 325 ÝÝÞ 337 326 ÞÒÐÝ 334 327 Úá 334 328 ãà 328 329 _çÕ 328 330 ÞÛì 328 331 ãâ 327 332 çâÞ_ 324 333 ÞÜ_ 323 334 Þß 323 335 àÜ 322 336 °_ 322 337 _çâÞ_ 322 338 íâÞ 322 339 Þç 321 340 Þ,_ 320 341 Þ, 320 342 áÚÞ 319 343 ÚÞÝ 319 344 ÛÞÒ 318 345 ëÙ 318 346 ãá 317 347 Þâ_ 316 348 ØàÞ 315 349 áâÒ 314 350 Ø, 314 351 ØÙ_ 313 352 áá 313 353 Ø,_ 313 354 âÞÜ 312 355 ÐÕ 312 
356 ÕÝÝ 311 357 Õ× 311 358 ÝÞÙ 311 359 _íâÞ 310 360 º 309 361 áÐ 309 362 àÐ× 309 363 Õß 309 364 _ÔÞ 308 365 ÞÛÞ 308 366 áâì 308 367 ãÔ 307 368 ÔÕÛ 307 369 _¸ 305 370 ÐÑ 305 371 áì 303 372 ÐÛìÝ 302 373 ÞÚ 300 374 áÚØ 300 375 Ýëå 300 376 _ßàØ 298 377 çÐ 297 378 _Ö 297 379 Õ, 297 380 Õ,_ 297 381 _°_ 296 382 _ÝØ 296 383 _._ 295 384 âáï_ 295 385 ØØ 294 386 ×ÝÐ 293 387 ï, 292 388 ï,_ 292 389 _ßàÕ 291 390 _Á 290 391 ÝÞÙ_ 290 392 Ðï_ 288 393 _¾ 288 394 ÐÛØ 286 395 ÖÕ_ 285 396 áß 285 397 Øáâ 285 398 Ýëå_ 285 399 ×_ 283 400 _¼ 283 -
xapian-omega-1.0.7a/langclass/russian-koi8_r.lm
diff -u xapian-omega-1.0.7a/langclass/russian-koi8_r.lm.orig
old new 1 _ 28256 2 Ï 8957 3 Å 7206 4 Á 6230 5 É 5966 6 Ô 5385 7 Î 5338 8 Ó 4464 9 Ò 3984 10 × 3504 11 Ì 3335 12 Ë 2700 13 Í 2441 14 Ð 2324 15 Ä 2230 16 Õ 1840 17 Ñ 1562 18 Ù 1540 19 _Ð 1505 20 , 1492 21 ,_ 1492 22 Ø 1437 23 Ï_ 1423 24 Å_ 1335 25 ÓÔ 1286 26 Ú 1283 27 Ç 1233 28  1232 29 É_ 1193 30 ÔÏ 1175 31 _Ó 1146 32 Þ 1143 33 _× 1118 34 _Î 1062 35 Á_ 1032 36 . 999 37 ÎÏ 996 38 ÅÎ 920 39 Ï× 911 40 ._ 908 41 Ê 907 42 ÎÁ 893 43 ÒÏ 869 44 ÒÁ 851 45 ÎÉ 844 46 Ñ_ 808 47 ÐÒ 786 48 ÐÏ 775 49 ËÏ 768 50 Ö 763 51 _Ï 735 52 ÎÅ 730 53 ÏÓ 723 54 _É 721 55 È 696 56 ÒÅ 667 57 _ÐÒ 629 58 _Ë 626 59 ×_ 621 60 ÔÅ 620 61 Ø_ 616 62 ×Ï 599 63 Ê_ 597 64 _ÐÏ 592 65 ÇÏ 589 66 ÅÒ 584 67 ÌÉ 579 68 ÔÁ 576 69 ÅÔ 566 70 Í_ 562 71 ÅÓ 548 72 ÏÒ 545 73 ÅÌ 545 74 ÁÌ 544 75 ×Á 535 76 ÔØ 533 77 ÌÅ 528 78 _Ä 522 79 ÏÍ 520 80 ËÁ 520 81 À 518 82 Û 510 83 _Ô 507 84 ÏÔ 502 85 ÏÌ 500 86 ÁÎ 498 87 ÌØ 486 88 ÏÄ 476 89 ÁÔ 475 90 ÔÉ 471 91 ÉÔ 458 92 ÏÎ 457 93 ÏÇ 450 94 à 450 95 ÌÏ 449 96 Ô_ 448 97 _ÎÁ 447 98 ÎÙ 443 99 ÄÅ 442 100 _Í 438 101 ÌÁ 431 102 _×_ 430 103 ÓË 428 104 _ÎÅ 424 105 ÒÉ 400 106 ÅÍ 396 107 _ 392 108 _Þ 388 109 ÞÅ 388 110 ÁË 388 111 _É_ 385 112 _Ò 382 113 ÔÏ_ 381 114 ÉÎ 380 115 ÍÅ 374 116 ÄÁ 362 117 Ù_ 361 118 ×Å 350 119 ÔØ_ 348 120 ÐÒÏ 347 121 ÏÊ 346 122 ÚÁ 338 123 È_ 336 124 ÁÓ 335 125 Ï 328 126 ÓÑ 326 127 ÉÍ 323 128 ÔÒ 321 129 ÎÁ_ 321 130 ÓÏ 320 131 ÅÄ 320 132 ÏÓÔ 318 133 ÉÅ 318 134 ÍÏ 317 135 ÎÎ 316 136 ÓÅ 314 137 - 314 138 Á× 309 139 ÖÅ 308 140 Õ_ 307 141 ËÉ 306 142 ÎÅ_ 305 143 ÇÏ_ 302 144 _Ú 300 145 ÉÌ 295 146 _ÐÒÏ 291 147 Ý 289 148 Ô× 288 149 ÉÓ 285 150 _Õ 275 151 _ËÏ 274 152 ÅÎÉ 274 153 Ü 273 154 ÎÏ_ 272 155 ÉÒ 271 156 _Ç 271 157 ÁÚ 267 158 ÁÒ 266 159 ÄÉ 265 160 ÏÊ_ 264 161 ÏÇÏ 263 162 ÁÍ 260 163 ÐÅ 258 164 ×Ù 255 165 ÍÉ 254 166 ÄÏ 254 167 ÓÔ× 247 168 ÉÚ 246 169 ÃÉ 246 170 î 244 171 Æ 240 172 _Ü 239 173 _Á 239 174 Ë_ 238 175 ó 237 176 ÅÓÔ 237 177 _ÎÅ_ 236 178 ÂÙ 234 179 ÉÑ 234 180 ÒÕ 229 181 ÅË 228 182 ÓÉ 226 183 ÔÅÌ 224 184 ÉË 224 185 ÌÉ_ 222 186 ÅÊ 222 187 ÓÑ_ 222 188 ÎÔ 220 189 ØÎ 
218 190 ÞÔ 216 191 ÌØÎ 216 192 ÏÅ 214 193 ÍÁ 213 194 _ÚÁ 211 195 Ï×Á 211 196 ÂÏ 210 197 _Ì 210 198 ÞÁ 209 199 _î 208 200 ÓÌ 205 201 ÞÔÏ 204 202 _ÞÔ 204 203 ÅÔ_ 203 204 _ÞÔÏ 203 205 ÜÔ 202 206 Ó_ 202 207 ×É 201 208 ÁÑ 199 209 ÅÎÎ 199 210 ÔÏÒ 199 211 _ÎÁ_ 199 212 ÓÔÁ 198 213 ÏÖ 198 214 ÉÅ_ 197 215 _ó 197 216 _ÓÏ 195 217 ÐÒÉ 194 218 ËÔ 193 219 Ì_ 193 220 ×Ó 191 221 ÏÍ_ 190 222 ÏÇÏ_ 187 223 _Ö 187 224 ÅÇ 186 225 _ÔÏ 186 226 ÁÄ 186 227 _ÒÁ 185 228 _ÜÔ 184 229 _ËÁ 184 230 ÓÔÉ 183 231 _Å 182 232 Å× 182 233 ÔÓ 181 234 _ÓÔ 180 235 ÌÑ 180 236 ÔÎ 180 237 ÓÐ 173 238 ÉÉ 172 239 ÏÞ 172 240 À_ 172 241 ÐÒÅ 172 242 ÏË 172 243 ÁÔØ 171 244 ÂÉ 171 245 ÛÅ 170 246 ÁÅ 169 247 á 169 248 ÏÒÏ 168 249 ÙÈ 167 250 Ï×Ï 166 251 ÏÌØ 166 252 ÁÎÉ 165 253 ÞÔÏ_ 164 254 _ÞÔÏ_ 163 255 ÷ 163 256 ÁÑ_ 163 257 ÉÈ 162 258 Ï,_ 160 259 Ï, 160 260 ÓÓ 160 261 ÅÚ 159 262 ÎÉÅ 158 263 ÚÎ 157 264 ÜÔÏ 157 265 ÄÎ 156 266 ÒÏ× 156 267 ð 155 268 Á, 155 269 ÓËÏ 155 270 _×Ï 155 271 Á,_ 155 272 _ÂÙ 154 273 ÙÅ 154 274 ÝÅ 154 275 _Ï 154 276 É,_ 153 277 É, 153 278 ÎÎÏ 152 279 _ÐÒÉ 151 280 ÔÙ 150 281 ÓÔÏ 150 282 ÉÑ_ 150 283 ÍÕ 150 284 É× 150 285 _- 149 286 ÒÅÄ 148 287 ÉÔÅ 147 288 _ÜÔÏ 146 289 ÍÅÎ 146 290 ÏÐ 146 291 _ÐÒÅ 145 292 145 293 ÉÊ 145 294 ÅÌØ 145 295 _÷ 144 296 _Ó_ 144 297 ÉÞ 144 298 __ 144 299 _ 144 300 _ 144 301 ÈÏ 143 302 ÅÇÏ 143 303 ÑÔ 142 304 ÔÓÑ 142 305 ÐÅÒ 142 306 ÙÊ 142 307 ÐÁ 141 308 ÏÔÏ 140 309 -_ 140 310 ÁÐ 139 311 ÏÚ 139 312 ÓËÉ 138 313 ÙÈ_ 138 314 ï 137 315 ÓÁ 137 316 ÙÅ_ 137 317 _ð 136 318 Å, 136 319 Å,_ 136 320 ÒÙ 136 321 Î_ 135 322 _á 135 323 ÇÉ 134 324 _×Ù 134 325 ÓÔØ 134 326 ÁÓÔ 133 327 ÖÅ_ 133 328 ÞÉ 133 329 ÎÉÑ 133 330 ÕÔ 133 331 ÁÌÉ 132 332 _-_ 131 333 ËÕ 131 334 ÓØ 131 335 _ÄÏ 129 336 ÄÕ 129 337 ÒÁÚ 129 338 ÖÎ 129 339 ÁÅÔ 129 340 _ÍÏ 127 341 ÖÉ 127 342 _ÄÅ 127 343 ÓÔÒ 127 344 ØÎÏ 126 345 ÔÕ 126 346 _×Ó 126 347 ÌØÎÏ 126 348 ÌÅÎ 125 349 ÁÌØ 125 350 Ï×_ 124 351 ×Ì 123 352 É 123 353 ÁË_ 123 354 _ÐÅ 123 355 ÔÅÌØ 123 356 Ú_ 122 357 ×Î 122 358 ÁÀ 122 359 ÅÎÔ 121 360 ÐÏÌ 121 361 ÉÈ_ 121 362 ÙÊ_ 121 363 ÓÎ 
121 364 ÎÏÇ 121 365 ÃÅ 120 366 _ÏÔ 120 367 ÅÍ_ 119 368 ÉÏ 119 369 _ÔÅ 118 370 ËÁË 118 371 _ÎÏ 117 372 ÉÉ_ 117 373 ÉÔØ 117 374 ë 116 375 _ÉÚ 116 376 ÕÓ 116 377 ÎÏÓ 115 378 ÂÅ 115 379 ÕÄ 115 380 ÅÒÅ 114 381 ÂÌ 114 382 ÉÔÅÌ 113 383 ÅÇÏ_ 113 384 ÙÍ 113 385 Á 113 386 ÎØ 113 387 ÎÏÊ 112 388 ÎÉÅ_ 112 389 _Æ 112 390 ÒÏÓ 111 391 ËÒ 111 392 _ËÁË 111 393 ÛÉ 111 394 Í, 111 395 ÎÏ× 111 396 Í,_ 111 397 ÔÏÍ 110 398 ÅÅ 110 399 ÔÏ× 109 400 Ó× 109 -
xapian-omega-1.0.7a/langclass/russian-windows1251.lm
diff -u xapian-omega-1.0.7a/langclass/russian-windows1251.lm.orig
old new 1 _ 21836 2 î 5818 3 à 4506 4 å 4258 5 è 3769 6 ò 3394 7 í 3254 8 ñ 2594 9 ë 2470 10 ð 2346 11 â 2227 12 ê 1798 13 ì 1709 14 ó 1673 15 ä 1638 16 ï 1377 17 , 1187 18 ,_ 1172 19 î_ 1160 20 ÿ 1059 21 ü 1024 22 û 1006 23 å_ 966 24 á 945 25 _ï 937 26 ç 910 27 _í 907 28 è_ 896 29 òî 878 30 ã 856 31 _ñ 828 32 _â 818 33 à_ 801 34 ÷ 795 35 . 789 36 ._ 761 37 ñò 655 38 íå 617 39 _è 616 40 íà 575 41 ø 568 42 é 565 43 ïî 549 44 ðà 526 45 íî 524 46 ÿ_ 522 47 _ò 521 48 _î 497 49 _ä 496 50 æ 484 51 _ïî 477 52 _ê 477 53 ãî 472 54 àë 470 55 ü_ 461 56 íè 456 57 îâ 454 58 êî 452 59 ðî 432 60 êà 431 61 ëè 425 62 õ 413 63 òà 403 64 åí 402 65 _ì 396 66 _á 396 67 âî 395 68 îë 388 69 òî_ 384 70 é_ 383 71 _íå 378 72 _è_ 375 73 îð 369 74 âà 367 75 _íà 365 76 ì_ 364 77 îì 363 78 ïð 357 79 ó_ 357 80 åë 353 81 åð 352 82 ëà 350 83 èò 344 84 îñ 344 85 ë_ 343 86 òü 338 87 äå 337 88 ëî 334 89 ðè 330 90 ðå 328 91 èë 326 92 àê 326 93 îò 323 94 - 322 95 îí 320 96 åò 317 97 þ 312 98 àí 308 99 âå 308 100 â_ 302 101 _÷ 299 102 ò_ 296 103 çà 292 104 íå_ 292 105 òå 291 106 äà 291 107 _ç 291 108 îä 286 109 àð 283 110 àò 283 111 _ïð 274 112 åñ 264 113 ìî 263 114 û_ 260 115 îã 253 116 _- 245 117 _ó 245 118 ê_ 244 119 îá 240 120 ãî_ 238 121 ñÿ 235 122 _íå_ 235 123 ìå 233 124 ëü 229 125 -_ 225 126 ñê 219 127 _â_ 217 128 åì 217 129 íû 215 130 îé 215 131 ñå 211 132 áû 211 133 êè 211 134 ÷ò 209 135 òü_ 209 136 ÷å 208 137 _ã 206 138 Í 206 139 äî 205 140 ö 205 141 ÷òî 203 142 èí 203 143 àñ 202 144 _-_ 202 145 _÷ò 199 146 _÷òî 199 147 òè 197 148 àâ 197 149 íà_ 197 150 _å 195 151 åä 192 152 _ð 191 153 æå 190 154 àì 190 155 åã 188 156 àç 188 157 òð 185 158 _òî 184 159 _Í 183 160 èì 182 161 _çà 181 162 äè 175 163  174 164 _êî 172 165 ëå 170 166 _ 169 167 îé_ 168 168 ! 
168 169 ÷òî_ 165 170 ìó 165 171 íî_ 164 172 èñ 164 173 í_ 164 174 ìè 163 175 ñòà 163 176 ù 162 177 _÷òî_ 162 178 _áû 159 179 ìà 158 180 ðó 157 181 âû 156 182 âè 154 183 âñ 154 184 åãî 153 185 èê 153 186 èç 153 187 ñü 153 188 àø 152 189 ûë 150 190 _íà_ 149 191 ñ_ 148 192 òâ 148 193 _ñò 146 194 ñÿ_ 146 195 ëè_ 146 196 _ÿ 144 197 îãî 143 198 øå 141 199 õ_ 141 200 àê_ 141 201 !_ 139 202 ñî 139 203 êó 139 204 áî 137 205 ïðî 136 206 àòü 136 207 åãî_ 135 208 _êà 134 209 _ìî 131 210 èå 131 211 îñò 130 212 îì_ 129 213 î, 128 214 _ãî 128 215 î,_ 127 216 èâ 127 217 ñë 126 218 óò 126 219 _òà 126 220 íí 125 221 àä 125 222 Ñ 124 223 íó 123 224 î÷ 122 225 _âû 122 226 _âñ 121 227 _îí 120 228 áûë 120 229 ïà 120 230 óä 120 231 _Ñ 119 232 ïðè 119 233 îâî 119 234 õî 118 235 ý 117 236 _áûë 117 237 è, 115 238 åé 115 239 _äî 115 240 àë_ 114 241 _æ 114 242 è,_ 114 243 _à 113 244 _ý 113 245 îå 112 246 åñò 112 247 îðî 110 248 çí 110 249 _äå 110 250 øè 109 251 èä 109 252 ñòî 109 253 _ïðî 109 254 _âî 108 255 àëè 108 256 íÿ 107 257 øà 107 258 ýò 107 259 îæ 106 260 à, 106 261 Ï 106 262 ûé 105 263 îëü 105 264 óñ 105 265 _ýò 105 266 _ïðè 104 267 èõ 104 268 ñü_ 104 269 ñè 104 270 îï 104 271 Ì 103 272 à,_ 103 273 àÿ 103 274 åç 102 275 äåë 102 276 _ðà 102 277 _Ï 101 278 îâà 101 279 óæ 101 280 âàë 101 281 þ_ 101 282 êàê 100 283 _ÿ_ 100 284 ó, 100 285 æå_ 100 286 ñòâ 100 287 äí 100 288 ÷è 100 289 òó 100 290 _õ 100 291 _Ì 99 292 ò, 99 293 îê 99 294 èòü 99 295 åò_ 99 296 _ñî 99 297 ëÿ 98 298 àå 98 299 _îò 98 300 îãî_ 98 301 Ê 97 302 âîð 97 303 òí 97 304 _ë 97 305 ó,_ 97 306 äó 96 307 _äà 96 308 ò,_ 96 309 áà 96 310 ïå 96 311 ëà_ 95 312 áó 95 313 áå 94 314 ìó_ 94 315 _ìå 94 316 ÷à 94 317 âñå 93 318 òû 93 319 êà_ 92 320 _êàê 91 321 ìí 91 322 òñ 90 323 íü 90 324 _îá 90 325 ? 
90 326 ðàç 89 327 òàê 89 328 _ñ_ 89 329 ñà 89 330 å, 89 331 æè 88 332 ô 88 333 _òàê 88 334 èÿ 88 335 è÷ 88 336 îí_ 87 337 ìåí 87 338 å,_ 87 339 ûé_ 86 340 ë, 86 341 ë,_ 86 342 òîì 86 343 ðàí 86 344 ýòî 86 345 Íó 86 346 È 86 347 öè 85 348 èå_ 85 349 âåð 85 350 ñâ 84 351 _îí_ 84 352 _âñå 84 353 ðû 84 354 _ýòî 84 355 _È 84 356 ç_ 83 357 ïè 83 358 ëó 83 359 À 83 360 åíè 82 361 èë_ 82 362 " 82 363 ïîë 82 364 àòü_ 81 365 òîð 81 366 èëè 81 367 êð 81 368 ëñ 81 369 òåë 81 370 åíí 81 371 îòî 80 372 äà_ 79 373 èòå 79 374 ñòð 79 375 îâîð 79 376 ãîâ 79 377 îðè 79 378 àíè 79 379 _Íó 78 380 ëñÿ 78 381 _íè 78 382 è. 77 383 _ñâ 77 384 è._ 76 385 à. 76 386 _èç 76 387 èø 76 388 ùå 76 389 ÿò 76 390 îëî 76 391 ü, 75 392 Î 75 393 êè_ 75 394 ü,_ 75 395 ëî_ 75 396 îç 74 397 _åã 74 398 ëüí 74 399 üí 74 400 ûå 74 -
xapian-omega-1.0.7a/langclass/spanish.lm
diff -u xapian-omega-1.0.7a/langclass/spanish.lm.orig
old new 1 _ 25044 2 e 7830 3 a 7437 4 o 5102 5 s 4394 6 n 4358 7 i 4065 8 r 3998 9 l 3634 10 d 3118 11 c 2931 12 t 2834 13 u 2316 14 a_ 2269 15 e_ 2211 16 s_ 1862 17 de 1679 18 p 1673 19 _d 1644 20 m 1447 21 _de 1443 22 n_ 1332 23 o_ 1301 24 en 1295 25 _e 1216 26 es 1177 27 _l 1132 28 de_ 1080 29 la 1060 30 os 1028 31 _de_ 1027 32 _p 963 33 l_ 910 34 ci 890 35 _c 866 36 _a 866 37 os_ 801 38 ar 777 39 er 775 40 as 768 41 ra 746 42 nt 736 43 _la 727 44 re 726 45 ,_ 724 46 , 724 47 el 722 48 ta 708 49 ue 701 50 g 678 51 on 674 52 al 670 53 _s 666 54 co 653 55 b 637 56 an 622 57 v 616 58 la_ 616 59 or 612 60 te 599 61 st 596 62 el_ 580 63 _la_ 573 64 y 545 65 to 543 66 r_ 517 67 ad 512 68 ó 511 69 do 504 70 ro 504 71 se 488 72 as_ 488 73 q 487 74 qu 487 75 . 479 76 ._ 478 77 en_ 475 78 ca 460 79 in 459 80 un 456 81 _co 450 82 es_ 449 83 ic 449 84 _en 440 85 ac 440 86 que 439 87 na 439 88 lo 430 89 _m 430 90 f 429 91 ent 428 92 da 412 93 ue_ 411 94 po 405 95 le 399 96 _q 399 97 _qu 399 98 que_ 393 99 _que 388 100 ie 386 101 h 385 102 pa 382 103 y_ 371 104 ti 367 105 _que_ 365 106 _en_ 365 107 _y 361 108 tr 358 109 _el 353 110 ri 349 111 ia 342 112 _el_ 333 113 _se 330 114 ió 330 115 _y_ 330 116 io 329 117 pr 320 118 ón 317 119 ec 317 120 no 314 121 id 301 122 í 300 123 mi 299 124 _t 299 125 ión 292 126 nte 292 127 me 286 128 aci 283 129 do_ 279 130 li 276 131 con 276 132 nd 273 133 est 272 134 ni 272 135 á 271 136 di 270 137 _es 268 138 _lo 267 139 ció 265 140 ma 265 141 ón_ 264 142 _pr 263 143 _r 261 144 ción 255 145 z 254 146 ra_ 251 147 si 247 148 ión_ 246 149 oc 245 150 nc 244 151 _u 244 152 _po 243 153 los 243 154 or_ 242 155 _con 241 156 is 239 157 del 238 158 _del 237 159 ado 236 160 se_ 233 161 _i 233 162 los_ 231 163 _re 231 164 por 229 165 _del_ 228 166 sta 228 167 del_ 228 168 al_ 228 169 ne 226 170 _h 226 171 cu 225 172 _n 225 173 _a_ 224 174 _v 224 175 _un 223 176 ce 222 177 so 220 178 ción_ 218 179 res 218 180 vi 217 181 om 216 182 te_ 212 183 _pa 211 184 
ien 210 185 j 209 186 E 208 187 _los 207 188 _los_ 207 189 to_ 206 190 ol 204 191 it 203 192 am 202 193 ació 201 194 rt 201 195 ación 201 196 pe 197 197 ha 190 198 _se_ 189 199 nto 188 200 _o 184 201 _E 184 202 on_ 184 203 sa 183 204 na_ 182 205 ta_ 181 206 su 180 207 cia 180 208 mo 180 209 ct 178 210 par 178 211 _f 177 212 _por 176 213 eg 172 214 _in 172 215 ur 170 216 L 168 217 ve 166 218 im 164 219 ga 163 220 _est 161 221 ar_ 161 222 ab 160 223 _L 159 224 tu 158 225 at 158 226 no_ 157 227 s, 157 228 s,_ 157 229 _por_ 156 230 por_ 156 231 las 156 232 ba 154 233 o,_ 154 234 o, 154 235 ento 151 236 et 150 237 C 150 238 _ha 149 239 A 149 240 tra 148 241 ient 148 242 _al 147 243 a,_ 146 244 ica 146 245 a, 146 246 pro 146 247 ado_ 145 248 ici 144 249 _ca 144 250 an_ 144 251 las_ 143 252 ara 143 253 nci 143 254 ente 142 255 ú 142 256 rr 142 257 ir 142 258 da_ 141 259 em 141 260 ll 140 261 il 139 262 ía 138 263 iv 138 264 _su 138 265 _par 136 266 ul 136 267 ant 136 268 _A 135 269 mp 135 270 _las_ 134 271 _las 134 272 _C 134 273 _pro 133 274 men 132 275 P 132 276 des 131 277 com 130 278 ion 130 279 era 130 280 ed 129 281 ida 129 282 sp 128 283 gu 127 284 nte_ 127 285 ns 127 286 za 126 287 dos 125 288 M 125 289 cio 125 290 les 125 291 _P 124 292 bl 124 293 _com 122 294 s._ 122 295 s. 122 296 _M 121 297 ua 120 298 nta 120 299 mu 119 300 _no 118 301 dad 118 302 ñ 117 303 é 116 304 un_ 116 305 va 116 306 ist 116 307 nes 116 308 iento 115 309 one 114 310 ara_ 113 311 S 113 312 ada 113 313 _un_ 113 314 fi 111 315 pre 110 316 tos 110 317 ter 109 318 ot 109 319 esta 108 320 _me 107 321 ido 107 322 ob 107 323 _g 105 324 br 105 325 go 105 326 ea 104 327 nto_ 104 328 ona 103 329 pu 103 330 dos_ 103 331 tro 103 332 ier 103 333 para 102 334 ment 101 335 ag 101 336 ero 101 337 gr 101 338 rec 101 339 bi 101 340 ia_ 100 341 una 100 342 nic 99 343 ncia 99 344 ía_ 98 345 a._ 98 346 tos_ 98 347 a. 
98 348 ran 98 349 lo_ 97 350 ones 97 351 rm 96 352 lu 96 353 ron 95 354 con_ 95 355 ó_ 95 356 nes_ 95 357 _ci 95 358 ante 94 359 ch 94 360 _con_ 94 361 _para 94 362 ntr 93 363 una_ 93 364 para_ 93 365 mie 92 366 ico 92 367 fe 92 368 les_ 92 369 uc 92 370 ip 91 371 sto 91 372 _ma 91 373 ui 91 374 sta_ 91 375 _ve 90 376 cion 90 377 " 90 378 op 90 379 cal 89 380 _mu 89 381 _S 89 382 ro_ 89 383 _pe 88 384 ste 88 385 ras 88 386 pl 88 387 _una 88 388 _di 87 389 ento_ 86 390 ita 86 391 ione 85 392 ect 85 393 _una_ 85 394 mien 85 395 tan 85 396 du 84 397 den 84 398 ndo 84 399 per 84 400 eri 84 -
xapian-omega-1.0.7a/langclass/swedish.lm
diff -u xapian-omega-1.0.7a/langclass/swedish.lm.orig
old new 1 _ 33494 2 e 8992 3 n 7900 4 t 7859 5 a 7781 6 r 7251 7 s 6435 8 i 5649 9 l 4541 10 d 4079 11 o 3724 12 m 3203 13 k 3058 14 g 2478 15 en 2403 16 n_ 2389 17 t_ 2073 18 de 1939 19 r_ 1910 20 v 1890 21 h 1789 22 u 1782 23 _s 1768 24 ä 1724 25 er 1709 26 f 1597 27 en_ 1537 28 a_ 1526 29 an 1357 30 p 1320 31 et 1317 32 ö 1278 33 å 1261 34 st 1236 35 ar 1226 36 c 1191 37 _d 1158 38 e_ 1116 39 in 1045 40 _f 1027 41 te 1000 42 b 997 43 _a 978 44 s_ 974 45 ra 958 46 . 956 47 tt 935 48 _i 898 49 _m 890 50 ._ 886 51 ll 870 52 ta 844 53 _o 842 54 _e 839 55 nd 820 56 ti 804 57 sk 798 58 re 779 59 at 769 60 _de 754 61 om 743 62 m_ 739 63 ör 720 64 , 697 65 ,_ 695 66 ng 686 67 li 673 68 ka 666 69 oc 662 70 _h 654 71 on 652 72 et_ 647 73 ch 645 74 ns 643 75 is 642 76 er_ 630 77 är 625 78 _v 614 79 _t 614 80 ni 611 81 i_ 609 82 _oc 592 83 tt_ 587 84 na 586 85 y 586 86 la 579 87 _b 579 88 h_ 577 89 kt 575 90 ch_ 568 91 ig 564 92 fö 563 93 och 555 94 or 555 95 _och 554 96 och_ 554 97 _och_ 553 98 me 548 99 den 548 100 om_ 535 101 _i_ 531 102 d_ 530 103 j 529 104 ik 520 105 de_ 520 106 för 518 107 ge 498 108 ad 497 109 _k 491 110 _fö 487 111 ri 484 112 el 482 113 il 481 114 so 480 115 al 474 116 g_ 469 117 le 464 118 an_ 461 119 _för 447 120 si 437 121 ar_ 437 122 att 435 123 _p 434 124 es 420 125 ing 413 126 se 407 127 to 404 128 _u 403 129 _en 403 130 and 398 131 den_ 395 132 nde 393 133 nn 393 134 _l 391 135 å_ 391 136 D 385 137 än 383 138 nt 382 139 l_ 381 140 tr 378 141 _D 372 142 va 370 143 am 369 144 sa 367 145 _so 365 146 ga 364 147 _en_ 361 148 är_ 358 149 ck 357 150 av 354 151 v_ 351 152 ed 347 153 ma 346 154 da 346 155 som 346 156 rs 344 157 som_ 344 158 ve 342 159 ter 341 160 att_ 341 161 ha 338 162 ne 337 163 ut 335 164 as 332 165 ska 329 166 _at 327 167 _att 326 168 _som 324 169 _att_ 324 170 _som_ 323 171 vi 322 172 ikt 317 173 _av 316 174 det 316 175 _den 315 176 he 315 177 ss 314 178 un 307 179 ke 304 180 _g 303 181 us 302 182 di 302 183 _st 300 184 rn 297 
185 _me 296 186 _ä 295 187 ade 294 188 " 290 189 _ha 290 190 av_ 289 191 ill 288 192 _n 286 193 _in 279 194 io 275 195 _r 275 196 der 275 197 it 274 198 _av_ 274 199 sta 274 200 gen 272 201 isk 270 202 _ti 269 203 id 265 204 na_ 265 205 ns_ 264 206 ko 262 207 _den_ 261 208 ag 258 209 det_ 257 210 lig 257 211 era 256 212 ll_ 255 213 _det 252 214 _är 251 215 be 249 216 _är_ 248 217 ra_ 247 218 ion 244 219 - 241 220 pr 240 221 oni 233 222 til 231 223 ten 228 224 _si 225 225 k_ 222 226 på 222 227 fr 221 228 ro 219 229 till 219 230 iv 216 231 ls 216 232 ande 215 233 ör_ 214 234 _det_ 213 235 äl 212 236 _på 211 237 ts 210 238 ens 209 239 med 209 240 mm 208 241 rt 208 242 _till 208 243 _til 208 244 _va 207 245 _fr 205 246 _sk 205 247 var 205 248 nin 204 249 ning 203 250 ol 201 251 ka_ 200 252 lle 198 253 ett 198 254 rd 197 255 em 196 256 på_ 195 257 x 195 258 rk 194 259 _ut 194 260 ste 194 261 ds 193 262 _vi 192 263 år 192 264 S 192 265 nde_ 191 266 are 191 267 ver 190 268 _på_ 190 269 nis 189 270 kr 189 271 _med 188 272 all 188 273 ån 187 274 nge 185 275 mo 184 276 os 183 277 ld 182 278 ade_ 181 279 _S 181 280 ed_ 180 281 rä 176 282 De 175 283 _- 175 284 kan 174 285 ta_ 173 286 ng_ 172 287 vä 171 288 för_ 170 289 ill_ 170 290 han 170 291 _De 170 292 pp 169 293 lt 169 294 sam 168 295 nte 167 296 ans 167 297 ton 166 298 ur 165 299 mi 165 300 ess 165 301 kl 164 302 ig_ 164 303 ks 164 304 as_ 163 305 und 163 306 men 162 307 med_ 161 308 _med_ 161 309 ak 161 310 Di 160 311 ot 159 312 rna 159 313 ul 159 314 _var 159 315 te_ 158 316 gen_ 158 317 het 157 318 kto 157 319 str 156 320 _Di 155 321 tad 155 322 lan 154 323 ga_ 154 324 iska 154 325 fa 154 326 fi 154 327 så 154 328 Dikt 153 329 Dik 153 330 pe 153 331 ska_ 152 332 ja 152 333 H 151 334 res 151 335 ku 151 336 iu 150 337 ande_ 150 338 till_ 150 339 t. 150 340 ern 150 341 rm 149 342 _Dikt 149 343 _Dik 149 344 ie 149 345 bl 148 346 -_ 147 347 od 147 348 _H 147 349 n. 
147 350 ist 147 351 _di 146 352 ius 146 353 _" 145 354 la_ 145 355 sl 145 356 man 145 357 ren 145 358 _för_ 145 359 toni 144 360 kton 144 361 n._ 144 362 ktoni 144 363 ikton 144 364 I 144 365 ikto 144 366 nius 143 367 ten_ 143 368 onius 143 369 oniu 143 370 toniu 143 371 ing_ 143 372 Dikto 143 373 niu 143 374 _ko 143 375 ic 142 376 _sa 142 377 _han 142 378 ett_ 142 379 sm 141 380 ba 141 381 M 141 382 gr 140 383 lä 140 384 ex 138 385 t._ 138 386 sp 137 387 lla 137 388 _et 137 389 _M 137 390 dr 137 391 rö 136 392 rad 136 393 ek 136 394 _be 135 395 tar 135 396 _-_ 135 397 _om 134 398 rl 134 399 E 134 400 mä 133 -
xapian-omega-1.0.7a/mimeexplode
diff -u xapian-omega-1.0.7a/mimeexplode.orig
#!/usr/bin/perl -w

=head1 NAME

mimeexplode - explode one or more MIME messages

=head1 SYNOPSIS

    mimeexplode [-d <dir>] <mime-msg-file> <mime-msg-file> ...

    someprocess | mimeexplode -

=head1 DESCRIPTION

Takes one or more files from the command line that contain MIME
messages, and explodes their contents out into subdirectories
of the current working directory.

The first message is always exploded into the base output directory
(C<msg0> unless B<-d> is given), so that the same message always ends
up in the same directory.  Every further message on the command line
goes into the base name with a running number appended (C<msg01>,
C<msg02>, ... or C<< <dir>1 >>, C<< <dir>2 >>, ...).  Missing
directories are created; existing ones are reused.

With no file arguments, or with the file name C<->, the message is
read from standard input.

For every extracted part one line is written to stdout, like this:

     Part: msg0/filename-1.dat (text/plain)
     Part: msg0/dir.gif (image/gif)
     Part: msg0/face.jpg (image/jpeg)

This was written as an example of the MIME:: modules in the
MIME-parser package I wrote.  It may prove useful as a quick-and-dirty
way of splitting a MIME message if you need to decode something, and
you don't have a MIME mail reader on hand.

=head1 COMMAND LINE OPTIONS

    -d outdir

=head1 AUTHOR

Eryq C<eryq@zeegee.com>, in a big hurry...
Reini Urban C<rurban@x-ray.at>: -d option to always explode into the same dir

=cut

#BEGIN { unshift @INC, ".." }    # to test MIME:: stuff before installing it!

# Three-argument open and lexical file handles need perl 5.6.
require 5.006;

use strict;

use MIME::Parser;
use Getopt::Std;

my %opts;
my $outbase = '';
my $postfix = '';

#------------------------------------------------------------
# make_msg - make and return the name of a msgXXX directory
#------------------------------------------------------------

#ignored
#sub make_msg {
#    while (-d "msg$Msgno") {
#        ++$Msgno;
#        die "self-imposed limit reached" if $Msgno == 256;
#    }
#    mkdir "msg$Msgno",0755 or die "couldn't make msg$Msgno: $!";
#    "msg$Msgno";
#}

#------------------------------------------------------------
# dump_entity - dump an entity's file info
#
# Recurses through multipart entities and prints one "Part:" line
# (path of the extracted file and its MIME type) per leaf part.
#------------------------------------------------------------
sub dump_entity {
    my $ent = shift;
    my @parts = $ent->parts;

    if (@parts) {        # multipart...
        dump_entity($_) foreach @parts;
        return;
    }

    # single part...
    # A leaf without a body handle or without an on-disk path has no file
    # to report; say so on stderr instead of dying on an undefined value.
    my $body = $ent->bodyhandle;
    my $path = $body ? $body->path : undef;
    unless (defined $path) {
        print STDERR "Skipping part without extracted file (",
            scalar($ent->head->mime_type), ")\n";
        return;
    }
    print " Part: ", $path,
          " (", scalar($ent->head->mime_type), ")\n";
}

#------------------------------------------------------------
# main
#
# Explodes every message named on the command line (or stdin) into its
# own output directory.  Returns true on completion.
#------------------------------------------------------------
sub main {
    # make sure the same message gets exploded into the same dir
    getopts('d:', \%opts);
    $outbase = (defined $opts{d} && length $opts{d}) ? $opts{d} : "msg0";
    my $outdir = $outbase;

    # Go through messages:
    @ARGV or unshift @ARGV, "-";
    while (defined(my $file = shift @ARGV)) {

        # Sanity:
        unless (-d $outdir) {
            mkdir $outdir, 0755 or die "couldn't make $outdir: $!";
        }
        (-w $outdir) or die "cwd $outdir not writable!";

        # Create a new parser object:
        my $parser = MIME::Parser->new;
        ### $parser->parse_nested_messages('REPLACE');

        # Optional: set up parameters that will affect how it extracts
        # documents from the input stream:
        $parser->output_dir($outdir);

        # Open the input.  "-" means stdin; everything else is opened
        # strictly for reading so that a file name such as "cmd |" can
        # never be run as a command.
        my $fh;
        if ($file eq '-') {
            $fh = \*STDIN;
        } else {
            open $fh, '<', $file or die "couldn't open $file: $!";
        }

        # Parse an input stream.  The parser signals failure by throwing,
        # so trap that in order to really continue with the next file.
        my $entity = eval { $parser->parse($fh) };
        print STDERR "Couldn't parse MIME in $file; continuing...\n"
            unless $entity;
        close $fh unless $file eq '-';

        # Congratulations: you now have a (possibly multipart) MIME entity!
        dump_entity($entity) if $entity;
        ### $entity->dump_skeleton if $entity;

        # Next message goes to <base>1, <base>2, ...
        $postfix++;
        $outdir = $outbase.$postfix;
    }
    1;
}

exit (main() ? 0 : -1);
#------------------------------------------------------------
xapian-omega-1.0.7a/msgconvert.pl
diff -u xapian-omega-1.0.7a/msgconvert.pl.orig
old new 1 #!/usr/bin/perl -w 2 # 3 # msgconvert.pl: 4 # 5 # Convert .MSG files (made by Outlook (Express)) to multipart MIME messages. 6 # 7 # Copyright 2002, 2004, 2006 Matijs van Zuijlen 8 # 9 # This program is free software; you can redistribute it and/or modify it 10 # under the terms of the GNU General Public License as published by the 11 # Free Software Foundation; either version 2 of the License, or (at your 12 # option) any later version. 13 # 14 # This program is distributed in the hope that it will be useful, but 15 # WITHOUT ANY WARRANTY; without even the implied warranty of 16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General 17 # Public License for more details. 18 # 19 # CHANGES: 20 # 20020715 Recognize new items 'Cc', mime type of attachment, long 21 # filename of attachment, and full headers. Attachments turn out 22 # to be numbered, so a regexp is now used to recognize label of 23 # items that are attachments. 24 # 20020831 long file name will definitely be used if present. Full headers 25 # and mime type information are used when present. Created 26 # generic system for specifying known items to be skipped. 27 # Unexpected contents is never reason to bail out anymore. Added 28 # support for usage message and option processing (--verbose). 29 # 20040104 Handle address data slightly better, make From line less fake, 30 # make $verbose and $skippable_entries global vars, handle HTML 31 # variant of body text if present (though not optimally). 32 # 20040214 Fix typos and incorrect comments. 33 # 20040307 - Complete rewrite: All functional parts are now in the package 34 # MSGParser; 35 # - Creation of MIME::Entity object is delayed until the output 36 # routines, which means all data is known; This means I can 37 # create a multipart/alternative body. 38 # - Item names are parsed (thanks to bfrederi@alumni.sfu.ca for 39 # the information). 
40 # 20040514 Check if $self->{HEAD} actually exists before trying to add its 41 # contents to the output Mime object's header data. 42 # (Bug reported by Thomas Ng). 43 # Don't produce multipart messages if not needed. 44 # (Bug reported by Justin B. Scout). 45 # 20040529 Correctly format OLEDATE. 46 # 20040530 - Extract date from property 0047 (thanks, Marc Goodman). 47 # - Use address data to make To: and Cc: lines complete 48 # - Use the in-reply-to property 49 # - More unknown properties named. 50 # - Found another property containing an SMTP address. 51 # - Put non-SMTP type addresses back in output. 52 # 20040825 Replace 'our' to declare globals with 'use vars'. This means 53 # the globals are now properly scoped inside the package and not 54 # the file. 55 # This also fixes the bug that this program did not work on perl 56 # versions below 5.6. (Bug reported by Tim Gustafson) 57 # 20060218 More sensible encoding warnings. 58 # 20060219 Move OLE parsing to main program. 59 # Parse nested MSG files (Bug reported by Christof Lukas). 60 # 20060225 Simplify code. 61 # 62 63 # 64 # Import modules.
65 # 66 package MSGParser; 67 use strict; 68 use OLE::Storage_Lite; 69 use MIME::Entity; 70 use MIME::Parser; 71 use Date::Format; 72 use POSIX qw(mktime); 73 use constant DIR_TYPE => 1; 74 use constant FILE_TYPE => 2; 75 76 use vars qw($skipproperties $skipheaders); 77 # 78 # Descriptions partially based on mapitags.h 79 # 80 $skipproperties = { 81 # Envelope properties 82 '000B' => "Conversation key?", 83 '001A' => "Type of message", 84 '003B' => "Sender address variant", 85 '003D' => "Contains 'Re: '", 86 '003F' => "'recieved by' id", 87 '0040' => "'recieved by' name", 88 '0041' => "Sender variant address id", 89 '0042' => "Sender variant name", 90 '0043' => "'recieved representing' id", 91 '0044' => "'recieved representing' name", 92 '0046' => "Read receipt address id", 93 '0051' => "'recieved by' search key", 94 '0052' => "'recieved representing' search key", 95 '0053' => "Read receipt search key", 96 '0064' => "Sender variant address type", 97 '0065' => "Sender variant address", 98 '0070' => "Conversation topic", 99 '0071' => "Conversation index", 100 '0075' => "'recieved by' address type", 101 '0076' => "'recieved by' email address", 102 '0077' => "'recieved representing' address type", 103 '0078' => "'recieved representing' email address", 104 '007F' => "something like a message id", 105 # Recipient properties 106 '0C19' => "Reply address variant", 107 '0C1D' => "Reply address variant", 108 '0C1E' => "Reply address type", 109 # Non-transmittable properties 110 '0E02' => "?Should BCC be displayed", 111 '0E0A' => "sent mail id", 112 '0E1D' => "Subject w/o Re", 113 '0E27' => "64 bytes: Unknown", 114 '0FF6' => "Index", 115 '0FF9' => "Index", 116 '0FFF' => "Address variant", 117 # Content properties 118 '1008' => "Summary or something", 119 '1009' => "RTF Compressed", 120 # 'Common property' 121 '3001' => "Display name", 122 '3002' => "Address Type", 123 '300B' => "'Search key'", 124 # Attachment properties 125 '3702' => "Attachment encoding", 126 '3703' => 
"Attachment extension", 127 '3709' => "'Attachment rendering'", # Maybe an icon or something? 128 '3713' => "Icon URL?", 129 # 'Mail user' 130 '3A20' => "Address variant", 131 # 3900 -- 39FF: 'Address book' 132 '39FF' => "7 bit display name", 133 # 'Display table properties' 134 '3FF8' => "Routing data?", 135 '3FF9' => "Routing data?", 136 '3FFA' => "Routing data?", 137 '3FFB' => "Routing data?", 138 # 'Transport-defined envelope property' 139 '4029' => "Sender variant address type", 140 '402A' => "Sender variant address", 141 '402B' => "Sender variant name", 142 '5FF6' => "Recipient name", 143 '5FF7' => "Recipient address variant", 144 # 'Provider-defined internal non-transmittable property' 145 '6740' => "Unknown, binary data", 146 # User defined id's 147 '8000' => "Content Class", 148 '8002' => "Unknown, binary data", 149 }; 150 151 $skipheaders = { 152 "MIME-Version" => 1, 153 "Content-Type" => 1, 154 "Content-Transfer-Encoding" => 1, 155 "X-Mailer" => 1, 156 "X-Msgconvert" => 1, 157 "X-MS-Tnef-Correlator" => 1, 158 "X-MS-Has-Attach" => 1, 159 }; 160 161 use constant ENCODING_UNICODE => '001F'; 162 use constant KNOWN_ENCODINGS => { 163 '000D' => 'Directory', 164 '001F' => 'Unicode', 165 '001E' => 'Ascii?', 166 '0102' => 'Binary', 167 }; 168 169 use constant MAP_ATTACHMENT_FILE => { 170 '3701' => ["DATA", 0], # Data 171 '3704' => ["SHORTNAME", 1], # Short file name 172 '3707' => ["LONGNAME", 1], # Long file name 173 '370E' => ["MIMETYPE", 1], # mime type 174 '3716' => ["DISPOSITION", 1], # disposition 175 }; 176 177 use constant MAP_SUBITEM_FILE => { 178 '1000' => ["BODY_PLAIN", 0], # Body 179 '1013' => ["BODY_HTML", 0], # HTML Version of body 180 '0037' => ["SUBJECT", 1], # Subject 181 '0047' => ["SUBMISSION_ID", 1], # Seems to contain the date 182 '007D' => ["HEAD", 1], # Full headers 183 '0C1A' => ["FROM", 1], # Reply-To: Name 184 '0C1E' => ["FROM_ADDR_TYPE", 1], # From: Address type 185 '0C1F' => ["FROM_ADDR", 1], # Reply-To: Address 186 '0E04' => ["TO", 1], 
# To: Names 187 '0E03' => ["CC", 1], # Cc: Names 188 '1035' => ["MESSAGEID", 1], # Message-Id 189 '1042' => ["INREPLYTO", 1], # In reply to Message-Id 190 }; 191 192 use constant MAP_ADDRESSITEM_FILE => { 193 '3001' => ["NAME", 1], # Real name 194 '3002' => ["TYPE", 1], # Address type 195 '403D' => ["TYPE", 1], # Address type 196 '3003' => ["ADDRESS", 1], # Address 197 '403E' => ["ADDRESS", 1], # Address 198 '39FE' => ["SMTPADDRESS", 1], # SMTP Address variant 199 }; 200 201 # 202 # Main body of module 203 # 204 205 sub new { 206 my $that = shift; 207 my $class = ref $that || $that; 208 209 my $self = { 210 ATTACHMENTS => [], 211 ADDRESSES => [], 212 VERBOSE => 0, 213 HAS_UNICODE => 0, 214 FROM_ADDR_TYPE => "", 215 }; 216 bless $self, $class; 217 } 218 219 # 220 # Main sub: parse the PPS tree, and return 221 # 222 sub parse { 223 my $self = shift; 224 my $PPS = shift or die "Internal error: No PPS tree"; 225 $self->_RootDir($PPS); 226 } 227 228 sub mime_object { 229 my $self = shift; 230 231 my $bodymime; 232 my $mime; 233 234 if ($self->_IsMultiPart) { 235 # Construct a multipart message object 236 237 $mime = MIME::Entity->build(Type => "multipart/mixed"); 238 239 # Set the entity that we'll save the body parts to. If there's more than 240 # one part, it's a new entity, otherwise, it's the main $mime object. 
241 if ($self->{BODY_HTML} and $self->{BODY_PLAIN}) { 242 $bodymime = MIME::Entity->build( 243 Type => "multipart/alternative", 244 Encoding => "8bit", 245 ); 246 $mime->add_part($bodymime); 247 } else { 248 $bodymime = $mime; 249 } 250 if ($self->{BODY_PLAIN}) { 251 $self->_SaveAttachment($bodymime, { 252 MIMETYPE => 'text/plain; charset=ISO-8859-1', 253 ENCODING => '8bit', 254 DATA => $self->{BODY_PLAIN}, 255 DISPOSITION => 'inline', 256 }); 257 } 258 if ($self->{BODY_HTML}) { 259 $self->_SaveAttachment($bodymime, { 260 MIMETYPE => 'text/html', 261 ENCODING => '8bit', 262 DATA => $self->{BODY_HTML}, 263 DISPOSITION => 'inline', 264 }); 265 } 266 foreach my $att (@{$self->{ATTACHMENTS}}) { 267 $self->_SaveAttachment($mime, $att); 268 } 269 } elsif ($self->{BODY_PLAIN}) { 270 # Construct a single part message object with a plain text body 271 $mime = MIME::Entity->build( 272 Type => "text/plain", 273 Data => $self->{BODY_PLAIN} 274 ); 275 } elsif ($self->{BODY_HTML}) { 276 # Construct a single part message object with an HTML body 277 $mime = MIME::Entity->build( 278 Type => "text/html", 279 Data => $self->{BODY_HTML} 280 ); 281 } 282 283 $self->_CopyHeaderData($mime); 284 285 $self->_SetHeaderFields($mime); 286 287 return $mime; 288 } 289 290 # Actually output the message in mbox format 291 sub print { 292 my $self = shift; 293 294 my $mime = $self->mime_object; 295 296 # Construct From line from whatever we know. 297 my $string = ""; 298 $string = ( 299 $self->{FROM_ADDR_TYPE} eq "SMTP" ? 300 $self->{FROM_ADDR} : 301 'someone@somewhere' 302 ); 303 $string =~ s/\n//g; 304 305 # The date used here is not really important. 306 print "From ", $string, " ", scalar localtime, "\n"; 307 $mime->print(\*STDOUT); 308 print "\n"; 309 } 310 311 sub set_verbosity { 312 my ($self, $verbosity) = @_; 313 defined $verbosity or die "Internal error: no verbosity level"; 314 $self->{VERBOSE} = $verbosity; 315 } 316 317 # 318 # Below are functions that walk the PPS tree. 
The *Dir functions handle 319 # processing the directory nodes of the tree (mainly, iterating over the 320 # children), whereas the *Item functions handle processing the items in the 321 # directory (if such an item is itself a directory, it will in turn be 322 # processed by the relevant *Dir function). 323 # 324 325 # 326 # RootItem: Check Root Entry, parse sub-entries. 327 # The OLE file consists of a single entry called Root Entry, which has 328 # several children. These children are parsed in the sub SubItem. 329 # 330 sub _RootDir { 331 my ($self, $PPS) = @_; 332 333 foreach my $child (@{$PPS->{Child}}) { 334 $self->_SubItem($child); 335 } 336 } 337 338 sub _SubItem { 339 my ($self, $PPS) = @_; 340 341 if ($PPS->{Type} == DIR_TYPE) { 342 $self->_SubItemDir($PPS); 343 } elsif ($PPS->{Type} == FILE_TYPE) { 344 $self->_SubItemFile($PPS); 345 } else { 346 warn "Unknown entry type: $PPS->{Type}"; 347 } 348 } 349 350 sub _SubItemDir { 351 my ($self, $PPS) = @_; 352 353 $self->_GetOLEDate($PPS); 354 355 my $name = $self->_GetName($PPS); 356 357 if ($name =~ /__recip_version1 0_ /) { # Address of one recipient 358 $self->_AddressDir($PPS); 359 } elsif ($name =~ '__attach_version1 0_ ') { # Attachment 360 $self->_AttachmentDir($PPS); 361 } else { 362 $self->_UnknownDir($self->_GetName($PPS)); 363 } 364 } 365 366 sub _SubItemFile { 367 my ($self, $PPS) = @_; 368 369 my $name = $self->_GetName($PPS); 370 my ($property, $encoding) = $self->_ParseItemName($name); 371 372 $self->_MapProperty($self, $PPS->{Data}, $property, 373 MAP_SUBITEM_FILE) or $self->_UnknownFile($name); 374 } 375 376 sub _AddressDir { 377 my ($self, $PPS) = @_; 378 379 my $address = { 380 NAME => undef, 381 ADDRESS => undef, 382 TYPE => "", 383 }; 384 foreach my $child (@{$PPS->{Child}}) { 385 $self->_AddressItem($child, $address); 386 } 387 push @{$self->{ADDRESSES}}, $address; 388 } 389 390 sub _AddressItem { 391 my ($self, $PPS, $addr_info) = @_; 392 393 my $name = $self->_GetName($PPS); 394 395 # 
DIR Entries: There should be none. 396 if ($PPS->{Type} == DIR_TYPE) { 397 $self->_UnknownDir($name); 398 } elsif ($PPS->{Type} == FILE_TYPE) { 399 my ($property, $encoding) = $self->_ParseItemName($name); 400 $self->_MapProperty($addr_info, $PPS->{Data}, $property, 401 MAP_ADDRESSITEM_FILE) or $self->_UnknownFile($name); 402 } else { 403 warn "Unknown entry type: $PPS->{Type}"; 404 } 405 } 406 407 sub _AttachmentDir { 408 my ($self, $PPS) = @_; 409 410 my $attachment = { 411 SHORTNAME => undef, 412 LONGNAME => undef, 413 MIMETYPE => 'application/octet-stream', 414 ENCODING => 'base64', 415 DISPOSITION => 'attachment', 416 DATA => undef 417 }; 418 foreach my $child (@{$PPS->{Child}}) { 419 $self->_AttachmentItem($child, $attachment); 420 } 421 push @{$self->{ATTACHMENTS}}, $attachment; 422 } 423 424 sub _AttachmentItem { 425 my ($self, $PPS, $att_info) = @_; 426 427 my $name = $self->_GetName($PPS); 428 429 my ($property, $encoding) = $self->_ParseItemName($name); 430 431 if ($PPS->{Type} == DIR_TYPE) { 432 433 if ($property eq '3701') { # Nested MSG file 434 my $msgp = new MSGParser(); 435 $msgp->parse($PPS); 436 my $data = $msgp->mime_object->as_string; 437 $att_info->{DATA} = $data; 438 $att_info->{MIMETYPE} = 'message/rfc822'; 439 $att_info->{ENCODING} = '8bit'; 440 } else { 441 $self->_UnknownDir($name); 442 } 443 444 } elsif ($PPS->{Type} == FILE_TYPE) { 445 $self->_MapProperty($att_info, $PPS->{Data}, $property, 446 MAP_ATTACHMENT_FILE) or $self->_UnknownFile($name); 447 } else { 448 warn "Unknown entry type: $PPS->{Type}"; 449 } 450 } 451 452 sub _MapProperty { 453 my ($self, $hash, $data, $property, $map) = @_; 454 455 defined $property or return 0; 456 my $arr = $map->{$property} or return 0; 457 458 $arr->[1] and $data =~ s/\000//g; 459 $hash->{$arr->[0]} = $data; 460 461 return 1; 462 } 463 464 sub _UnknownDir { 465 my ($self, $name) = @_; 466 467 if ($name eq '__nameid_version1 0') { 468 $self->{VERBOSE} 469 and warn "Skipping DIR entry $name 
(Introductory stuff)\n"; 470 return; 471 } 472 warn "Unknown DIR entry $name\n"; 473 } 474 475 sub _UnknownFile { 476 my ($self, $name) = @_; 477 478 if ($name eq '__properties_version1 0') { 479 $self->{VERBOSE} 480 and warn "Skipping FILE entry $name (Properties)\n"; 481 return; 482 } 483 484 my ($property, $encoding) = $self->_ParseItemName($name); 485 unless (defined $property) { 486 warn "Unknown FILE entry $name\n"; 487 return; 488 } 489 if ($skipproperties->{$property}) { 490 $self->{VERBOSE} 491 and warn "Skipping property $property ($skipproperties->{$property})\n"; 492 return; 493 } elsif ($property =~ /^80/) { 494 $self->{VERBOSE} 495 and warn "Skipping property $property (user-defined property)\n"; 496 return; 497 } else { 498 warn "Unknown property $property\n"; 499 return; 500 } 501 } 502 503 # 504 # Helper functions 505 # 506 507 sub _GetName { 508 my ($self, $PPS) = @_; 509 return $self->_NormalizeWhiteSpace(OLE::Storage_Lite::Ucs2Asc($PPS->{Name})); 510 } 511 512 sub _NormalizeWhiteSpace { 513 my ($self, $name) = @_; 514 $name =~ s/\W/ /g; 515 return $name; 516 } 517 518 sub _GetOLEDate { 519 my ($self, $PPS) = @_; 520 unless (defined ($self->{OLEDATE})) { 521 # Make Date 522 my $datearr; 523 $datearr = $PPS->{Time2nd}; 524 $datearr = $PPS->{Time1st} unless($datearr); 525 $self->{OLEDATE} = $self->_FormatDate($datearr) if $datearr; 526 } 527 } 528 529 sub _FormatDate { 530 my ($self, $datearr) = @_; 531 532 # TODO: This is a little convoluted. Directly using strftime didn't seem 533 # to work. 534 my $datetime = mktime(@$datearr); 535 return time2str("%a, %d %h %Y %X %z", $datetime); 536 } 537 538 # If we didn't get the date from the original header data, we may be able 539 # to get it from the SUBMISSION_ID: 540 # It seems to have the format of a semicolon-separated list of key=value 541 # pairs. The key l has a value with the format: 542 # <SERVER>-<DATETIME>Z-<NUMBER>, where DATETIME is the date and time in 543 # the format YYMMDDHHMMSS. 
# Build a Date: header value from $self->{SUBMISSION_ID}.
# Returns the string produced by _FormatDate, or undef when there is no
# SUBMISSION_ID or it does not contain  l=<SERVER>-<YYMMDDHHMMSS>Z-<NUMBER>.
sub _SubmissionIdDate {
  my $self = shift;

  my $submission_id = $self->{SUBMISSION_ID} or return undef;
  $submission_id =~ m/l=.*-(\d\d)(\d\d)(\d\d)(\d\d)(\d\d)(\d\d)Z-.*/
    or return undef;
  my ($year, $mon, $mday, $hour, $min, $sec) = ($1, $2, $3, $4, $5, $6);
  # The list below goes to mktime via _FormatDate: years count from 1900 and
  # months from 0.  Two-digit years use the POSIX %y window (00-68 => 20xx,
  # 69-99 => 19xx); the former pivot of 20 dated every message submitted
  # from 2020 onwards to the 1920s.
  $year += 100 if $year < 69;
  return $self->_FormatDate([$sec, $min, $hour, $mday, $mon - 1, $year]);
}

# Split an item name of the form "__substg1 0_PPPPEEEE" into its four
# character property code and four character encoding code.
# Returns ($property, $encoding), or (undef, undef) if the name does not
# have that form.  Warns once per message about Unicode fields and warns
# about any encoding that is not listed in KNOWN_ENCODINGS.
sub _ParseItemName {
  my ($self, $name) = @_;

  if ($name =~ /^__substg1 0_(....)(....)$/) {
    my ($property, $encoding) = ($1, $2);
    if ($encoding eq ENCODING_UNICODE and not ($self->{HAS_UNICODE})) {
      warn "This MSG file contains Unicode fields."
        . " This is currently unsupported.\n";
      # Remember that the warning was given so it is not repeated.
      $self->{HAS_UNICODE} = 1;
    } elsif (not (KNOWN_ENCODINGS()->{$encoding})) {
      warn "Unknown encoding $encoding. Results may be strange or wrong.\n";
    }
    return ($property, $encoding);
  } else {
    return (undef, undef);
  }
}

# Attach one attachment record ($att, a hash with MIMETYPE, ENCODING,
# LONGNAME/SHORTNAME, DISPOSITION and DATA) to $mime and write its data
# into the new part.  Only warns if the part cannot be opened for writing.
sub _SaveAttachment {
  my ($self, $mime, $att) = @_;

  my $ent = $mime->attach(
    Type => $att->{MIMETYPE},
    Encoding => $att->{ENCODING},
    Data => [],
    # Prefer the long file name when the message carries one.
    Filename => ($att->{LONGNAME} ? $att->{LONGNAME} : $att->{SHORTNAME}),
    Disposition => $att->{DISPOSITION}
  );

  my $handle;
  if ($handle = $ent->open("w")) {
    $handle->print($att->{DATA});
    $handle->close;
  } else {
    warn "Could not write data!";
  }
}

# Record one field ($partname) of the address entry $adrname, after
# stripping NUL bytes from $data.  A value that is already present is never
# overwritten: identical data is skipped (reported only when VERBOSE),
# differing data is reported as inconsistent.
# NOTE(review): ADDRESSES is indexed as a hash here but iterated as an array
# in _ExpandAddressList - confirm which representation is the intended one.
sub _SetAddressPart {
  my ($self, $adrname, $partname, $data) = @_;

  my $address = $self->{ADDRESSES}->{$adrname};
  $data =~ s/\000//g;
  #warn "Processing address data part $partname : $data\n";
  if (defined ($address->{$partname})) {
    if ($address->{$partname} eq $data) {
      warn "Skipping duplicate but identical address information for"
        . " $partname\n" if $self->{VERBOSE};
    } else {
      warn "Address information $partname inconsistent:\n";
      warn " Original data: $address->{$partname}\n";
      warn " New data: $data\n";
    }
  } else {
    $address->{$partname} = $data;
  }
}

# Set header fields
# Adds header $fieldname with $value to $mime unless the header already has
# a (true) value.  Undefined and empty values are ignored; a literal "0" is
# a legitimate value (e.g. a Subject of "0") and is kept.
sub _AddHeaderField {
  my ($self, $mime, $fieldname, $value) = @_;

  my $oldvalue = $mime->head->get($fieldname);
  return if $oldvalue;
  $mime->head->add($fieldname, $value)
    if defined $value and length $value;
}

# Format "NAME <ADDRESS>" from $self->{$tag} and $self->{$tag . "_ADDR"};
# a missing part is rendered as an empty string.
sub _Address {
  my ($self, $tag) = @_;
  my $name = $self->{$tag} || "";
  my $address = $self->{$tag . "_ADDR"} || "";
  return "$name <$address>";
}

# Find SMTP addresses for the given list of names
# $names is a "; "-separated list.  Each name that equals the NAME of an
# entry in $self->{ADDRESSES} becomes "NAME <address>" (SMTPADDRESS if
# defined, else ADDRESS when TYPE is "SMTP"); other names are passed through
# unchanged.  Returns the results joined with ", ", or "" when $names is
# undefined (no such recipient field in the message).
sub _ExpandAddressList {
  my ($self, $names) = @_;

  defined $names or return "";

  my $addresspool = $self->{ADDRESSES};
  my @namelist = split /; */, $names;
  my @result;
  name: foreach my $name (@namelist) {
    foreach my $address (@$addresspool) {
      if ($name eq $address->{NAME}) {
        my $addresstext = $address->{NAME} . " <";
        if (defined ($address->{SMTPADDRESS})) {
          $addresstext .= $address->{SMTPADDRESS};
        } elsif ($address->{TYPE} eq "SMTP") {
          $addresstext .= $address->{ADDRESS};
        }
        $addresstext .= ">";
        push @result, $addresstext;
        next name;
      }
    }
    # No matching address entry: keep the bare name.
    push @result, $name;
  }
  return join ", ", @result;
}

# Parse a raw header block with MIME::Parser and return its unfolded head
# object.  Returns undef when $data is undefined or cannot be parsed, so
# callers can simply skip the original headers.
sub _ParseHead {
  my ($self, $data) = @_;
  defined $data or return undef;
  # Parse full header date if we got that.
  my $parser = MIME::Parser->new();
  $parser->output_to_core(1);
  $parser->decode_headers(1);
  # Replace a leading "Microsoft Mail..." line by a well-formed header field.
  $data =~ s/^Microsoft Mail.*$/X-MSGConvert: yes/m;
  my $entity = $parser->parse_data($data);
  unless ($entity) {
    # Previously execution continued and died calling ->head on undef.
    warn "Couldn't parse full headers!";
    return undef;
  }
  my $head = $entity->head;
  $head->unfold;
  return $head;
}

# Find out if we need to construct a multipart message
# True when both an HTML and a plain body are present, or when there is at
# least one attachment.
sub _IsMultiPart {
  my $self = shift;

  return (
    ($self->{BODY_HTML} and $self->{BODY_PLAIN})
      or @{$self->{ATTACHMENTS}}>0
  );
}

# Copy original header data.
# Note: This should contain the Date: header.
# Every header of $self->{HEAD} whose tag is not listed in $skipheaders is
# added to $mime; nothing is copied if the head cannot be parsed.
sub _CopyHeaderData {
  my ($self, $mime) = @_;

  my $head = $self->_ParseHead($self->{HEAD}) or return;

  foreach my $tag (grep {!$skipheaders->{$_}} $head->tags) {
    foreach my $value ($head->get_all($tag)) {
      $mime->head->add($tag, $value);
    }
  }
}

# Set header fields
# Each call goes through _AddHeaderField, so a header that already has a
# value keeps it and the calls below only fill the gaps.
sub _SetHeaderFields {
  my ($self, $mime) = @_;

  # If we didn't get the date from the original header data, we may be able
  # to get it from the SUBMISSION_ID:
  $self->_AddHeaderField($mime, 'Date', $self->_SubmissionIdDate());

  # Third and last chance to set the Date: header; this uses the date the
  # MSG file was saved.
  $self->_AddHeaderField($mime, 'Date', $self->{OLEDATE});
  $self->_AddHeaderField($mime, 'Subject', $self->{SUBJECT});
  $self->_AddHeaderField($mime, 'From', $self->_Address("FROM"));
  #$self->_AddHeaderField($mime, 'Reply-To', $self->_Address("REPLYTO"));
  $self->_AddHeaderField($mime, 'To', $self->_ExpandAddressList($self->{TO}));
  $self->_AddHeaderField($mime, 'Cc', $self->_ExpandAddressList($self->{CC}));
  $self->_AddHeaderField($mime, 'Message-Id', $self->{MESSAGEID});
  $self->_AddHeaderField($mime, 'In-Reply-To', $self->{INREPLYTO});
}

package main;
use Getopt::Long;
use Pod::Usage;

# Setup command line processing.
my $verbose = '';
my $help = '';      # Print help message and exit.
GetOptions('verbose' => \$verbose, 'help|?' => \$help) or pod2usage(2);
pod2usage(1) if $help;

# Get file name
my $file = $ARGV[0];
defined $file or pod2usage(2);
warn "Will parse file: $file\n" if $verbose;

# Load and parse MSG file (is OLE)
my $Msg = OLE::Storage_Lite->new($file);
my $PPS = $Msg->getPpsTree(1);
$PPS or die "$file must be an OLE file";

# parse PPS tree
# (direct class-method call; the indirect-object form "new MSGParser()" is
# ambiguous to the Perl parser)
my $parser = MSGParser->new();
$parser->set_verbosity(1) if $verbose;
$parser->parse($PPS);
$parser->print();

#
# Usage info follows.
#
__END__

=head1 NAME

msgconvert.pl - Convert Outlook .msg files to mbox format

=head1 SYNOPSIS

msgconvert.pl [options] <file.msg>

  Options:
    --verbose   be verbose
    --help      help message

=head1 OPTIONS

=over 8

=item B<--verbose>

Print information about skipped parts of the .msg file.

=item B<--help>

Print a brief help message.

=back

=head1 DESCRIPTION

This program will output the message contained in file.msg in mbox format
on stdout. It will complain about unrecognized OLE parts on
stderr.

=head1 BUGS

Not all data that's in the .MSG file is converted. There simply are some
parts whose meaning escapes me. One of these must contain the date the
message was sent, for example. Formatting of text messages will also be
lost. YMMV.

=cut
xapian-omega-1.0.7a/omega.cc
diff -u xapian-omega-1.0.7a/omega.cc.orig
old new 172 172 const string & v = val->second; 173 173 if (v == "AND" || v == "and") 174 174 default_op = Xapian::Query::OP_AND; 175 else if (v == "OR" || v == "or") 176 default_op = Xapian::Query::OP_OR; 177 } else { 178 default_op = Xapian::Query::OP_AND; 175 179 } 176 180 177 181 val = cgi_params.find("FMT"); … … 264 268 } 265 269 } 266 270 271 // filter by URL substring 272 val = cgi_params.find("U"); 273 if (val != cgi_params.end()) { 274 string url = val->second; 275 if (!url.empty()) { 276 filters += ("U" + url + "*"); 277 filters += filter_sep; 278 } 279 } 280 267 281 // date range filters 268 282 val = cgi_params.find("START"); 269 283 if (val != cgi_params.end()) date_start = val->second; -
xapian-omega-1.0.7a/omega.conf.in
diff -u xapian-omega-1.0.7a/omega.conf.in.orig
old new 1 # Directory containing Xapian databases: 2 database_dir @localstatedir@/omega/data 3 4 # Directory containing OmegaScript templates: 5 template_dir @localstatedir@/omega/templates 6 7 # Directory to write Omega logs to: 8 log_dir /var/log/omega 9 10 # Directory containing any cdb files for the $lookup OmegaScript command: 11 cdb_dir @localstatedir@/omega/cdb 12 13 # Directory containing extracted archives: 14 cache_dir @localstatedir@/omega/cache -
xapian-omega-1.0.7a/omega.m4
diff -u xapian-omega-1.0.7a/omega.m4.orig
# macro to check for omindex helpers
# serial 1

# XO_OMEGA_WITH(package, description, [action-if-with-and-found])
# --------------------------------------------------------
#   AC_ARG_WITH(package, --without-package description)
#   AC_PATH_PROG(PACKAGE, package)
#   AC_DEFINE(HAVE_PACKAGE,[], "Define to 1 if you have " description)
#   AC_DEFINE_UNQUOTED(PACKAGE,"$PACKAGE",package " path")
# The progname to search for is the same as the package.
# PACKAGE is the package name with a-z translated to A-Z.
#
# action-if-with-and-found is executed when --without-package is
# not given and the program is found in the path. For additional AC_SUBST
# It runs in the main configure shell (not in a subshell), so shell
# variables it sets stay visible to the rest of configure.
#
# This simplifies the omindex prerequisite definition.
AC_DEFUN([XO_OMEGA_WITH],
[
  AC_ARG_WITH([$1],
    [AS_HELP_STRING([--without-$1], [do not use $1 to $2])])
  define([$1NAME],[translit([$1],[a-z], [A-Z])])
  if test xno != x$with_$1; then
    AC_PATH_PROG([$1NAME],[$1])
    if test -n "$[]$1NAME"; then
      AC_DEFINE([HAVE_]$1NAME, 1, [Define to 1 if you have $1 to $2])
      AC_DEFINE_UNQUOTED($1NAME,"$[]$1NAME",[path to $1])
      $3
    fi
  fi
  undefine([$1NAME])
])

# XO_OMEGA_WITH_PROGS(package, progs, description, [action-if-with-and-found])
# --------------------------------------------------------
#   AC_ARG_WITH(package, --without-package description)
#   AC_PATH_PROGS(PACKAGE, progs)
#   AC_DEFINE(HAVE_PACKAGE,[], "Define to 1 if you have " description)
#   AC_DEFINE_UNQUOTED(PACKAGE,"$PACKAGE",package " path")
# progs is the list of program names handed to AC_PATH_PROGS; it need not
# contain the package name itself.
#
# action-if-with-and-found is executed when --without-package is
# not given and the program is found in the path. For additional AC_SUBST
#
# This simplifies the omindex prerequisite definition.
# Definition of XO_OMEGA_WITH_PROGS (arguments: package, progs, description,
# optional action).  The optional action is expanded only when the helper is
# enabled AND one of progs was found, and it runs in the main configure
# shell rather than in a subshell.
AC_DEFUN([XO_OMEGA_WITH_PROGS],
[
  AC_ARG_WITH([$1],
    [AS_HELP_STRING([--without-$1], [do not use $1 to $3])])
  define([$1NAME],[translit([$1],[a-z], [A-Z])])
  if test xno != x$with_$1; then
    AC_PATH_PROGS([$1NAME],$2)
    if test -n "$[]$1NAME"; then
      AC_DEFINE([HAVE_]$1NAME, 1, [Define to 1 if you have $1 to $3])
      AC_DEFINE_UNQUOTED($1NAME,"$[]$1NAME",[path to $1])
      $4
    fi
  fi
  undefine([$1NAME])
])
xapian-omega-1.0.7a/omindex.cc
diff -u xapian-omega-1.0.7a/omindex.cc.orig
old new 4 4 * Copyright 2001,2005 James Aylett 5 5 * Copyright 2001,2002 Ananova Ltd 6 6 * Copyright 2002,2003,2004,2005,2006,2007,2008 Olly Betts 7 * Copyright 2006,2007,2008 AVL List GesmbH 7 8 * 8 9 * This program is free software; you can redistribute it and/or 9 10 * modify it under the terms of the GNU General Public License as … … 23 24 24 25 #include <config.h> 25 26 27 #ifdef HAVE_POSIX_FADVISE 28 # ifdef __linux__ 29 # define _POSIX_C_SOURCE 200112L // for posix_fadvise from fcntl.h 30 # define _BSD_SOURCE 1 // Need this to get lstat() as well 31 # endif 32 #endif 26 33 #include <algorithm> 27 34 #include <fstream> 28 35 #include <iostream> … … 42 49 #include <xapian.h> 43 50 44 51 #include "commonhelp.h" 52 #include "configfile.h" 45 53 #include "diritor.h" 46 54 #include "hashterm.h" 47 55 #include "loadfile.h" … … 62 70 extern char * mkdtemp(char *); 63 71 #endif 64 72 73 #ifdef HAVE_TEXTCAT 74 # include "textcat.h" 75 # ifndef LANGCLASS_CONF 76 # define LANGCLASS_CONF "/var/lib/omega/langclass/langclass.conf" 77 # endif 78 #endif 79 #ifndef LIBEXECDIR 80 // must have ending slash 81 //# define LIBEXECDIR "/usr/lib/omega/bin/" 82 # define LIBEXECDIR "" 83 #endif 84 #ifndef PKGDATADIR 85 // must have ending slash 86 # define PKGDATADIR "/usr/share/omega/" 87 #endif 88 65 89 using namespace std; 66 90 67 91 #define TITLE_SIZE 128 … … 69 93 70 94 #define PROG_NAME "omindex" 71 95 #define PROG_DESC "Index static website data via the filesystem" 96 97 /* used in runfilter.cc */ 98 bool verbose = false; 99 string error_log; 72 100 73 101 static bool skip_duplicates = false; 74 102 static bool follow_symlinks = false; 103 static bool nocleanup = false; 104 static bool silent = false; 75 105 static string dbpath; 76 106 static string root; 77 107 static string indexroot; … … 129 159 static vector<bool> updated; 130 160 static string tmpdir; 131 161 162 #ifdef HAVE_TEXTCAT 163 static void *textcat; 164 #endif 165 166 static void 167 
index_cached_directory(size_t depth_limit, 168 const string &file, 169 const string &url, 170 const string &ext, 171 const string &cmd, 172 map<string, string>& mime_map); 173 static 174 int mkdir_p(const string &path, mode_t mode); 175 132 176 inline static bool 133 177 p_notalnum(unsigned int c) 134 178 { … … 179 223 { 180 224 string file = root + url; 181 225 string title, sample, keywords, dump; 226 string language; 182 227 183 cout << "Indexing \"" << url << "\" as " << mimetype << " ... " << flush; 228 if (!silent) 229 cout << "Indexing \"" << url.substr(1) << "\" as " << mimetype << " ... " << flush; 184 230 185 231 string urlterm("U"); 186 232 urlterm += baseurl; … … 217 263 // indexing is disallowed 218 264 } 219 265 if (!p.indexing_allowed) { 220 cout << "indexing disallowed by meta tag - skipping" << endl; 266 if (!silent) 267 cout << "indexing disallowed by meta tag - skipping" << endl; 221 268 return; 222 269 } 223 270 dump = p.dump; … … 245 292 return; 246 293 } 247 294 md5_string(dump, md5); 295 #if 0 // FIXME: this won't work as omindex will have the database locked... 296 } else if (mimetype == "message/rfc822") { // // => mbox2script 297 //for stemmer lang, parse stemmer.get_description => Xapian::Stem(bla) 298 string cmd = LIBEXECDIR"mbox2omega " + shell_protect(file) + error_log+"| " 299 "scriptindex " + shell_protect(dbpath) + " "PKGDATADIR"mbox2script.script"; 300 try { 301 dump = stdout_to_string(cmd); 302 } catch (ReadError) { 303 cout << "\"" << cmd << "\" failed - skipping" << endl; 304 return; 305 } 306 #endif 248 307 } else if (mimetype == "application/pdf") { 249 308 string safefile = shell_protect(file); … … 383 442 } else if (mimetype == "text/rtf") { 384 443 // The --text option unhelpfully converts all non-ASCII characters to 385 444 // "?" so we use --html instead, which produces HTML entities. 
386 string cmd = "unrtf --nopict --html 2>/dev/null " + shell_protect(file); 445 string cmd = "unrtf --nopict --html 2>/dev/null " + shell_protect(file) + error_log+ 446 "|"SED" '/^### .*/d'"; 387 447 MyHtmlParser p; 388 448 try { 389 449 p.parse_html(stdout_to_string(cmd)); … … 466 526 467 527 // Put the data in the document 468 528 Xapian::Document newdocument; 469 string record = "url=" + baseurl + url + "\nsample=" + sample; 529 string record = "url=" + baseurl + url; 530 record += "\nsample=" + sample; 531 #ifdef HAVE_TEXTCAT 532 char * lang; 533 lang = textcat_Classify( textcat, sample.c_str(), sample.length()+1 ); 534 language = string(lang); 535 if ((language != _TEXTCAT_RESULT_UNKOWN) // unknown language 536 && (language != _TEXTCAT_RESULT_SHORT)) // too little information 537 { 538 if (language[0] == '[') { 539 int pos = language.find(']',0); 540 language = language.substr(1,pos-1); 541 } 542 record += "\nlanguage=" + language; 543 if (language != curr_lang) { 544 cout << "curr_lang=" << language << ", "; 545 stemmer = Xapian::Stem(language); 546 curr_lang = language; 547 } 548 } 549 #endif 470 550 if (!title.empty()) { 471 551 record += "\ncaption=" + generate_sample(title, TITLE_SIZE); 472 552 } … … 529 609 // Add MD5 as a value to allow duplicate documents to be collapsed together. 530 610 newdocument.add_value(VALUE_MD5, md5); 531 611 612 // filesize also to sort by size 613 newdocument.add_value(VALUE_FILESIZE, int_to_binary_string((uint32_t)size)); 614 532 615 if (!skip_duplicates) { 533 616 // If this document has already been indexed, update the existing 534 617 // entry. … … 536 619 Xapian::docid did = db.replace_document(urlterm, newdocument); 537 620 if (did < updated.size()) { 538 621 updated[did] = true; 622 if (!silent) 539 623 cout << "updated." << endl; 540 624 } else { 625 if (!silent) 541 626 cout << "added." << endl; 542 627 } 543 628 } catch (...) { 544 629 // FIXME: is this ever actually needed? 
545 630 db.add_document(newdocument); 631 if (!silent) 546 632 cout << "added (failed re-seek for duplicate)." << endl; 547 633 } 548 634 } else { 549 635 // If this were a duplicate, we'd have skipped it above. 550 636 db.add_document(newdocument); 637 if (!silent) 551 638 cout << "added." << endl; 552 639 } 553 640 } 554 641 642 /* Note: switched to cache_dir as root for virtual directories, 643 because /srcdir/.zip might not be creatable. */ 555 644 static void 556 645 index_directory(size_t depth_limit, const string &dir, 557 646 map<string, string>& mime_map) 558 647 { 559 648 string path = root + indexroot + dir; 560 649 561 cout << "[Entering directory " << dir << "]" << endl; 650 if (!silent) 651 cout << "[Entering directory " << dir.substr(1) << "]" << endl; 562 652 563 653 DirectoryIterator d(follow_symlinks); 564 654 try { 565 d.start(path); 566 while (d.next()) try { 567 string url = dir; 568 if (!url.empty() && url[url.size() - 1] != '/') url += '/'; 569 url += d.leafname(); 570 string file = root + indexroot + url; 571 switch (d.get_type()) { 572 case DirectoryIterator::DIRECTORY: 573 if (depth_limit == 1) continue; 574 try { 575 size_t new_limit = depth_limit; 576 if (new_limit) --new_limit; 577 index_directory(new_limit, url, mime_map); 578 } catch (...) { 579 cout << "Caught unknown exception in index_directory, rethrowing" << endl; 580 throw; 655 d.start(root + indexroot + dir); 656 } catch (const std::string & error) { 657 cout << error << " - skipping" << endl; 658 return; 659 } 660 while (d.next()) try { 661 struct stat statbuf; 662 string url = dir; 663 if (!url.empty() && url[url.size() - 1] != '/') url += '/'; 664 url += d.leafname(); 665 string file = root + indexroot + url; 666 switch (d.get_type()) { 667 case DirectoryIterator::DIRECTORY: 668 if (depth_limit == 1) continue; 669 try { 670 size_t new_limit = depth_limit; 671 if (new_limit) --new_limit; 672 index_directory(new_limit, url, mime_map); 673 } catch (...) 
{ 674 cout << "Caught unknown exception in index_directory, rethrowing" << endl; 675 throw; 676 } 677 continue; 678 case DirectoryIterator::REGULAR_FILE: { 679 string ext; 680 string::size_type dot = url.find_last_of('.'); 681 if (dot != string::npos) ext = url.substr(dot + 1); 682 if (!ext.empty()) { 683 ext = string(ext); // lowercase ext 684 for (unsigned int i=0; i<ext.length(); i++) { 685 ext[i] = tolower(ext[i]); 581 686 } 582 case DirectoryIterator::REGULAR_FILE: { 583 string ext; 584 string::size_type dot = url.find_last_of('.'); 585 if (dot != string::npos) ext = url.substr(dot + 1); 586 587 map<string,string>::iterator mt = mime_map.find(ext); 588 if (mt == mime_map.end()) { 589 // If the extension isn't found, see if the lower-cased 590 // version (if different) is found. 591 bool changed = false; 592 string::iterator i; 593 for (i = ext.begin(); i != ext.end(); ++i) { 594 if (*i >= 'A' && *i <= 'Z') { 595 *i = tolower(*i); 596 changed = true; 597 } 687 } 688 689 if (strcmp(d.leafname(), "mbox") == 0) { 690 // Special filename. 691 off_t size = d.get_size(); 692 time_t mtime = d.get_mtime(); 693 index_file(indexroot + url, "message/rfc822", mtime, size); 694 continue; 695 } 696 697 map<string,string>::iterator mt = mime_map.find(ext); 698 if (mt == mime_map.end()) { 699 // If the extension isn't found, see if the lower-cased 700 // version (if different) is found. 701 bool changed = false; 702 string::iterator i; 703 for (i = ext.begin(); i != ext.end(); ++i) { 704 if (*i >= 'A' && *i <= 'Z') { 705 *i = tolower(*i); 706 changed = true; 707 } 708 } 709 if (changed) mt = mime_map.find(ext); 710 } 711 if (mt != mime_map.end()) { 712 string oldroot = root; 713 // Only check the file size if we recognise the 714 // extension to avoid a call to stat()/lstat() for 715 // files we can't handle when readdir() tells us the 716 // file type. 
717 off_t size = d.get_size(); 718 if (size == 0) { 719 cout << "Skipping empty file: \"" << file << "\"" 720 << endl; 721 continue; 722 } 723 724 #ifndef _MSC_VER 725 // NOTE: unpacking does not work on MSWin32 this way! 726 // we'd really have to pull in utils.cc:rmdir from xapian-core 727 if (ext == "zip") { 728 if (depth_limit == 1) { 729 cout << "Recursion limit reached for \""<< url << "\" - skipping " << endl; 730 continue; 731 } 732 #ifdef HAVE_UNZIP 733 // overwrite 734 string cmd = UNZIP" -u -P. -o " +shell_protect(file) + " -d " +shell_protect(cache_dir+"/.zip"+indexroot+url+"/"); 735 try { 736 size_t new_limit = depth_limit; 737 if (new_limit) --new_limit; 738 index_cached_directory(new_limit, file, url, ext, cmd, mime_map); 739 } catch (ReadError) { 740 cout << "failed " << cmd << " << in index_cached_directory" << endl; 741 root = oldroot; 742 } catch (...) { 743 cout << "Caught unknown exception in index_cached_directory, rethrowing" << endl; 744 root = oldroot; 745 throw; 598 746 } 599 if (changed) mt = mime_map.find(ext); 747 continue; 748 #endif 600 749 } 601 if (mt != mime_map.end()) { 602 // Only check the file size if we recognise the 603 // extension to avoid a call to stat()/lstat() for 604 // files we can't handle when readdir() tells us the 605 // file type. 606 off_t size = d.get_size(); 607 if (size == 0) { 608 cout << "Skipping empty file: \"" << file << "\"" 609 << endl; 750 #ifdef HAVE_UNRAR 751 else if (ext == "rar") { 752 if (depth_limit == 1) { 753 cout << "Recursion limit reached for \""<< url << "\" - skipping " << endl; 610 754 continue; 611 755 } 612 613 // It's in our MIME map so we know how to index it. 
614 const string & mimetype = mt->second; 756 // overwrite 757 string cmd = UNRAR" x -o+ " +shell_protect(file) + " " 758 + shell_protect(cache_dir+"/.rar"+indexroot+url+"/"); 759 try { 760 size_t new_limit = depth_limit; 761 if (new_limit) --new_limit; 762 index_cached_directory(new_limit, file, url, ext, cmd, mime_map); 763 } catch (ReadError) { 764 cout << "failed " << cmd << " << in index_cached_directory" << endl; 765 root = oldroot; 766 } catch (...) { 767 cout << "Caught unknown exception in index_cached_directory, rethrowing" << endl; 768 root = oldroot; 769 throw; 770 } 771 continue; 772 } 773 #endif 774 #ifdef HAVE_MSGCONVERT 775 else if (ext == "msg") { 776 struct stat statcache; 777 char olddir[256]; 778 779 if (depth_limit == 1) { 780 cout << "Recursion limit reached for \""<< url << "\" - skipping " << endl; 781 continue; 782 } 783 string cmd = LIBEXECDIR"outlook2text "+shell_protect(file); 784 // unpack multiparts and attachments. so we have to chdir first 785 string fulldir = cache_dir+"/.msg"+indexroot+url; 786 getcwd(olddir,256); 787 #ifdef HAVE_LSTAT 788 lstat(fulldir.c_str(), &statcache); 789 #else 790 stat(fulldir.c_str(), &statcache); 791 #endif 792 if (!S_ISDIR(statcache.st_mode)) { 793 mkdir_p(fulldir, 0755); 794 } 615 795 try { 616 time_t mtime = d.get_mtime(); 617 index_file(indexroot + url, mimetype, mtime, size); 618 } catch (NoSuchFilter) { 619 // FIXME: we ought to ignore by mime-type not 620 // extension. 
621 cout << "Filter for \"" << mimetype 622 << "\" not installed - ignoring extension \"" 623 << ext << "\"" << endl; 624 mime_map.erase(mt); 625 } 626 } else { 627 cout << "Unknown extension: \"" << file 628 << "\" - skipping" << endl; 796 chdir (fulldir.c_str()); 797 size_t new_limit = depth_limit; 798 if (new_limit) --new_limit; 799 index_cached_directory(new_limit, file, url, ext, cmd, mime_map); 800 chdir (olddir); 801 } catch (ReadError) { 802 cout << "failed " << cmd << " << in index_cached_directory" << endl; 803 chdir (olddir); 804 root = oldroot; 805 } catch (...) { 806 cout << "Caught unknown exception in index_cached_directory, rethrowing" << endl; 807 chdir (olddir); 808 root = oldroot; 809 throw; 810 } 811 continue; 629 812 } 630 continue; 631 } 632 default: 633 cout << "Not a regular file \"" << file 813 #endif 814 #ifdef HAVE_READPST 815 else if (ext == "pst") { 816 if (depth_limit == 1) { 817 cout << "Recursion limit reached for \""<< url << "\" - skipping " << endl; 818 continue; 819 } 820 // unpack attachments also, together with mbox files 821 string cmd = READPST" -r -cv -w -o " 822 + shell_protect(cache_dir+"/.pst"+indexroot+url+"/")+" "+shell_protect(file); 823 try { 824 size_t new_limit = depth_limit; 825 if (new_limit) --new_limit; 826 index_cached_directory(new_limit, file, url, ext, cmd, mime_map); 827 } catch (ReadError) { 828 root = oldroot; 829 cout << "failed " << cmd << " << in index_cached_directory" << endl; 830 } catch (...) { 831 root = oldroot; 832 cout << "Caught unknown exception in index_cached_directory, rethrowing" << endl; 833 throw; 834 } 835 continue; 836 } 837 #endif 838 #endif 839 // It's in our MIME map so we know how to index it. 840 const string & mimetype = mt->second; 841 try { 842 time_t mtime = d.get_mtime(); 843 index_file(indexroot + url, mimetype, mtime, size); 844 } catch (NoSuchFilter) { 845 // FIXME: we ought to ignore by mime-type not 846 // extension. 
847 cout << "Filter for \"" << mimetype 848 << "\" not installed - ignoring extension \"" 849 << ext << "\"" << endl; 850 mime_map.erase(mt); 851 } 852 } else { 853 cout << "Unknown extension: \"" << file 634 854 << "\" - skipping" << endl; 855 } 856 continue; 635 857 } 636 } catch (const std::string & error) {637 cout << error << " - skipping" << endl;638 continue;858 default: 859 cout << "Not a regular file \"" << file 860 << "\" - skipping" << endl; 639 861 } 640 862 } catch (const std::string & error) { 641 cout << error << " - skipping directory" << endl; 642 return; 863 cout << error << " - skipping" << endl; 864 continue; 865 } 866 } 867 868 static 869 int mkdir_p(const string &path, mode_t mode) { 870 #ifdef __WIN32__ 871 stdout_to_string("mkdir \""+shell_protect(path)+"\""); 872 #else 873 stdout_to_string("mkdir -p "+shell_protect(path)); 874 #endif 875 return 0; 876 } 877 878 /* 879 * unpack .msg/.pst/.rar/.zip into local cache dir and recurse there 880 */ 881 static void 882 index_cached_directory(size_t depth_limit, 883 const string &file, 884 const string &url, 885 const string &ext, 886 const string &cmd, 887 map<string, string>& mime_map) 888 { 889 string oldroot = root; 890 root = cache_dir; 891 string cache = root+"/."+ext+indexroot; 892 string cachedir = cache+url; 893 struct stat statfile, statcache; 894 bool extract_cache; 895 #ifdef HAVE_LSTAT 896 lstat(file.c_str(), &statfile); 897 lstat(cachedir.c_str(), &statcache); 898 #else 899 stat(file.c_str(), &statfile); 900 stat(cachedir.c_str(), &statcache); 901 #endif 902 extract_cache = true; 903 // if cachedir exists and if file is older as cachedir and if cachedir existed 5 secs ago, 904 // then it was already extracted. 905 if (S_ISDIR(statcache.st_mode) 906 && S_ISREG(statfile.st_mode) 907 && (statfile.st_mtime < statcache.st_mtime) 908 && (statcache.st_mtime < (time_t)(time(NULL)-500))) // not created by nested mkdir call 909 { 910 // but is it in the database also? 
prevent from deleting skipped files 911 if (!silent) 912 cout << "Unchanged cache \"" << cachedir << "\" - \"" << file << "\" - skip extraction " 913 // << statfile.st_mtime << " < " << statcache.st_mtime 914 << endl; 915 extract_cache = false; 916 } 917 if (S_ISDIR(statcache.st_mode) && S_ISREG(statfile.st_mode) ) 918 { 919 // check database timestamp for cached container, esp. for cleaned up caches. 920 // if already in db we need not to extract again 921 string urlterm("U"); 922 urlterm += baseurl; 923 urlterm += "/."+ext+indexroot+url; 924 if (urlterm.length() > MAX_SAFE_TERM_LENGTH) 925 urlterm = hash_long_term(urlterm, MAX_SAFE_TERM_LENGTH); 926 927 { 928 // at first find the docid with the beginning urlterm and check its timestamp 929 Xapian::docid docid = 0; 930 Xapian::PostingIterator p = db.postlist_begin(urlterm); 931 if (p != db.postlist_end(urlterm)) { 932 docid = *p; 933 } 934 if (docid && !ignore_time) { 935 // new: first search value (1) 936 Xapian::Document doc = db.get_document(docid); 937 string lastmod; 938 if (doc.values_count()) 939 lastmod = doc.get_value(VALUE_LASTMOD); 940 if (!lastmod.empty()) { 941 if (string_to_int(lastmod) >= statfile.st_mtime) { 942 if (!silent) 943 cout << "Cache "<< "."+ext+indexroot+url << " not newer. Ignored." 
<< endl; 944 if (docid < updated.size()) { 945 updated[docid] = true; 946 root = oldroot; 947 return; 948 } 949 } 950 } 951 } 952 } 953 } 954 955 if (extract_cache) { 956 if (!silent) 957 cout << "[EXTRACT into cache " << shell_protect(cachedir) << "]" << endl; 958 if (verbose && S_ISDIR(statcache.st_mode) && S_ISREG(statfile.st_mode)) 959 cout << " ...changed cache \"" << cachedir << "\" - \"" << file << "\" " 960 << statfile.st_mtime << " < " << statcache.st_mtime << " time: " << time(NULL) 961 << endl; 962 if (!S_ISDIR(statcache.st_mode)) 963 mkdir_p(cachedir, 0755); 964 //stdout_to_string("mkdir -p "+shell_protect(cachedir)); 965 stdout_to_string(cmd); 966 #ifndef __WIN32__ 967 stdout_to_string("chmod -R u+rwx " + shell_protect(cachedir)); 968 #endif 969 #ifdef HAVE_LSTAT 970 lstat(cachedir.c_str(), &statcache); 971 #else 972 stat(cachedir.c_str(), &statcache); 973 #endif 974 } 975 976 if (S_ISDIR(statcache.st_mode)) { 977 if (depth_limit == 1) { 978 cout << "Recursion limit reached for \""<< url << "\" - skipping " << endl; 979 } else { 980 // max loop 5, magic start: /.ext+file 981 index_directory(depth_limit+5, "/."+ext+url, mime_map); 982 if (!nocleanup) { 983 if (!silent) 984 cout << "[CLEANUP " << "rm -rf " << shell_protect(cachedir) << "]" << endl; 985 rmdir(cachedir); 986 } 987 } 988 } 989 else { // no -p would be fatal here 990 cout << "cachedir " << shell_protect(cachedir) << " does not exist - skipped" << endl; 643 991 } 992 root = oldroot; 644 993 } 645 994 646 995 int … … 653 1002 static const struct option longopts[] = { 654 1003 { "help", no_argument, NULL, 'h' }, 655 1004 { "version", no_argument, NULL, 'v' }, 1005 { "verbose", no_argument, NULL, 'V' }, 1006 { "silent", no_argument, NULL, 'S' }, 656 1007 { "overwrite", no_argument, NULL, 'o' }, 657 1008 { "duplicates", required_argument, NULL, 'd' }, 658 1009 { "preserve-nonduplicates", no_argument, NULL, 'p' }, … … 667 1018 { "depth-limit",required_argument, NULL, 'l' }, 668 1019 { "follow", 
no_argument, NULL, 'f' }, 669 1020 { "stemmer", required_argument, NULL, 's' }, 1021 { "nocleanup", no_argument, NULL, 'c' }, 1022 { "cachedir", required_argument, NULL, 'C' }, 670 1023 { 0, 0, NULL, 0 } 671 1024 }; 672 1025 … … 717 1070 mime_map["xlt"] = "application/vnd.ms-excel"; // Excel template 718 1071 mime_map["ppt"] = "application/vnd.ms-powerpoint"; 719 1072 mime_map["pps"] = "application/vnd.ms-powerpoint"; // Powerpoint slideshow 1073 #ifdef HAVE_READPST 1074 // Outlook messager folder 1075 mime_map["pst"] = "application/vnd.ms-outlook-pst"; // readpst | uudeview (libpst) 1076 #endif 1077 #ifdef HAVE_MSGCONVERT 1078 mime_map["msg"] = "application/vnd.ms-outlook"; // outlook2text via msgconvert.pl 1079 #endif 1080 mime_map["mbox"] = "message/rfc822"; // => mbox2omega 720 1081 // Perl: 721 1082 mime_map["pl"] = "text/x-perl"; 722 1083 mime_map["pm"] = "text/x-perl"; … … 727 1088 // DjVu: 728 1089 mime_map["djv"] = "image/vnd.djvu"; 729 1090 mime_map["djvu"] = "image/vnd.djvu"; 1091 #ifndef _MSC_VER 1092 mime_map["zip"] = "application/x-zip"; // recursive scanning 1093 # ifdef HAVE_UNRAR 1094 mime_map["rar"] = "application/x-rar"; // recursive scanning 1095 # endif 1096 #endif 1097 1098 read_config_file(); 1099 1100 #ifdef HAVE_TEXTCAT 1101 textcat = textcat_Init( LANGCLASS_CONF ); 1102 #endif 730 1103 731 while ((getopt_ret = gnu_getopt_long(argc, argv, "hvd:D:U:M: lpf", longopts, NULL)) != -1) {1104 while ((getopt_ret = gnu_getopt_long(argc, argv, "hvd:D:U:M:C:lpfc", longopts, NULL))!=EOF) { 732 1105 switch (getopt_ret) { 733 1106 case 'h': { 734 1107 cout << PROG_NAME" - "PROG_DESC"\n\n" … … 753 1126 " duplicate replace mode\n" 754 1127 " -D, --db path to database to use\n" 755 1128 " -U, --url base url DIRECTORY represents (default: /)\n" 1129 " -C, --cachedir path to local cache to use (default from omega.conf)\n" 756 1130 " -M, --mime-type additional MIME mapping ext:type\n" 757 1131 " -l, --depth-limit=LIMIT set recursion limit (0 = unlimited)\n" 
758 1132 " -f, --follow follow symbolic links\n" 1133 " -c, --nocleanup keep cache, don't delete temporary .zip,.rar,.pst,.msg cache folders\n" 759 1134 " --overwrite create the database anew (the default is to update\n" 760 " if the database already exists)" << endl; 1135 " if the database already exists)" 1136 " --verbose Print commands also\n" 1137 " --silent Print only errors\n"; 761 1138 print_stemmer_help(" "); 762 1139 print_help_and_version_help(" "); 763 1140 return 0; … … 785 1162 case 'p': // don't delete unupdated documents 786 1163 preserve_unupdated = true; 787 1164 break; 1165 case 'V': 1166 verbose = true; 1167 break; 1168 case 'c': 1169 nocleanup = true; 1170 break; 788 1171 case 'l': { // Set recursion limit 789 1172 int arg = atoi(optarg); 790 1173 if (arg < 0) arg = 0; … … 817 1200 case 'U': 818 1201 baseurl = optarg; 819 1202 break; 1203 case 'C': 1204 cache_dir = optarg; 1205 break; 820 1206 case 'o': // --overwrite 821 1207 overwrite = true; 822 1208 break; … … 844 1230 if (baseurl.empty()) { 845 1231 cerr << PROG_NAME": --url not specified, assuming `/'.\n"; 846 1232 } 1233 error_log = " 2>>"+log_dir+"omindex-error.log"; 847 1234 // baseurl mustn't end '/' or you end up with the wrong URL 848 1235 // (//thing is different to /thing). We could probably make this 849 1236 // safe a different way, by ensuring that we don't put a leading '/' … … 869 1256 } else { 870 1257 indexroot = ""; // index the whole of root 871 1258 } 1259 // add the db basename to cache_dir 1260 { 1261 const char *p = strrchr(dbpath.c_str(), '/'); 1262 // on windows only 1263 if (!p) p = strrchr(dbpath.c_str(), '\\'); 1264 if (p) { p++; } else { p = dbpath.c_str(); } 1265 cache_dir += p; 1266 } 872 1267 873 1268 int exitcode = 1; 874 1269 try { … … 905 1300 cout << "Exception: " << s << endl; 906 1301 } catch (const char *s) { 907 1302 cout << "Exception: " << s << endl; 1303 if (!tmpdir.empty()) rmdir(tmpdir.c_str()); 1304 return exitcode; 908 1305 } catch (...) 
{ 909 1306 cout << "Caught unknown exception" << endl; 910 1307 } … … 912 1309 // If we created a temporary directory then delete it. 913 1310 if (!tmpdir.empty()) rmdir(tmpdir.c_str()); 914 1311 1312 #ifdef HAVE_TEXTCAT 1313 textcat_Done(textcat); 1314 #endif 915 1315 return exitcode; 916 1316 } -
xapian-omega-1.0.7a/outlook2text.in
diff -u xapian-omega-1.0.7a/outlook2text.in.orig
#! /bin/sh
# Converts an Outlook .msg file to mbox and extracts its attachments.
#   $1  the .msg file to convert
#   $2  optional directory handed to @MIMEEXPLODE@ via -d; when it is not
#       given we are expected to be in the cache dir already, and a directory
#       named after the .msg file (without its .msg suffix) is used instead.
# @MSGCONVERT@ and @MIMEEXPLODE@ are placeholders (presumably substituted at
# configure time -- confirm against configure.ac).
#
# "$2" must be quoted: unquoted, an empty $2 turns the test into `[ -n ]`,
# which is always true, so the fallback branch below could never be taken.
if [ -n "$2" ]; then
  @MSGCONVERT@ "$1" | @MIMEEXPLODE@ -d "$2"
else
  # already is in the cache dir
  base=`basename "$1" .msg`
  @MSGCONVERT@ "$1" | @MIMEEXPLODE@ -d "${base}"
fi
xapian-omega-1.0.7a/ppt2text.in
diff -u xapian-omega-1.0.7a/ppt2text.in.orig
#! /bin/sh
# Writes the text of the PowerPoint file given as $1 to stdout, deleting every
# run of digits and dots that is directly followed by a comma (the comma is
# removed too).
# @CATPPT@ and @SED@ are placeholders (presumably substituted at configure
# time -- confirm against configure.ac).
@CATPPT@ "$1" | @SED@ -r -e 's/[0123456789.]+,//g'
xapian-omega-1.0.7a/query.cc
diff -u xapian-omega-1.0.7a/query.cc.orig
old new 107 107 108 108 static Xapian::Query query; 109 109 //static string url_query_string; 110 Xapian::Query::op default_op = Xapian::Query::OP_ OR; // default matching mode110 Xapian::Query::op default_op = Xapian::Query::OP_AND; // default matching mode 111 111 112 112 static Xapian::QueryParser qp; 113 113 static Xapian::Stem *stemmer = NULL; … … 141 141 switch (t[0]) { 142 142 case 'a': 143 143 return (t == "a" || t == "about" || t == "an" || t == "and" || 144 t == "are" || t == "as" || t == "at"); 144 t == "are" || t == "as" || t == "at" || t == "according" || 145 t == "again" || t == "against" || t == "ah" || t == "all" || 146 t == "although" || t == "always" || t == "anyone" || t == "after" || 147 t == "also" || t == "any"); 145 148 case 'b': 146 149 return (t == "be" || t == "by"); 147 150 case 'e': … … 236 239 } 237 240 238 241 try { 239 query = qp.parse_query(query_string); 242 //query = qp.parse_query(query_string); // simple query 243 query = qp.parse_query(query_string, qp.FLAG_WILDCARD); 240 244 } catch (Xapian::QueryParserError &e) { 241 245 error_msg = e.get_msg(); 242 246 return BAD_QUERY; -
xapian-omega-1.0.7a/runfilter.cc
diff -u xapian-omega-1.0.7a/runfilter.cc.orig
old new 60 60 61 61 using namespace std; 62 62 63 extern string error_log; 64 extern bool verbose; 65 63 66 string 64 67 stdout_to_string(const string &cmd) 65 68 { … … 97 100 setrlimit(RLIMIT_AS, &ram_limit); 98 101 } 99 102 100 execl("/bin/sh", "/bin/sh", "-c", cmd.c_str(), (void*)NULL); 103 string tmp; 104 tmp = cmd + error_log; 105 if (verbose) { 106 cout << " Executing '" << tmp << "'..." << endl; 107 } 108 109 execl("/bin/sh", "/bin/sh", "-c", tmp.c_str(), (void*)NULL); 101 110 _exit(-1); 102 111 } 103 112 … … 134 143 throw ReadError(); 135 144 } 136 145 #else 137 FILE * fh = popen(cmd.c_str(), "r"); 146 string tmp; 147 tmp = cmd + error_log; 148 if (verbose) { 149 cout << " Executing '" << tmp << "'..." << endl; 150 } 151 FILE * fh = popen(tmp.c_str(), "r"); 138 152 if (fh == NULL) throw ReadError(); 139 153 while (!feof(fh)) { 140 154 char buf[4096]; -
xapian-omega-1.0.7a/scriptindex.cc
diff -u xapian-omega-1.0.7a/scriptindex.cc.orig
old new 4 4 * Copyright 2001 Sam Liddicott 5 5 * Copyright 2001,2002 Ananova Ltd 6 6 * Copyright 2002,2003,2004,2005,2006,2007 Olly Betts 7 * Copyright 2006,2007 AVL List GesmbH 7 8 * 8 9 * This program is free software; you can redistribute it and/or 9 10 * modify it under the terms of the GNU General Public License as … … 38 39 #include <stdio.h> 39 40 #include <time.h> 40 41 #include "safeunistd.h" 42 #include <sys/stat.h> 41 43 42 44 #include "commonhelp.h" 45 #include "configfile.h" 43 46 #include "hashterm.h" 44 47 #include "loadfile.h" 45 48 #include "myhtmlparse.h" 46 49 #include "stringutils.h" 47 50 #include "utf8truncate.h" 48 51 #include "utils.h" 52 #include "values.h" 49 53 50 54 #include "gnu_getopt.h" 51 55 56 #ifdef HAVE_TEXTCAT 57 # include "textcat.h" 58 # ifndef LANGCLASS_CONF 59 # define LANGCLASS_CONF "/var/lib/omega/langclass/langclass.conf" 60 # endif 61 #endif 62 52 63 using namespace std; 53 64 54 65 #define PROG_NAME "scriptindex" … … 58 69 static int addcount; 59 70 static int repcount; 60 71 static int delcount; 72 static string curr_lang = "english"; 73 #ifdef HAVE_TEXTCAT 74 static void *textcat; 75 #endif 61 76 62 77 inline static bool 63 78 p_space(unsigned int c) … … 422 437 { 423 438 string line; 424 439 size_t line_no = 0; 440 time_t last_mod = 0; 441 long file_size = 0; 442 string language; 443 444 if (strcmp(fname,"<stdin>") != 0) { 445 struct stat statbuf; 446 stat(fname, &statbuf); 447 if (! 
statbuf.st_size) { 448 cout << "Empty \"" << string(fname) << "\" - skipping\n"; 449 return false; 450 } 451 file_size = statbuf.st_size; 452 last_mod = statbuf.st_mtime; 453 } 425 454 while (!stream.eof() && getline(stream, line)) { 426 455 ++line_no; 427 456 Xapian::Document doc; … … 459 488 value += line; 460 489 } 461 490 491 #ifdef HAVE_TEXTCAT 492 char * lang; 493 lang = textcat_Classify( textcat, value.c_str(), value.length()+1 ); 494 language = string(lang); 495 if ((language != _TEXTCAT_RESULT_UNKOWN) // unknown language 496 && (language != _TEXTCAT_RESULT_SHORT)) // too little information 497 { 498 if (language[0] == '[') { 499 int pos = language.find(']',0); 500 language = language.substr(1,pos-1); 501 } 502 // cache language. may be overridden by the script 503 if (language != curr_lang) { 504 cout << "new language " << curr_lang << " => " << language << " "; 505 indexer.set_stemmer(Xapian::Stem(language)); 506 curr_lang = language; 507 } 508 } else { 509 language = ""; 510 } 511 #endif 512 462 513 const vector<Action> &v = index_spec[field]; 463 514 string old_value = value; 464 515 vector<Action>::const_iterator i; … … 638 689 for (i = fields.begin(); i != fields.end(); ++i) { 639 690 list<string>::const_iterator j; 640 691 for (j = i->second.begin(); j != i->second.end(); j++) { 692 if (i->first == "language") language = string(); 693 if (i->first == "lastmod") last_mod = 0; 694 if (i->first == "size") file_size = 0; 641 695 data += i->first; 642 696 data += '='; 643 697 data += *j; 644 698 data += '\n'; 645 699 } 646 700 } 701 // provide some extra fields if not already provided by the script 702 #ifdef HAVE_TEXTCAT 703 if (!language.empty()) // autodetected language 704 data += "language="+language+'\n'; 705 #endif 706 if (last_mod) { // if indexed per filename 707 data += "lastmod="+int_to_string(last_mod)+'\n'; 708 doc.add_value(VALUE_LASTMOD, int_to_string(last_mod)); 709 } 710 if (file_size) { // if indexed per filename 711 data += 
"size="+int_to_string(file_size)+'\n'; 712 doc.add_value(VALUE_FILESIZE, int_to_string(file_size)); 713 } 647 714 648 715 // Put the data in the document 649 716 doc.set_data(data); … … 682 749 // If the database already exists, default to updating not overwriting. 683 750 int database_mode = Xapian::DB_CREATE_OR_OPEN; 684 751 verbose = false; 685 Xapian::Stem stemmer("english"); 752 int exit_code = 0; 753 Xapian::Stem stemmer(curr_lang); 686 754 687 755 static const struct option longopts[] = { 688 756 { "help", no_argument, NULL, 'h' }, … … 742 810 exit(show_help ? 0 : 1); 743 811 } 744 812 813 #ifdef HAVE_TEXTCAT 814 textcat = textcat_Init( LANGCLASS_CONF ); 815 #endif 816 745 817 parse_index_script(argv[1]); 746 818 747 819 // Catch any Xapian::Error exceptions thrown. … … 785 857 786 858 cout << "records (added, replaced, deleted) = (" << addcount << 787 859 ", " << repcount << ", " << delcount << ")" << endl; 788 } catch (const Xapian::Error &error) { 789 cout << "Exception: " << error.get_msg() << endl; 790 exit(1); 860 } catch (const Xapian::Error &e) { 861 cout << "Exception: XapianError:\"" << e.get_msg() 862 << "\" Type:" << e.get_type() << " Context:" << e.get_context() 863 << " Errno:" << e.get_error_string() << endl; 864 exit_code = 1; 791 865 } catch (const std::bad_alloc &) { 792 866 cout << "Exception: std::bad_alloc" << endl; 793 exit (1);867 exit_code = 1; 794 868 } catch (...) { 795 869 cout << "Unknown Exception" << endl; 796 exit (1);870 exit_code = 1; 797 871 } 872 #ifdef HAVE_TEXTCAT 873 textcat_Done(textcat); 874 #endif 875 return exit_code; 798 876 } -
xapian-omega-1.0.7a/utils.cc
diff -u xapian-omega-1.0.7a/utils.cc.orig
old new 22 22 #include <config.h> 23 23 24 24 #include "utils.h" 25 #include "common/stringutils.h" 25 26 26 27 #include <stdio.h> // for sprintf/snprintf 27 28 #include <stdlib.h> … … 30 31 31 32 using namespace std; 32 33 34 #ifdef __WIN32__ 35 #include "safewindows.h" 36 #endif 37 33 38 // This ought to be enough for any of the conversions below. 34 39 #define BUFSIZE 100 35 40 41 /// Remove a directory and contents. 42 void 43 rmdir(const string &filename) 44 { 45 // Check filename exists and is actually a directory 46 struct stat sb; 47 if (stat(filename, &sb) != 0 || !S_ISDIR(sb.st_mode)) return; 48 49 string safefile = shell_protect(filename); 50 #ifdef __WIN32__ 51 # if 1 52 static int win95 = -1; 53 if (win95 == -1) { 54 OSVERSIONINFO info; 55 memset(&info, 0, sizeof(OSVERSIONINFO)); 56 info.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); 57 if (GetVersionEx(&info)) { 58 win95 = (info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS); 59 } 60 } 61 62 if (win95) { 63 // for 95 like systems: 64 system("deltree /y \"" + safefile + "\""); 65 } else { 66 // for NT like systems: 67 system("rd /s /q \"" + safefile + "\""); 68 } 69 # else 70 safefile.append("\0", 2); 71 SHFILEOPSTRUCT shfo; 72 memset((void*)&shfo, 0, sizeof(shfo)); 73 shfo.hwnd = 0; 74 shfo.wFunc = FO_DELETE; 75 shfo.pFrom = safefile.data(); 76 shfo.fFlags = FOF_NOCONFIRMATION|FOF_NOERRORUI|FOF_SILENT; 77 (void)SHFileOperation(&shfo); 78 # endif 79 #else 80 system("rm -rf " + safefile); 81 #endif 82 } -
xapian-omega-1.0.7a/utils.h
diff -u xapian-omega-1.0.7a/utils.h.orig

#include <string>

#include <stdlib.h>
#include <sys/stat.h>
#include <sys/types.h>
#ifdef _MSC_VER
# include <direct.h>
# include <io.h>
#else
# include <unistd.h>
#endif
#include <ctype.h>
#include <fcntl.h>


/** Converts year, month, day into an 8 character string like: "20061031". */
std::string date_to_string(int year, int month, int day);

// … (unchanged lines elided by the diff) …

/** Converts a string to an int. */
int string_to_int(const std::string & s);

/* AVL */
// NOTE(review): a using-directive at header scope leaks into every file that
// includes this header; the declarations below rely on it for `string`.
using namespace std;

/// Declared here; defined outside this header.
void rmdir(const string &filename);

/// mkdir() overload taking a C++ string.  On __WIN32__ the mode argument is
/// accepted but ignored and _mkdir() is called instead.
#ifdef __WIN32__
inline int mkdir(const string &filename, int /*mode*/)
{
    return _mkdir(filename.c_str());
}
#else
inline int mkdir(const string &filename, mode_t mode)
{
    return ::mkdir(filename.c_str(), mode);
}
#endif

/// stat() overload taking a C++ string; forwards to the C string version
/// and returns its result unchanged.
inline int stat(const string &filename, struct stat *buf)
{
    return ::stat(filename.c_str(), buf);
}

/// unlink() overload taking a C++ string; forwards to the C string version.
inline int unlink(const string &filename)
{
    return ::unlink(filename.c_str());
}

/// system() overload taking a C++ string; forwards to the C string version.
inline int system(const string &command)
{
    return ::system(command.c_str());
}

// (The hunk's trailing context line is the header's closing `#endif`; the
// directive it matches lies above the visible part of this diff.)
xapian-omega-1.0.7a/values.h
diff -u xapian-omega-1.0.7a/values.h.orig
/// Value slot numbers.
///
/// Every enumerator except the last must be followed by a comma: adding
/// VALUE_FILESIZE after VALUE_MD5 without one does not compile.
enum value_slot {
    VALUE_LASTMOD = 0,	// 4 byte big endian value - seconds since 1970.
    VALUE_MD5 = 1,	// 16 byte MD5 checksum of original document.
    VALUE_FILESIZE = 2	// filesize in bytes
};

// (The hunk's trailing context is the first line of the next definition:
//  inline uint32_t binary_string_to_int(const std::string &s) …)
xapian-omega-1.0.7a/xapian-omega.spec.in
diff -u xapian-omega-1.0.7a/xapian-omega.spec.in.orig
old new 45 45 # Create /var directories 46 46 mkdir -p %{buildroot}%{contentdir}/omega/data 47 47 mkdir -p %{buildroot}%{contentdir}/omega/cdb 48 mkdir -p %{buildroot}%{contentdir}/omega/cache 48 49 mkdir -p %{buildroot}%{logdir}/omega 49 50 # Default templates 50 51 mkdir -p %{buildroot}%{contentdir}/omega/templates … … 77 78 /var/www/icons/omega 78 79 %{_datadir}/%{name} 79 80 %config(noreplace) /etc/omega.conf 80 %doc %{_datadir}/doc/%{name}-%{version} 81 %docdir /usr/share/doc/%{name}-%{version} 82 %doc AUTHORS ChangeLog COPYING NEWS README TODO 81 83 # man pages may be gzipped, hence the trailing wildcard. 82 84 %{_mandir}/man1/omindex.1* 83 85 %{_mandir}/man1/scriptindex.1*