-
diff --git a/xapian-applications/omega/ChangeLog b/xapian-applications/omega/ChangeLog
index 74f3396..878ba89 100644
a
|
b
|
|
| 1 | 2006-08-22 09:30:12 Reini Urban <reinhard.urban@avl.com> |
| 2 | |
| 3 | omega-0.9.6c: |
| 4 | * scriptindex.cc: Support TEXTCAT for language autodetection. |
| 5 | |
| 6 | 2006-08-17 18:06:26 Reini Urban <reinhard.urban@avl.com> |
| 7 | |
| 8 | omega-0.9.6a: |
| 9 | * configure.ac: Add HAVE_LIBTEXTCAT. |
| 10 | * omindex.cc: Added libtextcat. |
| 11 | * commonhelp.cc: Update stemmer help with HAVE_TEXTCAT (lang autodetection) |
| 12 | * langclass, langclass.conf: New directory and file. |
| 13 | |
1 | 14 | Tue May 12 14:07:27 GMT 2015 Olly Betts <olly@survex.com> |
2 | 15 | |
3 | 16 | * docs/overview.rst,omindex.cc: Allow --filter to handle commands |
-
diff --git a/xapian-applications/omega/Makefile.am b/xapian-applications/omega/Makefile.am
index e3befca..c975a1c 100644
a
|
b
|
EXTRA_DIST =\
|
69 | 69 | images/score-7.png \ |
70 | 70 | images/score-8.png \ |
71 | 71 | images/score-9.png \ |
72 | | xapian-omega.spec |
| 72 | xapian-omega.spec \ |
| 73 | \$(wildcard langclass/*.lm) langclass.conf |
73 | 74 | |
74 | 75 | AM_CPPFLAGS = \ |
75 | 76 | -I$(top_srcdir)/common \ |
… |
… |
omindex_SOURCES = omindex.cc myhtmlparse.cc htmlparse.cc\
|
154 | 155 | if NEED_MKDTEMP |
155 | 156 | omindex_SOURCES += portability/mkdtemp.cc |
156 | 157 | endif |
157 | | omindex_LDADD = $(MAGIC_LIBS) $(XAPIAN_LIBS) |
| 158 | omindex_LDADD = $(MAGIC_LIBS) $(TEXTCAT_LIBS) $(XAPIAN_LIBS) |
158 | 159 | |
159 | 160 | scriptindex_SOURCES = scriptindex.cc myhtmlparse.cc htmlparse.cc\ |
160 | 161 | common/getopt.cc commonhelp.cc utils.cc hashterm.cc loadfile.cc\ |
161 | 162 | common/safe.cc common/stringutils.cc utf8convert.cc utf8truncate.cc\ |
162 | 163 | common/keyword.cc timegm.cc |
163 | | scriptindex_LDADD = $(XAPIAN_LIBS) |
| 164 | scriptindex_LDADD = $(TEXTCAT_LIBS) $(XAPIAN_LIBS) |
164 | 165 | |
165 | 166 | omindex_list_SOURCES = common/getopt.cc omindex-list.cc |
166 | 167 | omindex_list_LDADD = $(XAPIAN_LIBS) |
-
diff --git a/xapian-applications/omega/configure.ac b/xapian-applications/omega/configure.ac
index 9a8bbfd..f2dcc3d 100644
a
|
b
|
case $ac_cv_func_snprintf in
|
241 | 241 | ;; |
242 | 242 | esac |
243 | 243 | |
| 244 | AC_ARG_WITH([textcat], |
| 245 | AC_HELP_STRING([--without-textcat], [don't use libtextcat for automatic language classification])) |
| 246 | TEXTCAT_LIBS= |
| 247 | if test xno != x$with_textcat; then |
| 248 | AC_CHECK_LIB([textcat], [textcat_Init], [ |
| 249 | TEXTCAT_LIBS=-ltextcat |
| 250 | AC_DEFINE([HAVE_LIBTEXTCAT], [1], [Define to 1 if you have libtextcat for automatic language classification]) |
| 251 | ]) |
| 252 | fi |
| 253 | AC_SUBST([TEXTCAT_LIBS]) |
| 254 | |
244 | 255 | AC_ARG_WITH(iconv, |
245 | 256 | AC_HELP_STRING([--with-iconv], [force use of iconv (error if not found)]) |
246 | 257 | AC_HELP_STRING([--without-iconv], [don't use iconv]), |
-
diff --git a/xapian-applications/omega/langclass.conf b/xapian-applications/omega/langclass.conf
new file mode 100644
index 0000000..8a6cf5c
-
|
+
|
|
| 1 | # A config file for the language models |
| 2 | # provided with Gertjan van Noord's language guesser |
| 3 | # (http://odur.let.rug.nl/~vannoord/TextCat/) |
| 4 | # |
| 5 | # Notes: |
| 6 | # - Putting the most probable languages at the top of the list |
| 7 | # improves performance, because this will raise the threshold for |
| 8 | # likely candidates more quickly. |
| 9 | # |
| 10 | /var/lib/omega/langclass/english.lm english |
| 11 | /var/lib/omega/langclass/german.lm german |
| 12 | /var/lib/omega/langclass/french.lm french |
| 13 | /var/lib/omega/langclass/danish.lm danish |
| 14 | /var/lib/omega/langclass/dutch.lm dutch |
| 15 | /var/lib/omega/langclass/finnish.lm finnish |
| 16 | /var/lib/omega/langclass/italian.lm italian |
| 17 | /var/lib/omega/langclass/norwegian.lm norwegian |
| 18 | /var/lib/omega/langclass/portuguese.lm portuguese |
| 19 | /var/lib/omega/langclass/russian-iso8859_5.lm russian |
| 20 | /var/lib/omega/langclass/russian-koi8_r.lm russian |
| 21 | /var/lib/omega/langclass/russian-windows1251.lm russian |
| 22 | /var/lib/omega/langclass/spanish.lm spanish |
| 23 | /var/lib/omega/langclass/swedish.lm swedish |
-
diff --git a/xapian-applications/omega/langclass/danish.lm b/xapian-applications/omega/langclass/danish.lm
new file mode 100644
index 0000000..5e5a61a
-
|
+
|
|
| 1 | _ 21274 |
| 2 | e 9291 |
| 3 | r 5307 |
| 4 | n 4733 |
| 5 | i 3976 |
| 6 | t 3948 |
| 7 | s 3751 |
| 8 | a 3296 |
| 9 | l 3063 |
| 10 | d 3025 |
| 11 | o 2868 |
| 12 | g 2471 |
| 13 | er 2164 |
| 14 | k 2002 |
| 15 | m 1680 |
| 16 | e_ 1655 |
| 17 | en 1613 |
| 18 | f 1507 |
| 19 | de 1484 |
| 20 | r_ 1379 |
| 21 | v 1245 |
| 22 | u 1176 |
| 23 | t_ 1081 |
| 24 | n_ 1032 |
| 25 | er_ 992 |
| 26 | b 942 |
| 27 | . 870 |
| 28 | ge 868 |
| 29 | ._ 831 |
| 30 | re 816 |
| 31 | h 816 |
| 32 | et 813 |
| 33 | te 813 |
| 34 | p 806 |
| 35 | in 788 |
| 36 | or 775 |
| 37 | _s 753 |
| 38 | _a 749 |
| 39 | en_ 712 |
| 40 | _e 691 |
| 41 | ti 689 |
| 42 | an 687 |
| 43 | , 681 |
| 44 | ,_ 677 |
| 45 | _f 655 |
| 46 | _d 645 |
| 47 | el 642 |
| 48 | ng 635 |
| 49 | nd 634 |
| 50 | g_ 634 |
| 51 | se 615 |
| 52 | le 615 |
| 53 | st 607 |
| 54 | s_ 601 |
| 55 | _o 572 |
| 56 | ne 560 |
| 57 | li 537 |
| 58 | et_ 524 |
| 59 | es 521 |
| 60 | _i 512 |
| 61 | ri 511 |
| 62 | sk 510 |
| 63 | _de 498 |
| 64 | å 497 |
| 65 | ar 475 |
| 66 | ed 473 |
| 67 | ig 463 |
| 68 | at 452 |
| 69 | _m 446 |
| 70 | is 443 |
| 71 | fo 441 |
| 72 | æ 441 |
| 73 | ve 438 |
| 74 | _k 434 |
| 75 | ø 432 |
| 76 | der 429 |
| 77 | ke 428 |
| 78 | ing 427 |
| 79 | og 426 |
| 80 | _b 412 |
| 81 | me 408 |
| 82 | il 407 |
| 83 | for 405 |
| 84 | ns 394 |
| 85 | y 389 |
| 86 | _h 380 |
| 87 | _t 374 |
| 88 | on 371 |
| 89 | d_ 370 |
| 90 | al 362 |
| 91 | be 359 |
| 92 | _fo 351 |
| 93 | af 336 |
| 94 | de_ 335 |
| 95 | _og 333 |
| 96 | _p 332 |
| 97 | og_ 325 |
| 98 | om 325 |
| 99 | _for 324 |
| 100 | _og_ 313 |
| 101 | l_ 308 |
| 102 | nge 302 |
| 103 | i_ 295 |
| 104 | _v 294 |
| 105 | c 289 |
| 106 | ter 283 |
| 107 | ll 280 |
| 108 | ni 278 |
| 109 | nde 278 |
| 110 | rs 277 |
| 111 | _af 277 |
| 112 | un 275 |
| 113 | ra 271 |
| 114 | ko 271 |
| 115 | den 270 |
| 116 | _i_ 268 |
| 117 | id 265 |
| 118 | til 265 |
| 119 | j 265 |
| 120 | vi 264 |
| 121 | D 260 |
| 122 | ere 256 |
| 123 | ma 255 |
| 124 | si 253 |
| 125 | f_ 252 |
| 126 | af_ 238 |
| 127 | _af_ 235 |
| 128 | ik 235 |
| 129 | m_ 234 |
| 130 | å_ 232 |
| 131 | _ti 227 |
| 132 | _D 226 |
| 133 | _u 226 |
| 134 | _er 225 |
| 135 | nt 224 |
| 136 | _en 224 |
| 137 | ls 221 |
| 138 | es_ 216 |
| 139 | lig 216 |
| 140 | ger 216 |
| 141 | re_ 210 |
| 142 | ag 210 |
| 143 | _me 207 |
| 144 | at_ 204 |
| 145 | lle 200 |
| 146 | ge_ 200 |
| 147 | _til 200 |
| 148 | ige 199 |
| 149 | _er_ 199 |
| 150 | der_ 199 |
| 151 | em 199 |
| 152 | ds 197 |
| 153 | r. 195 |
| 154 | io 195 |
| 155 | r._ 195 |
| 156 | ud 193 |
| 157 | _at 192 |
| 158 | _at_ 191 |
| 159 | ta 190 |
| 160 | els 190 |
| 161 | _l 190 |
| 162 | ha 190 |
| 163 | il_ 189 |
| 164 | or_ 189 |
| 165 | ke_ 186 |
| 166 | rt 185 |
| 167 | gen 184 |
| 168 | ka 183 |
| 169 | - 180 |
| 170 | rk 180 |
| 171 | ning 178 |
| 172 | ol 178 |
| 173 | nin 178 |
| 174 | la 177 |
| 175 | ld 175 |
| 176 | De 175 |
| 177 | it 173 |
| 178 | ede 172 |
| 179 | ed_ 171 |
| 180 | _ko 171 |
| 181 | lse 171 |
| 182 | ek 168 |
| 183 | else 167 |
| 184 | inge 167 |
| 185 | på 167 |
| 186 | ng_ 167 |
| 187 | _på 167 |
| 188 | iv 166 |
| 189 | ør 166 |
| 190 | so 165 |
| 191 | he 165 |
| 192 | ens 165 |
| 193 | ske 165 |
| 194 | ind 164 |
| 195 | til_ 163 |
| 196 | rn 163 |
| 197 | ide 162 |
| 198 | ev 162 |
| 199 | den_ 162 |
| 200 | to 162 |
| 201 | sen 160 |
| 202 | _be 160 |
| 203 | sa 160 |
| 204 | bl 158 |
| 205 | _g 158 |
| 206 | an_ 157 |
| 207 | det 156 |
| 208 | om_ 156 |
| 209 | ru 156 |
| 210 | va 155 |
| 211 | _til_ 155 |
| 212 | ste 154 |
| 213 | rd 153 |
| 214 | _på_ 152 |
| 215 | k_ 152 |
| 216 | på_ 152 |
| 217 | di 152 |
| 218 | kr 152 |
| 219 | K 151 |
| 220 | _De 149 |
| 221 | for_ 148 |
| 222 | te_ 148 |
| 223 | kon 148 |
| 224 | ver 147 |
| 225 | mm 146 |
| 226 | am 146 |
| 227 | _en_ 145 |
| 228 | _r 145 |
| 229 | ne_ 144 |
| 230 | ing_ 144 |
| 231 | tr 143 |
| 232 | le_ 142 |
| 233 | del 142 |
| 234 | _in 142 |
| 235 | gt 140 |
| 236 | _st 138 |
| 237 | S 138 |
| 238 | eg 138 |
| 239 | gs 138 |
| 240 | tt 138 |
| 241 | r, 137 |
| 242 | ser 137 |
| 243 | r,_ 137 |
| 244 | er. 137 |
| 245 | ro 137 |
| 246 | er._ 137 |
| 247 | _for_ 136 |
| 248 | ent 136 |
| 249 | kt 136 |
| 250 | eri 135 |
| 251 | ur 134 |
| 252 | lin 134 |
| 253 | B 133 |
| 254 | A 133 |
| 255 | sti 133 |
| 256 | ner 133 |
| 257 | da 133 |
| 258 | ris 132 |
| 259 | ion 132 |
| 260 | _K 131 |
| 261 | ern 131 |
| 262 | ers 130 |
| 263 | ist 130 |
| 264 | ær 130 |
| 265 | ige_ 130 |
| 266 | _si 130 |
| 267 | tte 129 |
| 268 | E 128 |
| 269 | _n 128 |
| 270 | nn 127 |
| 271 | _B 126 |
| 272 | _ha 126 |
| 273 | _. 126 |
| 274 | rne 125 |
| 275 | H 125 |
| 276 | _ud 125 |
| 277 | rin 124 |
| 278 | na 124 |
| 279 | und 124 |
| 280 | ft 124 |
| 281 | _der 124 |
| 282 | ku 123 |
| 283 | _A 122 |
| 284 | ler 120 |
| 285 | and 120 |
| 286 | end 120 |
| 287 | ns_ 120 |
| 288 | rg 119 |
| 289 | op 119 |
| 290 | er,_ 119 |
| 291 | er, 119 |
| 292 | ar_ 118 |
| 293 | P 118 |
| 294 | _S 117 |
| 295 | _H 117 |
| 296 | _._ 116 |
| 297 | ov 116 |
| 298 | erne 115 |
| 299 | tio 115 |
| 300 | med 115 |
| 301 | tion 115 |
| 302 | _E 115 |
| 303 | _P 115 |
| 304 | det_ 114 |
| 305 | pr 114 |
| 306 | e. 113 |
| 307 | ter_ 113 |
| 308 | : 113 |
| 309 | kk 113 |
| 310 | e._ 113 |
| 311 | e,_ 113 |
| 312 | e, 113 |
| 313 | od 113 |
| 314 | kke 113 |
| 315 | ten 113 |
| 316 | ling 113 |
| 317 | :_ 112 |
| 318 | mi 112 |
| 319 | eli 112 |
| 320 | lo 111 |
| 321 | som 111 |
| 322 | _den 111 |
| 323 | rb 110 |
| 324 | se_ 110 |
| 325 | ell 110 |
| 326 | sid 110 |
| 327 | nne 109 |
| 328 | fi 108 |
| 329 | lt 107 |
| 330 | v_ 107 |
| 331 | _de_ 107 |
| 332 | ark 106 |
| 333 | lige 106 |
| 334 | ngen 106 |
| 335 | ie 105 |
| 336 | _med 105 |
| 337 | _der_ 105 |
| 338 | ring 105 |
| 339 | a_ 105 |
| 340 | _vi 104 |
| 341 | -_ 104 |
| 342 | ys 103 |
| 343 | gel 103 |
| 344 | _so 103 |
| 345 | ia 103 |
| 346 | ive 102 |
| 347 | ej 101 |
| 348 | ati 101 |
| 349 | ren 101 |
| 350 | _det 101 |
| 351 | side 101 |
| 352 | ske_ 101 |
| 353 | br 100 |
| 354 | gi 100 |
| 355 | F 100 |
| 356 | M 100 |
| 357 | ul 99 |
| 358 | isk 99 |
| 359 | men 99 |
| 360 | n,_ 99 |
| 361 | age 99 |
| 362 | fr 99 |
| 363 | n, 99 |
| 364 | tu 98 |
| 365 | ts 98 |
| 366 | _ma 98 |
| 367 | nder 98 |
| 368 | ot 97 |
| 369 | dt 97 |
| 370 | R 97 |
| 371 | med_ 96 |
| 372 | ho 96 |
| 373 | ans 95 |
| 374 | _kon 95 |
| 375 | pe 95 |
| 376 | ce 94 |
| 377 | gr 93 |
| 378 | mme 92 |
| 379 | ret 92 |
| 380 | lige_ 92 |
| 381 | mu 91 |
| 382 | _med_ 91 |
| 383 | hv 91 |
| 384 | væ 91 |
| 385 | Det 91 |
| 386 | ens_ 91 |
| 387 | kl 91 |
| 388 | _M 90 |
| 389 | T 90 |
| 390 | ingen 90 |
| 391 | rm 90 |
| 392 | ill 89 |
| 393 | elle 89 |
| 394 | ef 89 |
| 395 | ene 89 |
| 396 | nds 89 |
| 397 | ove 89 |
| 398 | som_ 89 |
| 399 | C 88 |
| 400 | _den_ 88 |
-
diff --git a/xapian-applications/omega/langclass/dutch.lm b/xapian-applications/omega/langclass/dutch.lm
new file mode 100644
index 0000000..17a0626
-
|
+
|
|
| 1 | _ 20104 |
| 2 | e 9848 |
| 3 | n 5323 |
| 4 | a 3733 |
| 5 | t 3683 |
| 6 | i 3490 |
| 7 | r 3195 |
| 8 | d 2876 |
| 9 | o 2845 |
| 10 | n_ 2443 |
| 11 | en 2439 |
| 12 | s 2195 |
| 13 | e_ 1842 |
| 14 | l 1837 |
| 15 | g 1522 |
| 16 | en_ 1500 |
| 17 | de 1489 |
| 18 | er 1388 |
| 19 | t_ 1377 |
| 20 | v 1253 |
| 21 | u 1217 |
| 22 | k 1204 |
| 23 | _d 1136 |
| 24 | h 1102 |
| 25 | m 1084 |
| 26 | an 939 |
| 27 | te 875 |
| 28 | j 857 |
| 29 | in 810 |
| 30 | _v 793 |
| 31 | r_ 751 |
| 32 | de_ 742 |
| 33 | ee 737 |
| 34 | p 732 |
| 35 | et 718 |
| 36 | ge 716 |
| 37 | aa 708 |
| 38 | b 703 |
| 39 | _e 686 |
| 40 | st 669 |
| 41 | z 668 |
| 42 | ie 662 |
| 43 | _de 655 |
| 44 | w 631 |
| 45 | c 611 |
| 46 | . 604 |
| 47 | s_ 582 |
| 48 | _de_ 576 |
| 49 | _h 572 |
| 50 | el 570 |
| 51 | ij 564 |
| 52 | ._ 554 |
| 53 | et_ 531 |
| 54 | an_ 522 |
| 55 | he 505 |
| 56 | _o 497 |
| 57 | nd 478 |
| 58 | _i 475 |
| 59 | ar 459 |
| 60 | _m 451 |
| 61 | re 442 |
| 62 | ve 441 |
| 63 | ' 428 |
| 64 | or 424 |
| 65 | ng 421 |
| 66 | at 418 |
| 67 | _s 415 |
| 68 | oo 403 |
| 69 | _z 401 |
| 70 | le 395 |
| 71 | _b 394 |
| 72 | _a 391 |
| 73 | _he 386 |
| 74 | va 385 |
| 75 | er_ 381 |
| 76 | me 372 |
| 77 | _w 368 |
| 78 | f 361 |
| 79 | on 351 |
| 80 | _t 351 |
| 81 | _va 345 |
| 82 | _g 342 |
| 83 | di 342 |
| 84 | nt 340 |
| 85 | , 335 |
| 86 | g_ 335 |
| 87 | ,_ 334 |
| 88 | van 327 |
| 89 | ch 326 |
| 90 | is 326 |
| 91 | ing 325 |
| 92 | be 325 |
| 93 | ni 320 |
| 94 | it 317 |
| 95 | een 316 |
| 96 | _van 315 |
| 97 | al 310 |
| 98 | den 309 |
| 99 | ti 309 |
| 100 | van_ 307 |
| 101 | oe 302 |
| 102 | ke 302 |
| 103 | _van_ 299 |
| 104 | aar 299 |
| 105 | d_ 295 |
| 106 | we 293 |
| 107 | da 292 |
| 108 | tu 290 |
| 109 | _ee 290 |
| 110 | ud 287 |
| 111 | een_ 286 |
| 112 | li 284 |
| 113 | es 282 |
| 114 | _st 281 |
| 115 | ver 281 |
| 116 | ten 281 |
| 117 | ri 275 |
| 118 | nde 275 |
| 119 | der 274 |
| 120 | _in 270 |
| 121 | k_ 268 |
| 122 | vo 267 |
| 123 | het 266 |
| 124 | oor 264 |
| 125 | _het 262 |
| 126 | het_ 262 |
| 127 | _het_ 259 |
| 128 | _een 258 |
| 129 | l_ 258 |
| 130 | ze 257 |
| 131 | _n 254 |
| 132 | ro 248 |
| 133 | gen 243 |
| 134 | _een_ 241 |
| 135 | at_ 240 |
| 136 | op 238 |
| 137 | n. 238 |
| 138 | _en 237 |
| 139 | rs 237 |
| 140 | _da 235 |
| 141 | stu 232 |
| 142 | in_ 230 |
| 143 | _be 229 |
| 144 | _ge 228 |
| 145 | _k 226 |
| 146 | rd 226 |
| 147 | tud 220 |
| 148 | _en_ 220 |
| 149 | n._ 217 |
| 150 | te_ 209 |
| 151 | ei 208 |
| 152 | ent 206 |
| 153 | _me 203 |
| 154 | la 202 |
| 155 | ek 202 |
| 156 | ed 201 |
| 157 | ra 200 |
| 158 | stud 200 |
| 159 | en. 200 |
| 160 | ie_ 197 |
| 161 | ste 196 |
| 162 | _vo 195 |
| 163 | _in_ 193 |
| 164 | _stu 191 |
| 165 | zi 191 |
| 166 | om 189 |
| 167 | ui 189 |
| 168 | en._ 186 |
| 169 | ten_ 185 |
| 170 | _stud 185 |
| 171 | ude 184 |
| 172 | die 183 |
| 173 | ns 183 |
| 174 | _j 181 |
| 175 | D 179 |
| 176 | aan 179 |
| 177 | se 179 |
| 178 | ma 178 |
| 179 | _ve 176 |
| 180 | ne 174 |
| 181 | _p 174 |
| 182 | eg 173 |
| 183 | p_ 172 |
| 184 | ar_ 172 |
| 185 | aar_ 171 |
| 186 | _te 170 |
| 187 | ng_ 169 |
| 188 | _we 169 |
| 189 | '' 167 |
| 190 | _D 165 |
| 191 | ers 164 |
| 192 | _op 163 |
| 193 | dat 161 |
| 194 | dat_ 160 |
| 195 | ig 160 |
| 196 | ere 159 |
| 197 | eer 158 |
| 198 | _zi 158 |
| 199 | voor 156 |
| 200 | voo 156 |
| 201 | nge 155 |
| 202 | nder 151 |
| 203 | nte 151 |
| 204 | or_ 150 |
| 205 | ta 150 |
| 206 | je 149 |
| 207 | ing_ 148 |
| 208 | ll 148 |
| 209 | _ver 147 |
| 210 | jk 146 |
| 211 | oor_ 146 |
| 212 | _dat 145 |
| 213 | ijk 145 |
| 214 | ren 145 |
| 215 | is_ 145 |
| 216 | _dat_ 144 |
| 217 | _l 144 |
| 218 | and 144 |
| 219 | lij 143 |
| 220 | ter 143 |
| 221 | na 142 |
| 222 | uden 139 |
| 223 | tude 138 |
| 224 | _voor 136 |
| 225 | _voo 136 |
| 226 | ond 136 |
| 227 | ken 135 |
| 228 | cht 135 |
| 229 | _al 135 |
| 230 | ht 135 |
| 231 | wa 134 |
| 232 | ho 133 |
| 233 | em 133 |
| 234 | den_ 133 |
| 235 | pe 132 |
| 236 | sc 132 |
| 237 | un 131 |
| 238 | ur 131 |
| 239 | _di 130 |
| 240 | gen_ 130 |
| 241 | zo 129 |
| 242 | rt 129 |
| 243 | ev 128 |
| 244 | mo 128 |
| 245 | lijk 127 |
| 246 | _is 126 |
| 247 | stude 124 |
| 248 | ha 123 |
| 249 | to 122 |
| 250 | el_ 121 |
| 251 | og 121 |
| 252 | op_ 121 |
| 253 | sch 120 |
| 254 | ol 120 |
| 255 | ente 119 |
| 256 | _u 118 |
| 257 | pr 118 |
| 258 | end 118 |
| 259 | mi 117 |
| 260 | iet 116 |
| 261 | _aa 116 |
| 262 | eli 115 |
| 263 | dent 115 |
| 264 | ijn 115 |
| 265 | jn 115 |
| 266 | ou 115 |
| 267 | men 114 |
| 268 | _' 114 |
| 269 | tie 113 |
| 270 | _is_ 113 |
| 271 | nie 113 |
| 272 | tr 112 |
| 273 | ak 112 |
| 274 | id 112 |
| 275 | udent 111 |
| 276 | tuden 111 |
| 277 | uit 110 |
| 278 | _te_ 109 |
| 279 | aan_ 109 |
| 280 | ld 109 |
| 281 | S 108 |
| 282 | _aan 108 |
| 283 | ede 108 |
| 284 | ja 107 |
| 285 | nten 107 |
| 286 | it_ 107 |
| 287 | je_ 107 |
| 288 | ts 107 |
| 289 | erd 106 |
| 290 | est 106 |
| 291 | E 105 |
| 292 | _op_ 105 |
| 293 | ad 104 |
| 294 | al_ 104 |
| 295 | _ze 104 |
| 296 | _on 104 |
| 297 | rk 104 |
| 298 | lle 103 |
| 299 | ens 103 |
| 300 | gel 103 |
| 301 | m_ 103 |
| 302 | len 103 |
| 303 | _r 102 |
| 304 | ec 102 |
| 305 | inge 102 |
| 306 | met 102 |
| 307 | _met 101 |
| 308 | si 100 |
| 309 | die_ 100 |
| 310 | us 100 |
| 311 | onde 99 |
| 312 | _ni 99 |
| 313 | De 99 |
| 314 | eu 99 |
| 315 | dente 99 |
| 316 | enten 99 |
| 317 | ic 99 |
| 318 | _met_ 98 |
| 319 | f_ 98 |
| 320 | met_ 98 |
| 321 | no 97 |
| 322 | ko 96 |
| 323 | voor_ 96 |
| 324 | rde 96 |
| 325 | H 96 |
| 326 | ngen 95 |
| 327 | lo 95 |
| 328 | ot 95 |
| 329 | as 94 |
| 330 | zij 93 |
| 331 | _nie 92 |
| 332 | vi 92 |
| 333 | eb 92 |
| 334 | _De 92 |
| 335 | _zij 91 |
| 336 | ep 91 |
| 337 | wi 91 |
| 338 | _zo 91 |
| 339 | kt 91 |
| 340 | ege 91 |
| 341 | G 91 |
| 342 | bi 90 |
| 343 | j_ 90 |
| 344 | ij_ 90 |
| 345 | ze_ 90 |
| 346 | do 90 |
| 347 | lan 89 |
| 348 | ov 89 |
| 349 | udi 89 |
| 350 | ord 89 |
| 351 | onder 89 |
| 352 | V 88 |
| 353 | elij 88 |
| 354 | _wa 88 |
| 355 | elijk 88 |
| 356 | ef 88 |
| 357 | _die 87 |
| 358 | ag 86 |
| 359 | erk 86 |
| 360 | eren 86 |
| 361 | R 85 |
| 362 | ik 85 |
| 363 | _ma 85 |
| 364 | gr 85 |
| 365 | am 85 |
| 366 | _mo 84 |
| 367 | ul 84 |
| 368 | nn 83 |
| 369 | eve 83 |
| 370 | De_ 83 |
| 371 | maa 83 |
| 372 | ingen 83 |
| 373 | wo 83 |
| 374 | _'' 83 |
| 375 | O 83 |
| 376 | tudi 82 |
| 377 | I 82 |
| 378 | nt_ 82 |
| 379 | tudie 81 |
| 380 | ven 81 |
| 381 | udie 81 |
| 382 | nten_ 81 |
| 383 | _die_ 81 |
| 384 | jaa 80 |
| 385 | ka 80 |
| 386 | eke 80 |
| 387 | ite 80 |
| 388 | a_ 80 |
| 389 | _je 80 |
| 390 | ac 80 |
| 391 | jaar 80 |
| 392 | _je_ 79 |
| 393 | _H 79 |
| 394 | _zijn 79 |
| 395 | zijn 79 |
| 396 | n, 78 |
| 397 | nen 78 |
| 398 | N 78 |
| 399 | n,_ 78 |
| 400 | ijn_ 77 |
-
diff --git a/xapian-applications/omega/langclass/english.lm b/xapian-applications/omega/langclass/english.lm
new file mode 100644
index 0000000..ab71632
-
|
+
|
|
| 1 | _ 20326 |
| 2 | e 6617 |
| 3 | t 4843 |
| 4 | o 3834 |
| 5 | n 3653 |
| 6 | i 3602 |
| 7 | a 3433 |
| 8 | s 2945 |
| 9 | r 2921 |
| 10 | h 2507 |
| 11 | e_ 2000 |
| 12 | d 1816 |
| 13 | _t 1785 |
| 14 | c 1639 |
| 15 | l 1635 |
| 16 | th 1535 |
| 17 | he 1351 |
| 18 | _th 1333 |
| 19 | u 1309 |
| 20 | f 1253 |
| 21 | m 1175 |
| 22 | p 1151 |
| 23 | _a 1145 |
| 24 | the 1142 |
| 25 | _the 1060 |
| 26 | s_ 978 |
| 27 | er 968 |
| 28 | _o 967 |
| 29 | he_ 928 |
| 30 | d_ 888 |
| 31 | t_ 885 |
| 32 | the_ 844 |
| 33 | _the_ 843 |
| 34 | on 842 |
| 35 | in 817 |
| 36 | y 783 |
| 37 | n_ 773 |
| 38 | b 761 |
| 39 | re 754 |
| 40 | , 734 |
| 41 | ,_ 732 |
| 42 | an 732 |
| 43 | g 728 |
| 44 | w 718 |
| 45 | _i 707 |
| 46 | en 676 |
| 47 | f_ 599 |
| 48 | y_ 595 |
| 49 | of 594 |
| 50 | _of 592 |
| 51 | es 589 |
| 52 | ti 587 |
| 53 | v 580 |
| 54 | _of_ 575 |
| 55 | of_ 575 |
| 56 | nd 568 |
| 57 | at 549 |
| 58 | r_ 540 |
| 59 | _w 534 |
| 60 | it 522 |
| 61 | ed 496 |
| 62 | _p 494 |
| 63 | nt 485 |
| 64 | _c 462 |
| 65 | o_ 457 |
| 66 | io 450 |
| 67 | _an 439 |
| 68 | te 432 |
| 69 | or 425 |
| 70 | _b 418 |
| 71 | nd_ 407 |
| 72 | to 406 |
| 73 | st 402 |
| 74 | is 401 |
| 75 | _s 396 |
| 76 | _in 389 |
| 77 | ion 385 |
| 78 | and 385 |
| 79 | de 384 |
| 80 | ve 382 |
| 81 | ha 375 |
| 82 | ar 366 |
| 83 | _m 361 |
| 84 | and_ 360 |
| 85 | _and 360 |
| 86 | _and_ 358 |
| 87 | se 353 |
| 88 | _to 347 |
| 89 | me 346 |
| 90 | to_ 344 |
| 91 | ed_ 339 |
| 92 | . 330 |
| 93 | be 329 |
| 94 | _f 329 |
| 95 | ._ 329 |
| 96 | _to_ 320 |
| 97 | co 317 |
| 98 | ic 316 |
| 99 | ns 308 |
| 100 | al 307 |
| 101 | le 304 |
| 102 | ou 304 |
| 103 | ce 293 |
| 104 | ent 279 |
| 105 | l_ 278 |
| 106 | _co 277 |
| 107 | tio 275 |
| 108 | on_ 274 |
| 109 | _d 274 |
| 110 | tion 268 |
| 111 | ri 266 |
| 112 | _e 264 |
| 113 | ng 253 |
| 114 | hi 251 |
| 115 | er_ 249 |
| 116 | ea 246 |
| 117 | as 245 |
| 118 | _be 242 |
| 119 | pe 242 |
| 120 | h_ 234 |
| 121 | _r 232 |
| 122 | ec 227 |
| 123 | ch 223 |
| 124 | ro 222 |
| 125 | ct 220 |
| 126 | _h 219 |
| 127 | pr 217 |
| 128 | in_ 217 |
| 129 | ne 214 |
| 130 | ll 214 |
| 131 | rt 213 |
| 132 | s,_ 210 |
| 133 | s, 210 |
| 134 | li 209 |
| 135 | ra 208 |
| 136 | T 207 |
| 137 | wh 204 |
| 138 | a_ 203 |
| 139 | ac 201 |
| 140 | _wh 199 |
| 141 | _n 196 |
| 142 | ts 196 |
| 143 | di 196 |
| 144 | es_ 195 |
| 145 | si 194 |
| 146 | re_ 193 |
| 147 | at_ 192 |
| 148 | nc 192 |
| 149 | ie 190 |
| 150 | _a_ 188 |
| 151 | _in_ 185 |
| 152 | ing 184 |
| 153 | us 182 |
| 154 | _re 182 |
| 155 | g_ 179 |
| 156 | ng_ 178 |
| 157 | op 178 |
| 158 | con 177 |
| 159 | tha 175 |
| 160 | _l 174 |
| 161 | _tha 174 |
| 162 | ver 173 |
| 163 | ma 173 |
| 164 | ion_ 171 |
| 165 | _con 171 |
| 166 | ci 170 |
| 167 | ons 170 |
| 168 | _it 170 |
| 169 | po 169 |
| 170 | ere 168 |
| 171 | is_ 167 |
| 172 | ta 167 |
| 173 | la 166 |
| 174 | _pr 165 |
| 175 | fo 164 |
| 176 | ho 164 |
| 177 | ir 162 |
| 178 | ss 161 |
| 179 | men 160 |
| 180 | be_ 160 |
| 181 | un 159 |
| 182 | ty 159 |
| 183 | _be_ 158 |
| 184 | ing_ 157 |
| 185 | om 156 |
| 186 | ot 156 |
| 187 | hat 155 |
| 188 | ly 155 |
| 189 | _g 155 |
| 190 | em 153 |
| 191 | _T 151 |
| 192 | rs 150 |
| 193 | mo 148 |
| 194 | ch_ 148 |
| 195 | wi 147 |
| 196 | we 147 |
| 197 | ad 147 |
| 198 | ts_ 145 |
| 199 | res 143 |
| 200 | _wi 143 |
| 201 | I 143 |
| 202 | hat_ 142 |
| 203 | ei 141 |
| 204 | ly_ 141 |
| 205 | ni 140 |
| 206 | os 140 |
| 207 | ca 139 |
| 208 | ur 139 |
| 209 | A 138 |
| 210 | ut 138 |
| 211 | that 138 |
| 212 | _that 137 |
| 213 | ati 137 |
| 214 | _fo 137 |
| 215 | st_ 137 |
| 216 | il 136 |
| 217 | or_ 136 |
| 218 | for 136 |
| 219 | pa 136 |
| 220 | ul 135 |
| 221 | ate 135 |
| 222 | ter 134 |
| 223 | it_ 134 |
| 224 | nt_ 133 |
| 225 | that_ 132 |
| 226 | _ha 129 |
| 227 | al_ 128 |
| 228 | el 128 |
| 229 | as_ 127 |
| 230 | ll_ 127 |
| 231 | _ma 125 |
| 232 | no 124 |
| 233 | ment 124 |
| 234 | an_ 124 |
| 235 | tion_ 122 |
| 236 | su 122 |
| 237 | bl 122 |
| 238 | _de 122 |
| 239 | nce 120 |
| 240 | pl 120 |
| 241 | fe 119 |
| 242 | tr 118 |
| 243 | so 118 |
| 244 | int 115 |
| 245 | ov 114 |
| 246 | e, 114 |
| 247 | e,_ 114 |
| 248 | _u 113 |
| 249 | ent_ 113 |
| 250 | Th 113 |
| 251 | her 113 |
| 252 | j 112 |
| 253 | atio 112 |
| 254 | ation 112 |
| 255 | _Th 111 |
| 256 | le_ 110 |
| 257 | ai 110 |
| 258 | _it_ 110 |
| 259 | _on 110 |
| 260 | _for 109 |
| 261 | ect 109 |
| 262 | k 109 |
| 263 | hic 108 |
| 264 | est 108 |
| 265 | der 107 |
| 266 | tu 107 |
| 267 | na 106 |
| 268 | _by_ 106 |
| 269 | by_ 106 |
| 270 | E 106 |
| 271 | by 106 |
| 272 | _by 106 |
| 273 | ve_ 106 |
| 274 | _di 106 |
| 275 | en_ 104 |
| 276 | vi 104 |
| 277 | m_ 103 |
| 278 | _whi 102 |
| 279 | iv 102 |
| 280 | whi 102 |
| 281 | ns_ 102 |
| 282 | _A 101 |
| 283 | ich 100 |
| 284 | ge 100 |
| 285 | pro 99 |
| 286 | ess 99 |
| 287 | _whic 99 |
| 288 | ers 99 |
| 289 | hich 99 |
| 290 | ce_ 99 |
| 291 | which 99 |
| 292 | whic 99 |
| 293 | all 98 |
| 294 | ove 98 |
| 295 | _is 98 |
| 296 | ich_ 97 |
| 297 | ee 97 |
| 298 | hich_ 97 |
| 299 | n,_ 96 |
| 300 | n, 96 |
| 301 | im 95 |
| 302 | ir_ 94 |
| 303 | hei 94 |
| 304 | ions 94 |
| 305 | sti 94 |
| 306 | se_ 94 |
| 307 | per 93 |
| 308 | The 93 |
| 309 | _pa 93 |
| 310 | heir 93 |
| 311 | id 93 |
| 312 | eir 93 |
| 313 | eir_ 93 |
| 314 | ig 93 |
| 315 | heir_ 93 |
| 316 | _no 93 |
| 317 | ev 93 |
| 318 | era 92 |
| 319 | _int 92 |
| 320 | ted 91 |
| 321 | _The 91 |
| 322 | ies 91 |
| 323 | art 91 |
| 324 | thei 90 |
| 325 | _ar 90 |
| 326 | _thei 90 |
| 327 | their 90 |
| 328 | _pro 90 |
| 329 | et 89 |
| 330 | _pe 88 |
| 331 | _mo 88 |
| 332 | ther 88 |
| 333 | x 87 |
| 334 | gh 87 |
| 335 | S 87 |
| 336 | _is_ 87 |
| 337 | ol 87 |
| 338 | ty_ 87 |
| 339 | _I 86 |
| 340 | nde 86 |
| 341 | am 86 |
| 342 | rn 86 |
| 343 | nte 86 |
| 344 | mp 85 |
| 345 | _su 84 |
| 346 | _we 84 |
| 347 | par 84 |
| 348 | _v 84 |
| 349 | pu 82 |
| 350 | his 82 |
| 351 | ow 82 |
| 352 | mi 82 |
| 353 | go 81 |
| 354 | N 81 |
| 355 | ue 81 |
| 356 | ple 81 |
| 357 | ep 80 |
| 358 | ab 80 |
| 359 | ;_ 80 |
| 360 | ; 80 |
| 361 | ex 80 |
| 362 | ain 80 |
| 363 | over 80 |
| 364 | _un 79 |
| 365 | q 79 |
| 366 | qu 79 |
| 367 | pp 79 |
| 368 | ith 79 |
| 369 | ry 79 |
| 370 | _as 79 |
| 371 | ber 79 |
| 372 | ub 78 |
| 373 | av 78 |
| 374 | uc 78 |
| 375 | s._ 77 |
| 376 | s. 77 |
| 377 | enc 77 |
| 378 | are 77 |
| 379 | iti 77 |
| 380 | gr 76 |
| 381 | his_ 76 |
| 382 | ua 76 |
| 383 | part 76 |
| 384 | ff 75 |
| 385 | eve 75 |
| 386 | O 75 |
| 387 | rea 74 |
| 388 | ous 74 |
| 389 | ia 74 |
| 390 | The_ 73 |
| 391 | ag 73 |
| 392 | mb 73 |
| 393 | _go 73 |
| 394 | fa 72 |
| 395 | on,_ 72 |
| 396 | ern 72 |
| 397 | t,_ 72 |
| 398 | on, 72 |
| 399 | t, 72 |
| 400 | _me 71 |
-
diff --git a/xapian-applications/omega/langclass/finnish.lm b/xapian-applications/omega/langclass/finnish.lm
new file mode 100644
index 0000000..328f886
-
|
+
|
|
| 1 | _ 19984 |
| 2 | a 9133 |
| 3 | i 8384 |
| 4 | t 7797 |
| 5 | e 6481 |
| 6 | n 6431 |
| 7 | s 5897 |
| 8 | l 4504 |
| 9 | o 4163 |
| 10 | u 4106 |
| 11 | k 4013 |
| 12 | ä 3354 |
| 13 | n_ 2868 |
| 14 | m 2569 |
| 15 | a_ 1987 |
| 16 | v 1905 |
| 17 | r 1827 |
| 18 | ta 1580 |
| 19 | en 1553 |
| 20 | is 1515 |
| 21 | h 1508 |
| 22 | y 1462 |
| 23 | st 1390 |
| 24 | in 1375 |
| 25 | p 1342 |
| 26 | j 1333 |
| 27 | an 1139 |
| 28 | si 1073 |
| 29 | tt 1030 |
| 30 | te 1008 |
| 31 | en_ 982 |
| 32 | _k 980 |
| 33 | it 974 |
| 34 | ll 947 |
| 35 | aa 942 |
| 36 | ä_ 902 |
| 37 | va 878 |
| 38 | el 855 |
| 39 | _t 851 |
| 40 | ka 846 |
| 41 | i_ 835 |
| 42 | . 832 |
| 43 | se 818 |
| 44 | li 806 |
| 45 | tä 804 |
| 46 | oi 767 |
| 47 | ai 744 |
| 48 | ._ 739 |
| 49 | tu 734 |
| 50 | _o 719 |
| 51 | mi 715 |
| 52 | al 703 |
| 53 | on 684 |
| 54 | d 681 |
| 55 | _v 662 |
| 56 | et 654 |
| 57 | _j 641 |
| 58 | t_ 635 |
| 59 | ti 632 |
| 60 | _m 628 |
| 61 | _s 620 |
| 62 | ja 616 |
| 63 | ma 596 |
| 64 | sa 595 |
| 65 | la 582 |
| 66 | ist 575 |
| 67 | _e 565 |
| 68 | to 565 |
| 69 | ks 557 |
| 70 | in_ 554 |
| 71 | es 551 |
| 72 | il 538 |
| 73 | an_ 536 |
| 74 | ki 527 |
| 75 | , 525 |
| 76 | ku 525 |
| 77 | ,_ 524 |
| 78 | us 520 |
| 79 | as 514 |
| 80 | nt 512 |
| 81 | ri 495 |
| 82 | ke 494 |
| 83 | at 491 |
| 84 | _p 485 |
| 85 | le 484 |
| 86 | ik 483 |
| 87 | ss 477 |
| 88 | ut 469 |
| 89 | ö 469 |
| 90 | sta 460 |
| 91 | ee 459 |
| 92 | uu 458 |
| 93 | ol 457 |
| 94 | ta_ 451 |
| 95 | ne 445 |
| 96 | ää 445 |
| 97 | ei 443 |
| 98 | uo 436 |
| 99 | ko 433 |
| 100 | un 430 |
| 101 | lu 421 |
| 102 | ii 420 |
| 103 | e_ 418 |
| 104 | nn 413 |
| 105 | _h 412 |
| 106 | ar 408 |
| 107 | er 402 |
| 108 | än 396 |
| 109 | ja_ 386 |
| 110 | im 381 |
| 111 | on_ 365 |
| 112 | _va 363 |
| 113 | aan 354 |
| 114 | _a 352 |
| 115 | me 350 |
| 116 | ak 345 |
| 117 | ssa 331 |
| 118 | na 330 |
| 119 | ie 329 |
| 120 | pa 327 |
| 121 | _ja 326 |
| 122 | ia 325 |
| 123 | tä_ 322 |
| 124 | _l 319 |
| 125 | vi 317 |
| 126 | ise 316 |
| 127 | tta 315 |
| 128 | de 314 |
| 129 | os 312 |
| 130 | lli 309 |
| 131 | _ja_ 304 |
| 132 | jo 295 |
| 133 | vä 290 |
| 134 | su 289 |
| 135 | au 287 |
| 136 | lis 286 |
| 137 | _on 285 |
| 138 | sä 284 |
| 139 | uk 280 |
| 140 | am 280 |
| 141 | ot 280 |
| 142 | ty 275 |
| 143 | ett 271 |
| 144 | ttä 270 |
| 145 | ni 269 |
| 146 | lä 267 |
| 147 | ksi 264 |
| 148 | nk 264 |
| 149 | ht 263 |
| 150 | ul 261 |
| 151 | ell 261 |
| 152 | sa_ 259 |
| 153 | ha 257 |
| 154 | sen 257 |
| 155 | a. 254 |
| 156 | isi 253 |
| 157 | ste 253 |
| 158 | aan_ 252 |
| 159 | _on_ 252 |
| 160 | _ka 252 |
| 161 | sk 251 |
| 162 | kk 246 |
| 163 | itt 245 |
| 164 | ok 242 |
| 165 | a._ 239 |
| 166 | all 239 |
| 167 | yt 239 |
| 168 | mä 237 |
| 169 | mu 237 |
| 170 | av 237 |
| 171 | _y 236 |
| 172 | lla 233 |
| 173 | taa 231 |
| 174 | ais 231 |
| 175 | een 230 |
| 176 | K 230 |
| 177 | lt 228 |
| 178 | s_ 227 |
| 179 | ast 227 |
| 180 | iv 226 |
| 181 | ssa_ 225 |
| 182 | ra 225 |
| 183 | - 223 |
| 184 | kse 223 |
| 185 | oit 220 |
| 186 | om 220 |
| 187 | T 219 |
| 188 | _ku 218 |
| 189 | än_ 216 |
| 190 | aa_ 214 |
| 191 | at_ 214 |
| 192 | tel 211 |
| 193 | ui 210 |
| 194 | si_ 208 |
| 195 | rk 207 |
| 196 | sta_ 207 |
| 197 | _jo 203 |
| 198 | kä 202 |
| 199 | _K 201 |
| 200 | est 200 |
| 201 | em 200 |
| 202 | he 199 |
| 203 | _n 199 |
| 204 | vo 198 |
| 205 | _ta 196 |
| 206 | eh 196 |
| 207 | _ol 196 |
| 208 | S 196 |
| 209 | nta 196 |
| 210 | _ko 194 |
| 211 | je 194 |
| 212 | stä 194 |
| 213 | är 193 |
| 214 | ust 191 |
| 215 | mis 191 |
| 216 | ns 190 |
| 217 | pu 189 |
| 218 | nen 188 |
| 219 | ät 188 |
| 220 | toi 188 |
| 221 | iin 187 |
| 222 | ten 187 |
| 223 | min 186 |
| 224 | ista 185 |
| 225 | hd 184 |
| 226 | a, 184 |
| 227 | a,_ 184 |
| 228 | sen_ 183 |
| 229 | E 182 |
| 230 | lle 181 |
| 231 | vat 179 |
| 232 | ill 177 |
| 233 | no 176 |
| 234 | pä 176 |
| 235 | lm 176 |
| 236 | llis 175 |
| 237 | n. 175 |
| 238 | io 172 |
| 239 | ine 171 |
| 240 | n._ 170 |
| 241 | pi 169 |
| 242 | uks 168 |
| 243 | ava 168 |
| 244 | ään 166 |
| 245 | nen_ 165 |
| 246 | ah 165 |
| 247 | _mu 164 |
| 248 | tus 163 |
| 249 | mm 162 |
| 250 | _to 162 |
| 251 | ek 160 |
| 252 | int 159 |
| 253 | _r 159 |
| 254 | lin 158 |
| 255 | oim 158 |
| 256 | _T 158 |
| 257 | A 158 |
| 258 | imi 157 |
| 259 | tö 157 |
| 260 | la_ 157 |
| 261 | jä 157 |
| 262 | aj 156 |
| 263 | yh 155 |
| 264 | o_ 154 |
| 265 | lo 154 |
| 266 | oli 153 |
| 267 | een_ 153 |
| 268 | le_ 153 |
| 269 | _si 153 |
| 270 | g 152 |
| 271 | aik 151 |
| 272 | vat_ 150 |
| 273 | L 149 |
| 274 | ur 149 |
| 275 | ti_ 149 |
| 276 | sia 148 |
| 277 | ite 147 |
| 278 | inen 147 |
| 279 | ain 146 |
| 280 | sti 146 |
| 281 | lla_ 146 |
| 282 | ys 145 |
| 283 | _mi 145 |
| 284 | val 144 |
| 285 | stu 144 |
| 286 | äm 144 |
| 287 | alli 143 |
| 288 | pe 143 |
| 289 | utt 142 |
| 290 | et_ 141 |
| 291 | _tu 141 |
| 292 | eri 140 |
| 293 | _E 140 |
| 294 | : 140 |
| 295 | nki 139 |
| 296 | ir 139 |
| 297 | llä 138 |
| 298 | up 138 |
| 299 | äi 137 |
| 300 | ama 137 |
| 301 | _ha 135 |
| 302 | id 135 |
| 303 | _se 135 |
| 304 | po 134 |
| 305 | inen_ 134 |
| 306 | tte 133 |
| 307 | nna 133 |
| 308 | ten_ 132 |
| 309 | or 132 |
| 310 | ts 131 |
| 311 | nä 131 |
| 312 | yk 131 |
| 313 | äs 131 |
| 314 | _S 130 |
| 315 | ses 130 |
| 316 | ve 130 |
| 317 | ess 129 |
| 318 | äl 129 |
| 319 | ita 129 |
| 320 | lai 129 |
| 321 | H 129 |
| 322 | van 127 |
| 323 | äk 127 |
| 324 | kin 127 |
| 325 | N 127 |
| 326 | _te 126 |
| 327 | den 126 |
| 328 | tee 126 |
| 329 | P 126 |
| 330 | kaa 126 |
| 331 | iin_ 125 |
| 332 | kun 125 |
| 333 | ois 125 |
| 334 | sit 125 |
| 335 | oh 124 |
| 336 | V 124 |
| 337 | yö 124 |
| 338 | äv 124 |
| 339 | tav 124 |
| 340 | voi 124 |
| 341 | ia_ 123 |
| 342 | I 123 |
| 343 | oll 123 |
| 344 | maa 122 |
| 345 | ih 122 |
| 346 | oj 122 |
| 347 | rj 121 |
| 348 | ro 121 |
| 349 | ikk 120 |
| 350 | so 120 |
| 351 | oo 120 |
| 352 | oimi 120 |
| 353 | do 120 |
| 354 | pp 119 |
| 355 | M 119 |
| 356 | _ei 118 |
| 357 | toim 118 |
| 358 | op 118 |
| 359 | uut 118 |
| 360 | tet 118 |
| 361 | _i 118 |
| 362 | _ma 117 |
| 363 | vai 117 |
| 364 | lä_ 116 |
| 365 | u_ 116 |
| 366 | sy 116 |
| 367 | kau 116 |
| 368 | utta 116 |
| 369 | un_ 115 |
| 370 | eu 115 |
| 371 | ssä 115 |
| 372 | tti 115 |
| 373 | _sa 115 |
| 374 | mp 114 |
| 375 | eis 114 |
| 376 | ka_ 112 |
| 377 | että 112 |
| 378 | taa_ 111 |
| 379 | _et 111 |
| 380 | hu 111 |
| 381 | itu 111 |
| 382 | suu 111 |
| 383 | den_ 111 |
| 384 | ksen 110 |
| 385 | ap 110 |
| 386 | _ke 110 |
| 387 | uv 110 |
| 388 | tam 110 |
| 389 | yv 109 |
| 390 | aup 109 |
| 391 | stä_ 109 |
| 392 | asta 109 |
| 393 | äy 109 |
| 394 | kan 108 |
| 395 | nu 108 |
| 396 | ukse 108 |
| 397 | _toi 107 |
| 398 | ien 107 |
| 399 | hi 107 |
| 400 | iss 107 |
-
diff --git a/xapian-applications/omega/langclass/french.lm b/xapian-applications/omega/langclass/french.lm
new file mode 100644
index 0000000..5080d9c
-
|
+
|
|
| 1 | _ 20800 |
| 2 | e 7258 |
| 3 | i 4051 |
| 4 | s 4003 |
| 5 | a 3972 |
| 6 | n 3903 |
| 7 | r 3650 |
| 8 | t 3590 |
| 9 | u 2968 |
| 10 | o 2823 |
| 11 | l 2723 |
| 12 | e_ 2632 |
| 13 | d 2241 |
| 14 | s_ 1721 |
| 15 | _d 1693 |
| 16 | c 1663 |
| 17 | p 1528 |
| 18 | é 1320 |
| 19 | m 1297 |
| 20 | es 1164 |
| 21 | t_ 1106 |
| 22 | _l 1079 |
| 23 | de 1048 |
| 24 | on 959 |
| 25 | _de 940 |
| 26 | en 939 |
| 27 | _p 852 |
| 28 | nt 825 |
| 29 | le 808 |
| 30 | es_ 791 |
| 31 | re 777 |
| 32 | , 721 |
| 33 | ,_ 720 |
| 34 | n_ 703 |
| 35 | de_ 685 |
| 36 | ' 670 |
| 37 | an 667 |
| 38 | _de_ 645 |
| 39 | v 641 |
| 40 | _s 610 |
| 41 | r_ 596 |
| 42 | _c 594 |
| 43 | er 585 |
| 44 | ai 575 |
| 45 | _a 558 |
| 46 | _e 554 |
| 47 | ou 554 |
| 48 | q 549 |
| 49 | qu 538 |
| 50 | is 530 |
| 51 | te 528 |
| 52 | ti 525 |
| 53 | ur 519 |
| 54 | it 514 |
| 55 | g 498 |
| 56 | a_ 490 |
| 57 | f 480 |
| 58 | la 476 |
| 59 | in 475 |
| 60 | _le 441 |
| 61 | me 436 |
| 62 | nt_ 432 |
| 63 | . 427 |
| 64 | b 427 |
| 65 | ra 423 |
| 66 | io 416 |
| 67 | ent 415 |
| 68 | ._ 404 |
| 69 | ne 395 |
| 70 | ns 392 |
| 71 | ion 383 |
| 72 | h 381 |
| 73 | ue 376 |
| 74 | se 371 |
| 75 | le_ 370 |
| 76 | ar 370 |
| 77 | ie 362 |
| 78 | co 361 |
| 79 | at 359 |
| 80 | tr 359 |
| 81 | et 349 |
| 82 | pr 342 |
| 83 | ce 336 |
| 84 | au 328 |
| 85 | u_ 321 |
| 86 | il 314 |
| 87 | _r 313 |
| 88 | _la 304 |
| 89 | un 303 |
| 90 | eu 303 |
| 91 | st 300 |
| 92 | re_ 296 |
| 93 | ro 290 |
| 94 | la_ 288 |
| 95 | on_ 287 |
| 96 | _m 286 |
| 97 | _la_ 283 |
| 98 | que 281 |
| 99 | _qu 280 |
| 100 | _q 280 |
| 101 | po 275 |
| 102 | tio 273 |
| 103 | tion 273 |
| 104 | pa 273 |
| 105 | li 271 |
| 106 | _t 269 |
| 107 | nc 268 |
| 108 | si 266 |
| 109 | _pr 265 |
| 110 | ri 264 |
| 111 | al 263 |
| 112 | ui 262 |
| 113 | _co 259 |
| 114 | i_ 255 |
| 115 | ta 255 |
| 116 | é_ 251 |
| 117 | x 247 |
| 118 | em 244 |
| 119 | l_ 243 |
| 120 | et_ 238 |
| 121 | _l' 236 |
| 122 | l' 236 |
| 123 | les 233 |
| 124 | ns_ 233 |
| 125 | ir 232 |
| 126 | _le_ 228 |
| 127 | ent_ 227 |
| 128 | or 226 |
| 129 | ré 224 |
| 130 | _f 224 |
| 131 | ne_ 222 |
| 132 | à 221 |
| 133 | ve 220 |
| 134 | ch 220 |
| 135 | it_ 219 |
| 136 | di 219 |
| 137 | oi 217 |
| 138 | - 216 |
| 139 | ni 215 |
| 140 | à_ 215 |
| 141 | les_ 215 |
| 142 | d' 214 |
| 143 | el 212 |
| 144 | ss 212 |
| 145 | _n 212 |
| 146 | ut 211 |
| 147 | our 210 |
| 148 | des 210 |
| 149 | " 208 |
| 150 | ur_ 207 |
| 151 | nd 207 |
| 152 | er_ 206 |
| 153 | ait 206 |
| 154 | ion_ 204 |
| 155 | rs 202 |
| 156 | _en 201 |
| 157 | _et 200 |
| 158 | j 200 |
| 159 | _d' 200 |
| 160 | ll 199 |
| 161 | _des 198 |
| 162 | des_ 197 |
| 163 | _pa 197 |
| 164 | té 196 |
| 165 | _et_ 195 |
| 166 | _à 195 |
| 167 | _à_ 195 |
| 168 | om 193 |
| 169 | ma 192 |
| 170 | ati 190 |
| 171 | _des_ 189 |
| 172 | L 188 |
| 173 | so 187 |
| 174 | _u 185 |
| 175 | è 184 |
| 176 | _" 183 |
| 177 | sa 182 |
| 178 | _po 181 |
| 179 | tre 181 |
| 180 | dé 181 |
| 181 | ue_ 180 |
| 182 | pe 179 |
| 183 | en_ 179 |
| 184 | ont 178 |
| 185 | _un 178 |
| 186 | _L 178 |
| 187 | us 176 |
| 188 | _les 176 |
| 189 | _les_ 176 |
| 190 | rt 176 |
| 191 | is_ 173 |
| 192 | _i 173 |
| 193 | du 172 |
| 194 | e,_ 171 |
| 195 | e, 171 |
| 196 | na 171 |
| 197 | s, 170 |
| 198 | s,_ 170 |
| 199 | as 169 |
| 200 | men 169 |
| 201 | M 167 |
| 202 | ait_ 167 |
| 203 | 'a 166 |
| 204 | vi 162 |
| 205 | ci 159 |
| 206 | ant 158 |
| 207 | _au 158 |
| 208 | da 157 |
| 209 | _M 157 |
| 210 | ation 155 |
| 211 | atio 155 |
| 212 | con 154 |
| 213 | que_ 153 |
| 214 | ons 153 |
| 215 | eur 151 |
| 216 | est 149 |
| 217 | me_ 149 |
| 218 | mi 149 |
| 219 | par 148 |
| 220 | tion_ 148 |
| 221 | _so 147 |
| 222 | te_ 147 |
| 223 | res 144 |
| 224 | lo 144 |
| 225 | ment 144 |
| 226 | és 144 |
| 227 | ans 143 |
| 228 | _du 142 |
| 229 | du_ 141 |
| 230 | ux 141 |
| 231 | un_ 140 |
| 232 | y 138 |
| 233 | pro 138 |
| 234 | _du_ 136 |
| 235 | _dé 136 |
| 236 | ce_ 135 |
| 237 | _se 134 |
| 238 | _re 134 |
| 239 | pl 133 |
| 240 | A 132 |
| 241 | ge 131 |
| 242 | ic 131 |
| 243 | su 130 |
| 244 | x_ 129 |
| 245 | ien 129 |
| 246 | nce 129 |
| 247 | "_ 129 |
| 248 | ac 128 |
| 249 | il_ 128 |
| 250 | qui 128 |
| 251 | _pro 127 |
| 252 | no 127 |
| 253 | av 126 |
| 254 | _v 125 |
| 255 | _o 125 |
| 256 | rs_ 125 |
| 257 | ans_ 124 |
| 258 | eme 124 |
| 259 | bl 123 |
| 260 | emen 122 |
| 261 | _en_ 122 |
| 262 | iqu 122 |
| 263 | ct 122 |
| 264 | iq 122 |
| 265 | lle 122 |
| 266 | nn 121 |
| 267 | ts 121 |
| 268 | ement 121 |
| 269 | ét 120 |
| 270 | _"_ 120 |
| 271 | ér 119 |
| 272 | té_ 119 |
| 273 | _ce 119 |
| 274 | mp 119 |
| 275 | ire 119 |
| 276 | ui_ 119 |
| 277 | to 118 |
| 278 | he 117 |
| 279 | _é 117 |
| 280 | ca 117 |
| 281 | _j 116 |
| 282 | ec 116 |
| 283 | va 116 |
| 284 | _par 116 |
| 285 | ée 115 |
| 286 | _con 115 |
| 287 | se_ 114 |
| 288 | tre_ 113 |
| 289 | ique 112 |
| 290 | dan 111 |
| 291 | éc 111 |
| 292 | ha 110 |
| 293 | une 110 |
| 294 | P 110 |
| 295 | lu 110 |
| 296 | ux_ 109 |
| 297 | _b 108 |
| 298 | s. 108 |
| 299 | pou 108 |
| 300 | _pou 108 |
| 301 | ier 107 |
| 302 | C 107 |
| 303 | ais 106 |
| 304 | s._ 105 |
| 305 | ain 104 |
| 306 | _un_ 104 |
| 307 | nte 103 |
| 308 | 'e 103 |
| 309 | mo 103 |
| 310 | mm 103 |
| 311 | ment_ 102 |
| 312 | une_ 102 |
| 313 | com 101 |
| 314 | _P 101 |
| 315 | 'i 101 |
| 316 | _ma 100 |
| 317 | do 99 |
| 318 | ant_ 98 |
| 319 | anc 98 |
| 320 | che 97 |
| 321 | ap 97 |
| 322 | ont_ 97 |
| 323 | _que 97 |
| 324 | os 97 |
| 325 | urs 96 |
| 326 | _di 96 |
| 327 | fi 96 |
| 328 | im 96 |
| 329 | pour 96 |
| 330 | _pour 96 |
| 331 | ê 95 |
| 332 | ts_ 95 |
| 333 | _g 95 |
| 334 | our_ 94 |
| 335 | _sa 94 |
| 336 | ntr 94 |
| 337 | _da 94 |
| 338 | _ré 93 |
| 339 | rai 93 |
| 340 | rm 93 |
| 341 | _qui 93 |
| 342 | e. 92 |
| 343 | am 92 |
| 344 | _com 91 |
| 345 | uv 91 |
| 346 | _C 91 |
| 347 | D 91 |
| 348 | qui_ 90 |
| 349 | e._ 90 |
| 350 | pu 89 |
| 351 | _qui_ 88 |
| 352 | ia 87 |
| 353 | _dan 87 |
| 354 | _dans 87 |
| 355 | dans 87 |
| 356 | ter 87 |
| 357 | fo 87 |
| 358 | son 87 |
| 359 | dans_ 87 |
| 360 | id 86 |
| 361 | ag 86 |
| 362 | ine 86 |
| 363 | tu 85 |
| 364 | ran 85 |
| 365 | au_ 85 |
| 366 | ol 85 |
| 367 | oc 84 |
| 368 | est_ 84 |
| 369 | st_ 84 |
| 370 | enc 84 |
| 371 | F 82 |
| 372 | _tr 81 |
| 373 | 'u 81 |
| 374 | tai 81 |
| 375 | ell 80 |
| 376 | R 79 |
| 377 | _su 79 |
| 378 | S 79 |
| 379 | ions 79 |
| 380 | pré 79 |
| 381 | sé 78 |
| 382 | ab 78 |
| 383 | né 77 |
| 384 | _que_ 77 |
| 385 | _in 77 |
| 386 | _av 76 |
| 387 | pour_ 76 |
| 388 | fa 76 |
| 389 | rr 76 |
| 390 | air 75 |
| 391 | _ch 75 |
| 392 | _a_ 75 |
| 393 | ba 74 |
| 394 | _pl 74 |
| 395 | gr 74 |
| 396 | tt 74 |
| 397 | ssi 74 |
| 398 | rd 73 |
| 399 | pas 73 |
| 400 | bi 73 |
-
diff --git a/xapian-applications/omega/langclass/german.lm b/xapian-applications/omega/langclass/german.lm
new file mode 100644
index 0000000..eb4eda0
-
|
+
|
|
| 1 | _ 31586 |
| 2 | e 15008 |
| 3 | n 9058 |
| 4 | i 7299 |
| 5 | r 6830 |
| 6 | t 5662 |
| 7 | s 5348 |
| 8 | a 4618 |
| 9 | h 4176 |
| 10 | d 4011 |
| 11 | er 3415 |
| 12 | en 3412 |
| 13 | u 3341 |
| 14 | l 3266 |
| 15 | n_ 2848 |
| 16 | c 2636 |
| 17 | ch 2460 |
| 18 | g 2407 |
| 19 | o 2376 |
| 20 | e_ 2208 |
| 21 | r_ 2128 |
| 22 | m 2077 |
| 23 | _d 1948 |
| 24 | de 1831 |
| 25 | en_ 1786 |
| 26 | ei 1718 |
| 27 | er_ 1570 |
| 28 | in 1568 |
| 29 | te 1505 |
| 30 | ie 1505 |
| 31 | b 1458 |
| 32 | t_ 1425 |
| 33 | f 1306 |
| 34 | k 1176 |
| 35 | ge 1144 |
| 36 | s_ 1137 |
| 37 | un 1113 |
| 38 | , 1104 |
| 39 | ,_ 1099 |
| 40 | w 1099 |
| 41 | z 1060 |
| 42 | nd 1039 |
| 43 | he 1004 |
| 44 | st 989 |
| 45 | _s 952 |
| 46 | _de 949 |
| 47 | . 909 |
| 48 | _e 906 |
| 49 | ne 906 |
| 50 | der 880 |
| 51 | ._ 847 |
| 52 | be 841 |
| 53 | es 829 |
| 54 | ic 796 |
| 55 | _a 791 |
| 56 | ie_ 779 |
| 57 | is 769 |
| 58 | ich 763 |
| 59 | an 755 |
| 60 | re 749 |
| 61 | di 732 |
| 62 | ein 730 |
| 63 | se 730 |
| 64 | " 720 |
| 65 | ng 709 |
| 66 | _i 706 |
| 67 | sc 683 |
| 68 | sch 681 |
| 69 | it 673 |
| 70 | der_ 652 |
| 71 | h_ 651 |
| 72 | ch_ 642 |
| 73 | S 630 |
| 74 | le 609 |
| 75 | p 609 |
| 76 | ä 607 |
| 77 | ü 603 |
| 78 | au 603 |
| 79 | v 602 |
| 80 | che 599 |
| 81 | _w 596 |
| 82 | d_ 585 |
| 83 | die 576 |
| 84 | _di 572 |
| 85 | m_ 562 |
| 86 | _die 559 |
| 87 | el 548 |
| 88 | _S 540 |
| 89 | _der 529 |
| 90 | li 527 |
| 91 | _der_ 523 |
| 92 | si 515 |
| 93 | al 514 |
| 94 | ns 507 |
| 95 | on 501 |
| 96 | or 495 |
| 97 | ti 490 |
| 98 | ten 487 |
| 99 | ht 486 |
| 100 | die_ 485 |
| 101 | _die_ 483 |
| 102 | D 479 |
| 103 | rt 478 |
| 104 | nd_ 476 |
| 105 | _u 470 |
| 106 | nt 468 |
| 107 | A 466 |
| 108 | in_ 464 |
| 109 | den 461 |
| 110 | cht 447 |
| 111 | und 443 |
| 112 | me 440 |
| 113 | _z 429 |
| 114 | ung 426 |
| 115 | ll 423 |
| 116 | _un 421 |
| 117 | _ei 419 |
| 118 | _n 415 |
| 119 | hr 412 |
| 120 | ine 412 |
| 121 | _A 408 |
| 122 | _ein 405 |
| 123 | ar 404 |
| 124 | ra 403 |
| 125 | _v 400 |
| 126 | _g 400 |
| 127 | as 395 |
| 128 | zu 392 |
| 129 | et 389 |
| 130 | em 385 |
| 131 | _D 380 |
| 132 | eine 376 |
| 133 | gen 376 |
| 134 | g_ 376 |
| 135 | da 368 |
| 136 | we 366 |
| 137 | K 365 |
| 138 | lt 360 |
| 139 | B 354 |
| 140 | _" 353 |
| 141 | nde 349 |
| 142 | ni 347 |
| 143 | und_ 345 |
| 144 | E 345 |
| 145 | ur 345 |
| 146 | _m 342 |
| 147 | ri 341 |
| 148 | ha 340 |
| 149 | eh 339 |
| 150 | ten_ 338 |
| 151 | es_ 336 |
| 152 | _K 336 |
| 153 | _und 335 |
| 154 | ig 335 |
| 155 | _b 335 |
| 156 | hen 334 |
| 157 | _und_ 332 |
| 158 | _au 329 |
| 159 | _B 327 |
| 160 | _da 325 |
| 161 | _zu 324 |
| 162 | _in 322 |
| 163 | at 321 |
| 164 | us 318 |
| 165 | wi 307 |
| 166 | n, 305 |
| 167 | n,_ 304 |
| 168 | nn 304 |
| 169 | te_ 301 |
| 170 | eit 301 |
| 171 | _h 300 |
| 172 | ter 299 |
| 173 | M 298 |
| 174 | n. 295 |
| 175 | ß 294 |
| 176 | ng_ 289 |
| 177 | sche 289 |
| 178 | - 283 |
| 179 | rs 282 |
| 180 | den_ 282 |
| 181 | _si 280 |
| 182 | G 280 |
| 183 | im 278 |
| 184 | _ge 277 |
| 185 | chen 276 |
| 186 | rd 273 |
| 187 | _E 273 |
| 188 | n._ 270 |
| 189 | icht 270 |
| 190 | rn 268 |
| 191 | uf 267 |
| 192 | isch 264 |
| 193 | isc 264 |
| 194 | nen 263 |
| 195 | _in_ 262 |
| 196 | _M 260 |
| 197 | _er 257 |
| 198 | ich_ 255 |
| 199 | ac 253 |
| 200 | lic 252 |
| 201 | _G 252 |
| 202 | ber 252 |
| 203 | la 251 |
| 204 | vo 251 |
| 205 | eb 250 |
| 206 | ke 249 |
| 207 | F 248 |
| 208 | as_ 248 |
| 209 | hen_ 248 |
| 210 | ach 245 |
| 211 | en, 244 |
| 212 | ung_ 243 |
| 213 | lich 243 |
| 214 | ste 243 |
| 215 | en,_ 243 |
| 216 | _k 241 |
| 217 | ben 241 |
| 218 | _f 241 |
| 219 | en. 241 |
| 220 | _be 239 |
| 221 | it_ 239 |
| 222 | L 238 |
| 223 | _se 237 |
| 224 | mi 236 |
| 225 | ve 236 |
| 226 | na 236 |
| 227 | on_ 236 |
| 228 | P 235 |
| 229 | ss 234 |
| 230 | ist 234 |
| 231 | ö 234 |
| 232 | ht_ 233 |
| 233 | ru 233 |
| 234 | st_ 229 |
| 235 | _F 229 |
| 236 | ts 227 |
| 237 | ab 226 |
| 238 | W 226 |
| 239 | ol 225 |
| 240 | _eine 225 |
| 241 | hi 225 |
| 242 | so 224 |
| 243 | em_ 223 |
| 244 | "_ 223 |
| 245 | ren 222 |
| 246 | en._ 221 |
| 247 | chen_ 221 |
| 248 | R 221 |
| 249 | ta 221 |
| 250 | ere 220 |
| 251 | ische 219 |
| 252 | ers 218 |
| 253 | ert 217 |
| 254 | _P 217 |
| 255 | tr 217 |
| 256 | ed 215 |
| 257 | ze 215 |
| 258 | eg 215 |
| 259 | ens 215 |
| 260 | ür 213 |
| 261 | ah 212 |
| 262 | _vo 212 |
| 263 | ne_ 211 |
| 264 | cht_ 210 |
| 265 | uc 209 |
| 266 | _wi 209 |
| 267 | nge 208 |
| 268 | lle 208 |
| 269 | fe 207 |
| 270 | _L 207 |
| 271 | ver 206 |
| 272 | hl 205 |
| 273 | V 204 |
| 274 | ma 203 |
| 275 | wa 203 |
| 276 | auf 201 |
| 277 | H 198 |
| 278 | _W 195 |
| 279 | T 195 |
| 280 | nte 193 |
| 281 | uch 193 |
| 282 | l_ 192 |
| 283 | sei 192 |
| 284 | nen_ 190 |
| 285 | u_ 189 |
| 286 | _den 189 |
| 287 | _al 189 |
| 288 | _V 188 |
| 289 | t. 188 |
| 290 | lte 187 |
| 291 | ut 186 |
| 292 | ent 184 |
| 293 | sich 183 |
| 294 | sic 183 |
| 295 | il 183 |
| 296 | ier 182 |
| 297 | am 181 |
| 298 | gen_ 180 |
| 299 | sen 179 |
| 300 | fü 178 |
| 301 | um 178 |
| 302 | t._ 177 |
| 303 | f_ 174 |
| 304 | he_ 174 |
| 305 | ner 174 |
| 306 | nst 174 |
| 307 | ls 174 |
| 308 | _sei 173 |
| 309 | ro 173 |
| 310 | ir 173 |
| 311 | ebe 173 |
| 312 | mm 173 |
| 313 | ag 172 |
| 314 | ern 169 |
| 315 | t,_ 169 |
| 316 | t, 169 |
| 317 | eu 169 |
| 318 | ft 168 |
| 319 | icht_ 167 |
| 320 | hre 167 |
| 321 | Be 166 |
| 322 | nz 165 |
| 323 | nder 165 |
| 324 | _T 164 |
| 325 | _den_ 164 |
| 326 | iche 163 |
| 327 | tt 163 |
| 328 | zu_ 162 |
| 329 | and 162 |
| 330 | J 161 |
| 331 | rde 160 |
| 332 | rei 160 |
| 333 | _we 159 |
| 334 | _H 159 |
| 335 | ige 159 |
| 336 | _Be 158 |
| 337 | rte 157 |
| 338 | hei 156 |
| 339 | das 155 |
| 340 | aus 155 |
| 341 | che_ 154 |
| 342 | _das 154 |
| 343 | _zu_ 154 |
| 344 | tz 154 |
| 345 | _ni 153 |
| 346 | das_ 153 |
| 347 | _R 153 |
| 348 | N 153 |
| 349 | des 153 |
| 350 | _ve 153 |
| 351 | _J 152 |
| 352 | I 152 |
| 353 | _das_ 152 |
| 354 | men 151 |
| 355 | _so 151 |
| 356 | _ver 151 |
| 357 | _auf 150 |
| 358 | ine_ 150 |
| 359 | _ha 150 |
| 360 | rg 149 |
| 361 | ind 148 |
| 362 | eben 148 |
| 363 | kt 147 |
| 364 | mit 147 |
| 365 | _an 147 |
| 366 | her 146 |
| 367 | Ge 146 |
| 368 | Sc 145 |
| 369 | _sich 145 |
| 370 | U 145 |
| 371 | Sch 145 |
| 372 | _sic 145 |
| 373 | end 145 |
| 374 | Di 144 |
| 375 | abe 143 |
| 376 | ck 143 |
| 377 | sse 142 |
| 378 | ür_ 142 |
| 379 | ell 142 |
| 380 | ik 141 |
| 381 | o_ 141 |
| 382 | nic 141 |
| 383 | nich 141 |
| 384 | sa 141 |
| 385 | _fü 140 |
| 386 | hn 140 |
| 387 | zi 140 |
| 388 | no 140 |
| 389 | nicht 140 |
| 390 | im_ 139 |
| 391 | von_ 139 |
| 392 | von 139 |
| 393 | _nic 139 |
| 394 | _nich 139 |
| 395 | eine_ 139 |
| 396 | oc 138 |
| 397 | wei 138 |
| 398 | io 138 |
| 399 | schen 138 |
| 400 | gt 138 |
-
diff --git a/xapian-applications/omega/langclass/italian.lm b/xapian-applications/omega/langclass/italian.lm
new file mode 100644
index 0000000..543cadc
-
|
+
|
|
| 1 | _ 25028 |
| 2 | a 7570 |
| 3 | e 6477 |
| 4 | i 5481 |
| 5 | o 5104 |
| 6 | l 3905 |
| 7 | n 3866 |
| 8 | r 3502 |
| 9 | t 2934 |
| 10 | c 2862 |
| 11 | s 2862 |
| 12 | a_ 2504 |
| 13 | e_ 2404 |
| 14 | d 2004 |
| 15 | i_ 1749 |
| 16 | o_ 1679 |
| 17 | u 1650 |
| 18 | v 1611 |
| 19 | p 1561 |
| 20 | m 1414 |
| 21 | _c 1325 |
| 22 | , 1192 |
| 23 | ,_ 1192 |
| 24 | _s 1190 |
| 25 | _d 1094 |
| 26 | g 1067 |
| 27 | an 925 |
| 28 | er 915 |
| 29 | _a 914 |
| 30 | _p 895 |
| 31 | la 858 |
| 32 | _l 830 |
| 33 | re 799 |
| 34 | ar 769 |
| 35 | h 762 |
| 36 | no 753 |
| 37 | co 726 |
| 38 | va 698 |
| 39 | _e 657 |
| 40 | n_ 656 |
| 41 | on 656 |
| 42 | ra 653 |
| 43 | to 651 |
| 44 | f 638 |
| 45 | di 638 |
| 46 | _i 634 |
| 47 | ch 634 |
| 48 | ll 633 |
| 49 | l_ 624 |
| 50 | la_ 598 |
| 51 | ta 593 |
| 52 | el 576 |
| 53 | in 567 |
| 54 | _m 558 |
| 55 | en 529 |
| 56 | b 528 |
| 57 | ri 525 |
| 58 | _co 523 |
| 59 | _n 523 |
| 60 | _di 522 |
| 61 | li 513 |
| 62 | av 507 |
| 63 | al 501 |
| 64 | le 494 |
| 65 | ia 492 |
| 66 | se 484 |
| 67 | ol 479 |
| 68 | _f 477 |
| 69 | or 477 |
| 70 | te 469 |
| 71 | _e_ 467 |
| 72 | ve 454 |
| 73 | at 449 |
| 74 | de 447 |
| 75 | . 443 |
| 76 | ne 429 |
| 77 | va_ 428 |
| 78 | ca 426 |
| 79 | ._ 422 |
| 80 | tt 422 |
| 81 | re_ 415 |
| 82 | nt 415 |
| 83 | io 411 |
| 84 | _v 407 |
| 85 | pe 405 |
| 86 | z 392 |
| 87 | to_ 391 |
| 88 | _ch 389 |
| 89 | na 384 |
| 90 | si 384 |
| 91 | ' 383 |
| 92 | he 382 |
| 93 | no_ 379 |
| 94 | ci 374 |
| 95 | _la 373 |
| 96 | ro 371 |
| 97 | _g 370 |
| 98 | st 368 |
| 99 | cc 366 |
| 100 | he_ 362 |
| 101 | di_ 362 |
| 102 | ma 358 |
| 103 | ev 354 |
| 104 | che 354 |
| 105 | es 352 |
| 106 | me 352 |
| 107 | pa 351 |
| 108 | _t 349 |
| 109 | ti 348 |
| 110 | _di_ 347 |
| 111 | ss 345 |
| 112 | che_ 344 |
| 113 | a,_ 337 |
| 114 | a, 337 |
| 115 | nd 335 |
| 116 | o, 333 |
| 117 | o,_ 333 |
| 118 | ell 330 |
| 119 | gl 323 |
| 120 | sa 322 |
| 121 | il 322 |
| 122 | gli 321 |
| 123 | da 318 |
| 124 | as 318 |
| 125 | do 314 |
| 126 | _che 308 |
| 127 | _che_ 306 |
| 128 | eva 306 |
| 129 | _la_ 300 |
| 130 | lla 298 |
| 131 | le_ 293 |
| 132 | un 291 |
| 133 | _pe 290 |
| 134 | _de 288 |
| 135 | q 283 |
| 136 | qu 283 |
| 137 | ava 280 |
| 138 | po 277 |
| 139 | on_ 275 |
| 140 | r_ 273 |
| 141 | li_ 273 |
| 142 | _b 269 |
| 143 | _il 268 |
| 144 | _il_ 268 |
| 145 | il_ 268 |
| 146 | lo 267 |
| 147 | om 263 |
| 148 | e, 263 |
| 149 | e,_ 263 |
| 150 | ni 258 |
| 151 | tr 258 |
| 152 | so 255 |
| 153 | ra_ 253 |
| 154 | os 251 |
| 155 | _in 249 |
| 156 | _u 248 |
| 157 | per 244 |
| 158 | are 243 |
| 159 | et 243 |
| 160 | _se 240 |
| 161 | ano 239 |
| 162 | si_ 238 |
| 163 | _ca 238 |
| 164 | _qu 238 |
| 165 | lla_ 238 |
| 166 | _q 238 |
| 167 | _a_ 236 |
| 168 | ac 236 |
| 169 | _r 234 |
| 170 | ic 233 |
| 171 | _no 232 |
| 172 | ie 227 |
| 173 | fa 227 |
| 174 | hi 226 |
| 175 | del 225 |
| 176 | ua 222 |
| 177 | _per 218 |
| 178 | ce 218 |
| 179 | _ma 216 |
| 180 | sc 216 |
| 181 | _del 215 |
| 182 | mi 212 |
| 183 | _un 208 |
| 184 | chi 206 |
| 185 | era 205 |
| 186 | i, 205 |
| 187 | i,_ 205 |
| 188 | su 203 |
| 189 | and 202 |
| 190 | vo 202 |
| 191 | _fa 201 |
| 192 | eva_ 200 |
| 193 | ano_ 199 |
| 194 | gli_ 197 |
| 195 | non 196 |
| 196 | pi 196 |
| 197 | vi 195 |
| 198 | er_ 195 |
| 199 | _al 194 |
| 200 | se_ 193 |
| 201 | _ne 192 |
| 202 | _non 191 |
| 203 | am 190 |
| 204 | is 187 |
| 205 | ava_ 187 |
| 206 | _non_ 186 |
| 207 | non_ 186 |
| 208 | in_ 185 |
| 209 | ent 185 |
| 210 | _si 184 |
| 211 | _pa 184 |
| 212 | com 183 |
| 213 | ! 182 |
| 214 | _le 182 |
| 215 | _su 181 |
| 216 | uo 181 |
| 217 | el_ 180 |
| 218 | !_ 180 |
| 219 | l' 178 |
| 220 | ue 177 |
| 221 | te_ 177 |
| 222 | _com 177 |
| 223 | are_ 176 |
| 224 | pr 176 |
| 225 | _in_ 176 |
| 226 | van 172 |
| 227 | mo 172 |
| 228 | ta_ 171 |
| 229 | gn 167 |
| 230 | ere 166 |
| 231 | na_ 166 |
| 232 | tto 163 |
| 233 | it 161 |
| 234 | _per_ 161 |
| 235 | per_ 161 |
| 236 | é 161 |
| 237 | all 160 |
| 238 | ess 159 |
| 239 | ut 159 |
| 240 | col 158 |
| 241 | acc 157 |
| 242 | gi 155 |
| 243 | lo_ 154 |
| 244 | oc 154 |
| 245 | vano 153 |
| 246 | io_ 153 |
| 247 | _av 151 |
| 248 | ndo 151 |
| 249 | é_ 151 |
| 250 | ato 149 |
| 251 | ave 148 |
| 252 | _st 147 |
| 253 | me_ 147 |
| 254 | 'a 146 |
| 255 | ia_ 144 |
| 256 | con 143 |
| 257 | mp 143 |
| 258 | fi 142 |
| 259 | ett 142 |
| 260 | _si_ 141 |
| 261 | _pi 140 |
| 262 | era_ 140 |
| 263 | ti_ 140 |
| 264 | ó 140 |
| 265 | vano_ 140 |
| 266 | _gl 139 |
| 267 | qua 139 |
| 268 | ella 139 |
| 269 | sta 138 |
| 270 | ome 137 |
| 271 | S 137 |
| 272 | _gli 137 |
| 273 | _S 137 |
| 274 | ad 136 |
| 275 | _ve 134 |
| 276 | ant 134 |
| 277 | ne_ 134 |
| 278 | ó_ 133 |
| 279 | sp 133 |
| 280 | do_ 133 |
| 281 | _po 132 |
| 282 | ro_ 132 |
| 283 | ov 132 |
| 284 | _le_ 131 |
| 285 | ella_ 130 |
| 286 | sse 129 |
| 287 | _con 128 |
| 288 | ir 128 |
| 289 | _vi 128 |
| 290 | ig 127 |
| 291 | _gli_ 127 |
| 292 | _ave 127 |
| 293 | vev 127 |
| 294 | un_ 126 |
| 295 | ot 126 |
| 296 | veva 125 |
| 297 | dell 125 |
| 298 | que 125 |
| 299 | a. 125 |
| 300 | _o 125 |
| 301 | a._ 124 |
| 302 | tu 124 |
| 303 | cia 123 |
| 304 | za 123 |
| 305 | _que 123 |
| 306 | _da 121 |
| 307 | par 121 |
| 308 | _pr 120 |
| 309 | cch 120 |
| 310 | _dell 120 |
| 311 | eg 119 |
| 312 | _sa 119 |
| 313 | o._ 119 |
| 314 | o. 119 |
| 315 | _col 118 |
| 316 | lt 118 |
| 317 | _un_ 118 |
| 318 | rt 118 |
| 319 | ur 117 |
| 320 | _vo 117 |
| 321 | _me 117 |
| 322 | ome_ 117 |
| 323 | L 116 |
| 324 | ap 116 |
| 325 | _L 116 |
| 326 | zi 116 |
| 327 | nto 116 |
| 328 | og 115 |
| 329 | _an 115 |
| 330 | _so 115 |
| 331 | em 114 |
| 332 | ag 114 |
| 333 | be 111 |
| 334 | ni_ 111 |
| 335 | im 110 |
| 336 | cchi 110 |
| 337 | ver 110 |
| 338 | lle 109 |
| 339 | nz 109 |
| 340 | cci 109 |
| 341 | _ri 109 |
| 342 | nc 108 |
| 343 | _er 108 |
| 344 | come_ 107 |
| 345 | come 107 |
| 346 | aveva 107 |
| 347 | ui 107 |
| 348 | avev 107 |
| 349 | tto_ 107 |
| 350 | _come 106 |
| 351 | ed 106 |
| 352 | P 105 |
| 353 | man 105 |
| 354 | _P 105 |
| 355 | rs 105 |
| 356 | occ 104 |
| 357 | ndo_ 103 |
| 358 | ato_ 103 |
| 359 | _qua 103 |
| 360 | _era 103 |
| 361 | ari 102 |
| 362 | ba 100 |
| 363 | _mo 100 |
| 364 | nel 100 |
| 365 | id 99 |
| 366 | men 98 |
| 367 | _fi 98 |
| 368 | _all 98 |
| 369 | rr 97 |
| 370 | _do 97 |
| 371 | _avev 97 |
| 372 | att 97 |
| 373 | l'a 96 |
| 374 | ei 96 |
| 375 | zz 96 |
| 376 | ; 96 |
| 377 | vol 95 |
| 378 | pp 95 |
| 379 | tra 95 |
| 380 | ;_ 95 |
| 381 | ere_ 94 |
| 382 | lle_ 94 |
| 383 | nda 94 |
| 384 | utt 94 |
| 385 | est 93 |
| 386 | _nel 93 |
| 387 | ul 92 |
| 388 | ola 92 |
| 389 | iv 92 |
| 390 | ando 90 |
| 391 | ale 90 |
| 392 | lu 90 |
| 393 | rn 90 |
| 394 | e. 89 |
| 395 | e._ 89 |
| 396 | ll' 89 |
| 397 | tta 88 |
| 398 | nte 87 |
| 399 | _l' 87 |
| 400 | uel 87 |
-
diff --git a/xapian-applications/omega/langclass/norwegian.lm b/xapian-applications/omega/langclass/norwegian.lm
new file mode 100644
index 0000000..f2c3cec
-
|
+
|
|
| 1 | _ 22970 |
| 2 | e 6833 |
| 3 | n 4206 |
| 4 | r 3516 |
| 5 | t 3112 |
| 6 | a 2587 |
| 7 | s 2440 |
| 8 | i 2112 |
| 9 | l 1901 |
| 10 | o 1900 |
| 11 | n_ 1875 |
| 12 | r_ 1761 |
| 13 | k 1713 |
| 14 | g 1630 |
| 15 | en 1615 |
| 16 | m 1508 |
| 17 | e_ 1450 |
| 18 | d 1444 |
| 19 | er 1436 |
| 20 | h 1306 |
| 21 | t_ 1300 |
| 22 | _h 1180 |
| 23 | _s 1148 |
| 24 | er_ 1105 |
| 25 | v 982 |
| 26 | en_ 976 |
| 27 | an 919 |
| 28 | . 901 |
| 29 | ._ 791 |
| 30 | _. 781 |
| 31 | et 770 |
| 32 | g_ 762 |
| 33 | _._ 726 |
| 34 | å 725 |
| 35 | u 719 |
| 36 | f 709 |
| 37 | p 702 |
| 38 | ha 682 |
| 39 | _ha 672 |
| 40 | de 657 |
| 41 | te 651 |
| 42 | _e 621 |
| 43 | et_ 614 |
| 44 | re 581 |
| 45 | ne 565 |
| 46 | _o 554 |
| 47 | an_ 544 |
| 48 | ke 534 |
| 49 | _, 522 |
| 50 | ,_ 522 |
| 51 | , 522 |
| 52 | _,_ 522 |
| 53 | _f 519 |
| 54 | _m 515 |
| 55 | or 503 |
| 56 | _d 483 |
| 57 | _i 480 |
| 58 | å_ 479 |
| 59 | se 476 |
| 60 | m_ 469 |
| 61 | nn 454 |
| 62 | b 449 |
| 63 | me 441 |
| 64 | ø 434 |
| 65 | _a 413 |
| 66 | st 404 |
| 67 | _t 398 |
| 68 | og 380 |
| 69 | _v 377 |
| 70 | _og 366 |
| 71 | ar 364 |
| 72 | el 364 |
| 73 | le 361 |
| 74 | i_ 356 |
| 75 | om 353 |
| 76 | og_ 351 |
| 77 | _og_ 351 |
| 78 | li 350 |
| 79 | _k 346 |
| 80 | _de 339 |
| 81 | ge 339 |
| 82 | han 337 |
| 83 | y 333 |
| 84 | _han 332 |
| 85 | ve 330 |
| 86 | kk 323 |
| 87 | in 311 |
| 88 | _b 307 |
| 89 | fo 301 |
| 90 | j 301 |
| 91 | il 298 |
| 92 | _H 291 |
| 93 | H 291 |
| 94 | han_ 288 |
| 95 | _han_ 288 |
| 96 | for 287 |
| 97 | ik 281 |
| 98 | l_ 278 |
| 99 | kke 277 |
| 100 | tt 276 |
| 101 | ti 270 |
| 102 | ne_ 270 |
| 103 | d_ 269 |
| 104 | ed 269 |
| 105 | om_ 268 |
| 106 | nne 266 |
| 107 | _me 264 |
| 108 | ng 257 |
| 109 | _er 257 |
| 110 | _fo 256 |
| 111 | eg 256 |
| 112 | _se 256 |
| 113 | _g 256 |
| 114 | un 255 |
| 115 | ig 255 |
| 116 | sk 253 |
| 117 | _er_ 252 |
| 118 | _p 252 |
| 119 | _for 250 |
| 120 | ke_ 249 |
| 121 | _n 238 |
| 122 | _l 233 |
| 123 | al 232 |
| 124 | ør 222 |
| 125 | s_ 221 |
| 126 | ar_ 215 |
| 127 | at 214 |
| 128 | _en 211 |
| 129 | he 211 |
| 130 | pe 209 |
| 131 | _i_ 208 |
| 132 | am 200 |
| 133 | es 200 |
| 134 | si 200 |
| 135 | enn 197 |
| 136 | det 195 |
| 137 | or_ 193 |
| 138 | vi 190 |
| 139 | ns 189 |
| 140 | ikk 188 |
| 141 | det_ 185 |
| 142 | so 185 |
| 143 | un_ 183 |
| 144 | il_ 181 |
| 145 | nd 181 |
| 146 | te_ 181 |
| 147 | "_ 180 |
| 148 | " 180 |
| 149 | _"_ 180 |
| 150 | _" 180 |
| 151 | em 179 |
| 152 | _ti 176 |
| 153 | kke_ 176 |
| 154 | lig 174 |
| 155 | ten 174 |
| 156 | Ha 173 |
| 157 | _Ha 173 |
| 158 | re_ 172 |
| 159 | ikke 168 |
| 160 | je 165 |
| 161 | Han 165 |
| 162 | ter 165 |
| 163 | _Han 165 |
| 164 | eg_ 164 |
| 165 | på 164 |
| 166 | _på 163 |
| 167 | _si 163 |
| 168 | _å 163 |
| 169 | _Han_ 162 |
| 170 | Han_ 162 |
| 171 | på_ 162 |
| 172 | _på_ 161 |
| 173 | til 160 |
| 174 | som 160 |
| 175 | _so 159 |
| 176 | den 159 |
| 177 | _det 157 |
| 178 | ed_ 155 |
| 179 | ll 155 |
| 180 | _ik 155 |
| 181 | rt 155 |
| 182 | som_ 153 |
| 183 | ra 152 |
| 184 | a_ 152 |
| 185 | har 152 |
| 186 | nt 152 |
| 187 | de_ 152 |
| 188 | tr 151 |
| 189 | v_ 151 |
| 190 | _har 151 |
| 191 | ka 151 |
| 192 | ig_ 150 |
| 193 | _som 150 |
| 194 | for_ 150 |
| 195 | _som_ 150 |
| 196 | _en_ 149 |
| 197 | hu 149 |
| 198 | _ikk 148 |
| 199 | _ham 148 |
| 200 | ham 148 |
| 201 | ste 148 |
| 202 | _det_ 148 |
| 203 | _ikke 148 |
| 204 | enne 148 |
| 205 | ikke_ 148 |
| 206 | har_ 147 |
| 207 | nge 147 |
| 208 | D 147 |
| 209 | _har_ 147 |
| 210 | _D 147 |
| 211 | am_ 147 |
| 212 | ere 147 |
| 213 | ham_ 146 |
| 214 | _ham_ 146 |
| 215 | it 145 |
| 216 | _he 144 |
| 217 | _til 144 |
| 218 | av 143 |
| 219 | va 140 |
| 220 | men 140 |
| 221 | år 140 |
| 222 | _ve 140 |
| 223 | _hu 139 |
| 224 | ta 139 |
| 225 | pen 137 |
| 226 | sp 137 |
| 227 | _st 135 |
| 228 | tte 135 |
| 229 | la 135 |
| 230 | _E 133 |
| 231 | E 133 |
| 232 | den_ 130 |
| 233 | is 130 |
| 234 | til_ 128 |
| 235 | _r 128 |
| 236 | tt_ 128 |
| 237 | år_ 127 |
| 238 | k_ 124 |
| 239 | _å_ 124 |
| 240 | ri 124 |
| 241 | _til_ 124 |
| 242 | at_ 123 |
| 243 | ene 123 |
| 244 | seg 123 |
| 245 | _av 123 |
| 246 | med 122 |
| 247 | _vi 122 |
| 248 | _seg 122 |
| 249 | seg_ 121 |
| 250 | _seg_ 121 |
| 251 | _for_ 120 |
| 252 | nne_ 120 |
| 253 | ut 120 |
| 254 | _u 119 |
| 255 | mm 119 |
| 256 | mme 119 |
| 257 | De 118 |
| 258 | _De 118 |
| 259 | _at 118 |
| 260 | _hun 117 |
| 261 | hun 117 |
| 262 | ko 117 |
| 263 | be 116 |
| 264 | _at_ 115 |
| 265 | ter_ 115 |
| 266 | pen_ 114 |
| 267 | ker 113 |
| 268 | hun_ 113 |
| 269 | _hun_ 113 |
| 270 | on 111 |
| 271 | lig_ 111 |
| 272 | .. 110 |
| 273 | hen 107 |
| 274 | _med 107 |
| 275 | rs 106 |
| 276 | ser 106 |
| 277 | med_ 105 |
| 278 | _men 104 |
| 279 | _hen 104 |
| 280 | _sk 104 |
| 281 | _med_ 104 |
| 282 | ak 103 |
| 283 | ans 103 |
| 284 | ker_ 102 |
| 285 | av_ 101 |
| 286 | _ka 101 |
| 287 | no 100 |
| 288 | ver 100 |
| 289 | ler 99 |
| 290 | J 99 |
| 291 | spe 99 |
| 292 | ten_ 99 |
| 293 | _J 99 |
| 294 | ene_ 98 |
| 295 | ld 98 |
| 296 | hv 98 |
| 297 | _av_ 98 |
| 298 | ger 97 |
| 299 | ni 96 |
| 300 | gen 96 |
| 301 | ie 95 |
| 302 | ser_ 94 |
| 303 | _et 94 |
| 304 | spen 94 |
| 305 | _hv 94 |
| 306 | men_ 93 |
| 307 | Espe 92 |
| 308 | Es 92 |
| 309 | _Esp 92 |
| 310 | _Es 92 |
| 311 | _Espe 92 |
| 312 | Esp 92 |
| 313 | _al 92 |
| 314 | Espen 92 |
| 315 | lle 89 |
| 316 | rem 89 |
| 317 | id 89 |
| 318 | fø 89 |
| 319 | ei 88 |
| 320 | inn 88 |
| 321 | rd 88 |
| 322 | enne_ 88 |
| 323 | _henn 87 |
| 324 | henne 87 |
| 325 | henn 87 |
| 326 | kt 86 |
| 327 | spen_ 86 |
| 328 | _om 86 |
| 329 | ler_ 86 |
| 330 | da 86 |
| 331 | ett 86 |
| 332 | itt 86 |
| 333 | bl 85 |
| 334 | to 85 |
| 335 | _Je 84 |
| 336 | ger_ 84 |
| 337 | Je 84 |
| 338 | æ 84 |
| 339 | ma 83 |
| 340 | ing 83 |
| 341 | ær 83 |
| 342 | ns_ 83 |
| 343 | eli 82 |
| 344 | ang 82 |
| 345 | _be 82 |
| 346 | så 82 |
| 347 | _den 82 |
| 348 | pp 81 |
| 349 | rk 81 |
| 350 | dr 81 |
| 351 | oe 81 |
| 352 | ss 81 |
| 353 | _fø 80 |
| 354 | ek 80 |
| 355 | le_ 79 |
| 356 | _no 79 |
| 357 | kj 78 |
| 358 | elig 78 |
| 359 | nes 78 |
| 360 | nn_ 77 |
| 361 | nk 77 |
| 362 | fr 77 |
| 363 | sl 77 |
| 364 | my 77 |
| 365 | kan 77 |
| 366 | så_ 76 |
| 367 | as 76 |
| 368 | _om_ 76 |
| 369 | _kan 75 |
| 370 | _ko 75 |
| 371 | _bl 73 |
| 372 | Hu 73 |
| 373 | nen 73 |
| 374 | _Hu 73 |
| 375 | eng 73 |
| 376 | gj 73 |
| 377 | rt_ 72 |
| 378 | ge_ 72 |
| 379 | ba 72 |
| 380 | lv 71 |
| 381 | rer 71 |
| 382 | nde 71 |
| 383 | ls 70 |
| 384 | lo 70 |
| 385 | ga 70 |
| 386 | _noe 70 |
| 387 | ro 70 |
| 388 | _den_ 70 |
| 389 | _ut 70 |
| 390 | noe 70 |
| 391 | Hun 69 |
| 392 | Hun_ 69 |
| 393 | _in 69 |
| 394 | _Hun 69 |
| 395 | _Hun_ 69 |
| 396 | ren 68 |
| 397 | øre 68 |
| 398 | ør_ 68 |
| 399 | sen 68 |
| 400 | sa 67 |
-
diff --git a/xapian-applications/omega/langclass/portuguese.lm b/xapian-applications/omega/langclass/portuguese.lm
new file mode 100644
index 0000000..4e1a5d7
-
|
+
|
|
| 1 | _ 35328 |
| 2 | a 10423 |
| 3 | e 10132 |
| 4 | o 8919 |
| 5 | s 6795 |
| 6 | r 6033 |
| 7 | i 5443 |
| 8 | n 4588 |
| 9 | d 4531 |
| 10 | t 4217 |
| 11 | m 3476 |
| 12 | u 3404 |
| 13 | o_ 3240 |
| 14 | a_ 3029 |
| 15 | e_ 2879 |
| 16 | c 2756 |
| 17 | s_ 2461 |
| 18 | _d 2379 |
| 19 | l 2307 |
| 20 | p 2242 |
| 21 | _a 1753 |
| 22 | de 1751 |
| 23 | , 1660 |
| 24 | ,_ 1658 |
| 25 | _e 1454 |
| 26 | es 1447 |
| 27 | os 1412 |
| 28 | ra 1343 |
| 29 | _p 1328 |
| 30 | nt 1302 |
| 31 | _de 1248 |
| 32 | do 1215 |
| 33 | en 1176 |
| 34 | re 1150 |
| 35 | as 1123 |
| 36 | v 1115 |
| 37 | m_ 1113 |
| 38 | de_ 1096 |
| 39 | er 1082 |
| 40 | g 1053 |
| 41 | _c 1047 |
| 42 | da 1008 |
| 43 | co 986 |
| 44 | os_ 975 |
| 45 | te 974 |
| 46 | ar 950 |
| 47 | or 943 |
| 48 | q 938 |
| 49 | qu 938 |
| 50 | _s 908 |
| 51 | ta 902 |
| 52 | _de_ 901 |
| 53 | _o 858 |
| 54 | se 841 |
| 55 | ue 831 |
| 56 | to 799 |
| 57 | ad 777 |
| 58 | . 761 |
| 59 | que 752 |
| 60 | em 751 |
| 61 | an 748 |
| 62 | f 746 |
| 63 | r_ 745 |
| 64 | b 732 |
| 65 | st 718 |
| 66 | is 716 |
| 67 | al 712 |
| 68 | _qu 706 |
| 69 | _q 706 |
| 70 | in 701 |
| 71 | as_ 696 |
| 72 | ã 695 |
| 73 | do_ 685 |
| 74 | ent 678 |
| 75 | ão 677 |
| 76 | _n 671 |
| 77 | _co 660 |
| 78 | _a_ 654 |
| 79 | _m 646 |
| 80 | on 645 |
| 81 | ç 624 |
| 82 | ri 623 |
| 83 | _que 619 |
| 84 | ma 602 |
| 85 | po 581 |
| 86 | ia 580 |
| 87 | ão_ 575 |
| 88 | ._ 573 |
| 89 | na 572 |
| 90 | me 564 |
| 91 | ro 554 |
| 92 | _t 544 |
| 93 | pa 533 |
| 94 | da_ 528 |
| 95 | h 523 |
| 96 | ue_ 515 |
| 97 | ca 511 |
| 98 | que_ 509 |
| 99 | nte 503 |
| 100 | no 499 |
| 101 | tr 498 |
| 102 | am 496 |
| 103 | em_ 491 |
| 104 | _que_ 487 |
| 105 | _se 485 |
| 106 | om 471 |
| 107 | io 460 |
| 108 | _do 459 |
| 109 | ti 448 |
| 110 | ci 445 |
| 111 | _da 444 |
| 112 | nd 442 |
| 113 | ei 435 |
| 114 | ra_ 435 |
| 115 | pr 427 |
| 116 | _r 423 |
| 117 | _e_ 420 |
| 118 | _f 420 |
| 119 | ss 412 |
| 120 | es_ 412 |
| 121 | el 407 |
| 122 | id 406 |
| 123 | _o_ 399 |
| 124 | _pa 390 |
| 125 | um 379 |
| 126 | pe 378 |
| 127 | _po 376 |
| 128 | la 374 |
| 129 | ir 371 |
| 130 | á 371 |
| 131 | ic 362 |
| 132 | di 362 |
| 133 | li 359 |
| 134 | é 359 |
| 135 | _re 353 |
| 136 | ve 353 |
| 137 | mo 350 |
| 138 | s, 349 |
| 139 | s,_ 349 |
| 140 | ou 347 |
| 141 | com 340 |
| 142 | sa 338 |
| 143 | si 338 |
| 144 | men 337 |
| 145 | rt 331 |
| 146 | _i 330 |
| 147 | con 330 |
| 148 | o, 327 |
| 149 | _da_ 326 |
| 150 | o,_ 326 |
| 151 | se_ 325 |
| 152 | _com 325 |
| 153 | ado 323 |
| 154 | to_ 322 |
| 155 | ai 322 |
| 156 | it 320 |
| 157 | A 319 |
| 158 | ec 316 |
| 159 | dos 316 |
| 160 | _em 312 |
| 161 | ção 310 |
| 162 | aç 310 |
| 163 | çã 310 |
| 164 | ara 305 |
| 165 | so 299 |
| 166 | tu 299 |
| 167 | res 297 |
| 168 | im 296 |
| 169 | _pr 295 |
| 170 | mi 293 |
| 171 | ua 292 |
| 172 | nto 291 |
| 173 | ment 290 |
| 174 | í 290 |
| 175 | par 288 |
| 176 | _do_ 287 |
| 177 | ce 286 |
| 178 | est 286 |
| 179 | u_ 284 |
| 180 | ente 284 |
| 181 | S 278 |
| 182 | l_ 278 |
| 183 | _u 278 |
| 184 | " 276 |
| 185 | ni 276 |
| 186 | z 274 |
| 187 | sta 273 |
| 188 | nc 272 |
| 189 | _em_ 270 |
| 190 | P 269 |
| 191 | ção_ 267 |
| 192 | _v 267 |
| 193 | at 267 |
| 194 | dos_ 266 |
| 195 | _es 262 |
| 196 | « 259 |
| 197 | _« 259 |
| 198 | te_ 258 |
| 199 | » 257 |
| 200 | va 255 |
| 201 | le 252 |
| 202 | ur 252 |
| 203 | _um 252 |
| 204 | vi 251 |
| 205 | _par 250 |
| 206 | a, 247 |
| 207 | a,_ 247 |
| 208 | _con 247 |
| 209 | ant 242 |
| 210 | lo 240 |
| 211 | ia_ 240 |
| 212 | gu 237 |
| 213 | ar_ 235 |
| 214 | ac 235 |
| 215 | e,_ 234 |
| 216 | e, 234 |
| 217 | no_ 232 |
| 218 | eg 232 |
| 219 | il 232 |
| 220 | ns 232 |
| 221 | er_ 231 |
| 222 | _ma 230 |
| 223 | por 230 |
| 224 | _in 228 |
| 225 | _l 226 |
| 226 | ó 225 |
| 227 | ont 224 |
| 228 | _no 223 |
| 229 | _P 222 |
| 230 | tra 220 |
| 231 | E 219 |
| 232 | ida 218 |
| 233 | is_ 217 |
| 234 | ol 216 |
| 235 | açã 215 |
| 236 | ter 215 |
| 237 | ação 215 |
| 238 | _A 211 |
| 239 | un 211 |
| 240 | - 210 |
| 241 | _te 210 |
| 242 | or_ 209 |
| 243 | ma_ 208 |
| 244 | _pe 208 |
| 245 | ara_ 208 |
| 246 | C 206 |
| 247 | ist 202 |
| 248 | para 202 |
| 249 | nta 201 |
| 250 | ais 201 |
| 251 | ut 198 |
| 252 | nte_ 198 |
| 253 | j 197 |
| 254 | dad 196 |
| 255 | _na 195 |
| 256 | am_ 195 |
| 257 | ade 193 |
| 258 | ica 191 |
| 259 | x 190 |
| 260 | al_ 189 |
| 261 | O 188 |
| 262 | des 187 |
| 263 | _para 187 |
| 264 | ada 187 |
| 265 | nh 186 |
| 266 | _se_ 186 |
| 267 | mp 185 |
| 268 | ndo 184 |
| 269 | R 183 |
| 270 | _por 181 |
| 271 | ação_ 181 |
| 272 | para_ 179 |
| 273 | eir 177 |
| 274 | ui 177 |
| 275 | vo 177 |
| 276 | ou_ 177 |
| 277 | ta_ 177 |
| 278 | M 176 |
| 279 | ria 175 |
| 280 | tos 175 |
| 281 | rr 174 |
| 282 | D 174 |
| 283 | io_ 174 |
| 284 | br 174 |
| 285 | _di 173 |
| 286 | õ 173 |
| 287 | õe 173 |
| 288 | fo 173 |
| 289 | I 172 |
| 290 | ões 172 |
| 291 | _C 171 |
| 292 | mo_ 171 |
| 293 | ov 170 |
| 294 | pro 169 |
| 295 | _os_ 169 |
| 296 | _os 169 |
| 297 | das 167 |
| 298 | iv 166 |
| 299 | uma 165 |
| 300 | gr 165 |
| 301 | su 164 |
| 302 | fi 164 |
| 303 | um_ 162 |
| 304 | na_ 162 |
| 305 | ga 162 |
| 306 | ais_ 161 |
| 307 | _S 161 |
| 308 | lh 159 |
| 309 | ort 159 |
| 310 | cia 158 |
| 311 | .. 157 |
| 312 | _est 156 |
| 313 | cont 156 |
| 314 | ig 155 |
| 315 | á_ 154 |
| 316 | ran 154 |
| 317 | ça 154 |
| 318 | om_ 153 |
| 319 | _en 152 |
| 320 | dade 152 |
| 321 | _as 152 |
| 322 | ho 152 |
| 323 | ntr 151 |
| 324 | nto_ 151 |
| 325 | fe 150 |
| 326 | N 149 |
| 327 | das_ 149 |
| 328 | uma_ 149 |
| 329 | ess 149 |
| 330 | é_ 148 |
| 331 | ndo_ 147 |
| 332 | ob 147 |
| 333 | »_ 147 |
| 334 | ul 146 |
| 335 | ente_ 146 |
| 336 | go 146 |
| 337 | ento 144 |
| 338 | ver 144 |
| 339 | _des 144 |
| 340 | gi 144 |
| 341 | ha 142 |
| 342 | cu 142 |
| 343 | idad 142 |
| 344 | av 141 |
| 345 | ões_ 141 |
| 346 | _pro 141 |
| 347 | ura 141 |
| 348 | ap 139 |
| 349 | _com_ 139 |
| 350 | _ca 139 |
| 351 | com_ 139 |
| 352 | ao 139 |
| 353 | ne 138 |
| 354 | od 138 |
| 355 | _" 137 |
| 356 | _M 137 |
| 357 | pre 137 |
| 358 | ras 136 |
| 359 | _me 136 |
| 360 | _ao 136 |
| 361 | _no_ 134 |
| 362 | oc 134 |
| 363 | str 133 |
| 364 | tes 133 |
| 365 | _b 133 |
| 366 | and 133 |
| 367 | _g 133 |
| 368 | ro_ 133 |
| 369 | omo 133 |
| 370 | _dos 132 |
| 371 | _fo 132 |
| 372 | _dos_ 132 |
| 373 | rn 132 |
| 374 | mento 131 |
| 375 | ito 131 |
| 376 | ev 131 |
| 377 | rio 130 |
| 378 | ass 130 |
| 379 | eu 130 |
| 380 | be 128 |
| 381 | os, 128 |
| 382 | os,_ 128 |
| 383 | sp 127 |
| 384 | _uma 127 |
| 385 | ep 126 |
| 386 | tad 125 |
| 387 | s. 125 |
| 388 | _uma_ 125 |
| 389 | _E 125 |
| 390 | idade 124 |
| 391 | _um_ 124 |
| 392 | nã 124 |
| 393 | não 124 |
| 394 | ct 123 |
| 395 | ram 123 |
| 396 | ado_ 123 |
| 397 | ela 123 |
| 398 | omo_ 121 |
| 399 | iz 121 |
| 400 | _an 121 |
-
diff --git a/xapian-applications/omega/langclass/russian-iso8859_5.lm b/xapian-applications/omega/langclass/russian-iso8859_5.lm
new file mode 100644
index 0000000..4768902
-
|
+
|
|
| 1 | _ 76249 |
| 2 | Þ 19732 |
| 3 | Õ 16714 |
| 4 | Ð 14389 |
| 5 | Ø 13942 |
| 6 | â 13160 |
| 7 | Ý 12444 |
| 8 | á 9867 |
| 9 | à 8461 |
| 10 | Ò 7895 |
| 11 | Û 7330 |
| 12 | Ú 6498 |
| 13 | Ü 5935 |
| 14 | . 5725 |
| 15 | ã 5287 |
| 16 | Ô 5019 |
| 17 | ß 4877 |
| 18 | ï 4083 |
| 19 | , 3899 |
| 20 | ,_ 3878 |
| 21 | ë 3656 |
| 22 | ì 3376 |
| 23 | Ø_ 3167 |
| 24 | _ß 3144 |
| 25 | Õ_ 3135 |
| 26 | Þ_ 3098 |
| 27 | - 3019 |
| 28 | × 2983 |
| 29 | _Ò 2952 |
| 30 | ._ 2930 |
| 31 | _á 2919 |
| 32 | ç 2887 |
| 33 | Ó 2876 |
| 34 | Ñ 2797 |
| 35 | áâ 2672 |
| 36 | _Ý 2631 |
| 37 | âÞ 2585 |
| 38 | .. 2407 |
| 39 | _Ø 2316 |
| 40 | ÝÞ 2300 |
| 41 | -_ 2294 |
| 42 | Ù 2281 |
| 43 | Ð_ 2249 |
| 44 | ÝÐ 2057 |
| 45 | ï_ 2029 |
| 46 | ÞÒ 1981 |
| 47 | ÝØ 1950 |
| 48 | _â 1944 |
| 49 | å 1874 |
| 50 | ÕÝ 1856 |
| 51 | _Þ 1774 |
| 52 | ... 1744 |
| 53 | àÐ 1709 |
| 54 | ÝÕ 1685 |
| 55 | ßÞ 1636 |
| 56 | _- 1625 |
| 57 | ÚÞ 1616 |
| 58 | âÕ 1595 |
| 59 | àÞ 1584 |
| 60 | _Ú 1558 |
| 61 | _-_ 1531 |
| 62 | Ù_ 1521 |
| 63 | Õâ 1518 |
| 64 | Ö 1509 |
| 65 | _Ø_ 1454 |
| 66 | âÐ 1433 |
| 67 | ÐÝ 1419 |
| 68 | Õà 1396 |
| 69 | Þâ 1389 |
| 70 | ì_ 1381 |
| 71 | ÓÞ 1375 |
| 72 | ÐÛ 1370 |
| 73 | _ßÞ 1364 |
| 74 | àÕ 1345 |
| 75 | ÚÐ 1338 |
| 76 | ßà 1337 |
| 77 | ÒÐ 1329 |
| 78 | âØ 1306 |
| 79 | ÛØ 1300 |
| 80 | _Ô 1297 |
| 81 | _Ü 1290 |
| 82 | Õá 1284 |
| 83 | ÒÞ 1271 |
| 84 | çÕ 1256 |
| 85 | Þà 1245 |
| 86 | .... 1232 |
| 87 | âì 1219 |
| 88 | Þá 1212 |
| 89 | è 1192 |
| 90 | î 1187 |
| 91 | Ò_ 1168 |
| 92 | ..... 1167 |
| 93 | ÞÝ 1147 |
| 94 | ÐÚ 1144 |
| 95 | æ 1109 |
| 96 | ÞÓ 1101 |
| 97 | ÛÞ 1099 |
| 98 | â_ 1093 |
| 99 | àØ 1076 |
| 100 | Ü_ 1074 |
| 101 | _ßà 1063 |
| 102 | ÞÛ 1062 |
| 103 | Ûì 1045 |
| 104 | _ÝÕ 1034 |
| 105 | ÕÛ 1029 |
| 106 | _Ñ 1026 |
| 107 | ØÝ 1000 |
| 108 | ÞÔ 998 |
| 109 | ÞÜ 996 |
| 110 | ÜÕ 993 |
| 111 | ë_ 975 |
| 112 | ÒÕ 968 |
| 113 | áÚ 968 |
| 114 | _ÝÐ 966 |
| 115 | ÔÕ 946 |
| 116 | Ýë 943 |
| 117 | _à 931 |
| 118 | _ç 923 |
| 119 | Ðâ 913 |
| 120 | ÕÜ 910 |
| 121 | ã_ 907 |
| 122 | ×Ð 898 |
| 123 | å_ 898 |
| 124 | ÛÕ 889 |
| 125 | âÞ_ 881 |
| 126 | Øâ 878 |
| 127 | ÞÙ 875 |
| 128 | áÕ 862 |
| 129 | _Ò_ 833 |
| 130 | _× 816 |
| 131 | ÚØ 816 |
| 132 | âà 807 |
| 133 | " 786 |
| 134 | âì_ 769 |
| 135 | ÕÔ 767 |
| 136 | ÝÕ_ 762 |
| 137 | áï 754 |
| 138 | ÛÐ 748 |
| 139 | ÞÑ 747 |
| 140 | ÜÞ 741 |
| 141 | ÔÐ 730 |
| 142 | ÓÞ_ 715 |
| 143 | Ú_ 713 |
| 144 | Øá 711 |
| 145 | _ã 702 |
| 146 | ÞÙ_ 697 |
| 147 | ØÛ 694 |
| 148 | ÜÐ 692 |
| 149 | ÝÝ 687 |
| 150 | ÔÞ 662 |
| 151 | Ðá 660 |
| 152 | ÐÜ 656 |
| 153 | Øï 649 |
| 154 | ÖÕ 646 |
| 155 | Ð× 638 |
| 156 | áÞ 629 |
| 157 | ÝÐ_ 619 |
| 158 | _Ó 615 |
| 159 | ÜØ 612 |
| 160 | _Ð 610 |
| 161 | í 609 |
| 162 | Òá 608 |
| 163 | .._ 605 |
| 164 | Òë 604 |
| 165 | ÞÓÞ 604 |
| 166 | ØÜ 604 |
| 167 | _ÚÞ 600 |
| 168 | ÐÒ 597 |
| 169 | áÛ 594 |
| 170 | ØÕ 593 |
| 171 | _ÝÕ_ 589 |
| 172 | _Õ 587 |
| 173 | _âÕ 583 |
| 174 | âã 583 |
| 175 | Øç 583 |
| 176 | àã 575 |
| 177 | Þáâ 571 |
| 178 | é 571 |
| 179 | Û_ 570 |
| 180 | _í 569 |
| 181 | _ï 559 |
| 182 | ° 556 |
| 183 | ÕÝØ 544 |
| 184 | Ø× 540 |
| 185 | ÕÚ 536 |
| 186 | ÞÒÐ 533 |
| 187 | ä 525 |
| 188 | : 519 |
| 189 | ² 513 |
| 190 | ÐÝØ 511 |
| 191 | _Òá 510 |
| 192 | ØÙ 510 |
| 193 | ½ 508 |
| 194 | _ÚÐ 508 |
| 195 | ! 503 |
| 196 | ? 501 |
| 197 | ÔØ 498 |
| 198 | ÛØ_ 489 |
| 199 | ¿ 488 |
| 200 | ßàÞ 486 |
| 201 | _àÐ 485 |
| 202 | áØ 484 |
| 203 | Øà 484 |
| 204 | _áâ 484 |
| 205 | ìÝ 484 |
| 206 | ÛìÝ 484 |
| 207 | :_ 484 |
| 208 | áï_ 480 |
| 209 | _×Ð 477 |
| 210 | ÑÞ 470 |
| 211 | _Û 469 |
| 212 | ..._ 465 |
| 213 | Ñë 464 |
| 214 | Øå 464 |
| 215 | ¸ 462 |
| 216 | ÕÓ 461 |
| 217 | âÒ 459 |
| 218 | ÝØï 458 |
| 219 | ïâ 454 |
| 220 | çâ 454 |
| 221 | íâ 445 |
| 222 | çÕá 442 |
| 223 | _âÞ 442 |
| 224 | Øï_ 441 |
| 225 | ØÚ 440 |
| 226 | ÒØ 437 |
| 227 | ÐÚ_ 436 |
| 228 | áâÐ 436 |
| 229 | ÞÓÞ_ 435 |
| 230 | _² 434 |
| 231 | ØçÕ 433 |
| 232 | æØ 431 |
| 233 | çâÞ 431 |
| 234 | ëå 429 |
| 235 | _íâ 429 |
| 236 | Á 425 |
| 237 | _çâ 424 |
| 238 | _° 424 |
| 239 | î_ 423 |
| 240 | ßÕ 422 |
| 241 | Ýï 422 |
| 242 | _çâÞ 422 |
| 243 | Ûï 419 |
| 244 | ÒáÕ 418 |
| 245 | Ôã 418 |
| 246 | ÕáÚ 415 |
| 247 | Ýâ 413 |
| 248 | ÚÐÚ 411 |
| 249 | áâÞ 411 |
| 250 | _Òë 409 |
| 251 | Ýã 408 |
| 252 | âá 406 |
| 253 | Ý_ 406 |
| 254 | _¿ 401 |
| 255 | ¼ 401 |
| 256 | ÝÞ_ 399 |
| 257 | _ßàÞ 398 |
| 258 | _. 397 |
| 259 | àÞÒ 396 |
| 260 | æÕ 396 |
| 261 | Úâ 394 |
| 262 | Õáâ 394 |
| 263 | _" 393 |
| 264 | èÕ 393 |
| 265 | á_ 392 |
| 266 | _Þâ 392 |
| 267 | ¾ 391 |
| 268 | _ÝÐ_ 391 |
| 269 | àÕÔ 391 |
| 270 | çÕáÚ 390 |
| 271 | Þ- 390 |
| 272 | ÒÐÝ 388 |
| 273 | Ð, 385 |
| 274 | ÐÔ 384 |
| 275 | Ðï 384 |
| 276 | _ï_ 383 |
| 277 | Ð,_ 383 |
| 278 | âë 383 |
| 279 | ?_ 383 |
| 280 | _ÞÑ 380 |
| 281 | _ÒáÕ 380 |
| 282 | _âÐ 378 |
| 283 | _ÚÐÚ 376 |
| 284 | åÞ 375 |
| 285 | âÐÚ 375 |
| 286 | ÐÛì 374 |
| 287 | ÞÖ 373 |
| 288 | ëå_ 372 |
| 289 | ÞÒÞ 372 |
| 290 | ØÒ 371 |
| 291 | _ÒÞ 369 |
| 292 | Üã 369 |
| 293 | _½ 369 |
| 294 | ÕÙ 368 |
| 295 | ßàÕ 368 |
| 296 | ×Ý 366 |
| 297 | ØçÕá 365 |
| 298 | ßØ 365 |
| 299 | ÕÓÞ 362 |
| 300 | _áÞ 360 |
| 301 | ÞÕ 360 |
| 302 | !_ 360 |
| 303 | Ðâì 360 |
| 304 | áâØ 358 |
| 305 | Øå_ 358 |
| 306 | âÝ 358 |
| 307 | ÜÕÝ 358 |
| 308 | âáï 356 |
| 309 | ØÕ_ 356 |
| 310 | ØçÕáÚ 354 |
| 311 | ÑÕ 352 |
| 312 | _Ñë 352 |
| 313 | áâà 349 |
| 314 | Úã 349 |
| 315 | _ÜÞ 348 |
| 316 | Õâ_ 348 |
| 317 | Â 346 |
| 318 | _ÜÕ 344 |
| 319 | ÕÒ 344 |
| 320 | ßàØ 343 |
| 321 | çØ 342 |
| 322 | ÜÝ 341 |
| 323 | ÝØï_ 339 |
| 324 | Ðà 338 |
| 325 | ÝÝÞ 337 |
| 326 | ÞÒÐÝ 334 |
| 327 | Úá 334 |
| 328 | ãà 328 |
| 329 | _çÕ 328 |
| 330 | ÞÛì 328 |
| 331 | ãâ 327 |
| 332 | çâÞ_ 324 |
| 333 | ÞÜ_ 323 |
| 334 | Þß 323 |
| 335 | àÜ 322 |
| 336 | °_ 322 |
| 337 | _çâÞ_ 322 |
| 338 | íâÞ 322 |
| 339 | Þç 321 |
| 340 | Þ,_ 320 |
| 341 | Þ, 320 |
| 342 | áÚÞ 319 |
| 343 | ÚÞÝ 319 |
| 344 | ÛÞÒ 318 |
| 345 | ëÙ 318 |
| 346 | ãá 317 |
| 347 | Þâ_ 316 |
| 348 | ØàÞ 315 |
| 349 | áâÒ 314 |
| 350 | Ø, 314 |
| 351 | ØÙ_ 313 |
| 352 | áá 313 |
| 353 | Ø,_ 313 |
| 354 | âÞÜ 312 |
| 355 | ÐÕ 312 |
| 356 | ÕÝÝ 311 |
| 357 | Õ× 311 |
| 358 | ÝÞÙ 311 |
| 359 | _íâÞ 310 |
| 360 | º 309 |
| 361 | áÐ 309 |
| 362 | àÐ× 309 |
| 363 | Õß 309 |
| 364 | _ÔÞ 308 |
| 365 | ÞÛÞ 308 |
| 366 | áâì 308 |
| 367 | ãÔ 307 |
| 368 | ÔÕÛ 307 |
| 369 | _¸ 305 |
| 370 | ÐÑ 305 |
| 371 | áì 303 |
| 372 | ÐÛìÝ 302 |
| 373 | ÞÚ 300 |
| 374 | áÚØ 300 |
| 375 | Ýëå 300 |
| 376 | _ßàØ 298 |
| 377 | çÐ 297 |
| 378 | _Ö 297 |
| 379 | Õ, 297 |
| 380 | Õ,_ 297 |
| 381 | _°_ 296 |
| 382 | _ÝØ 296 |
| 383 | _._ 295 |
| 384 | âáï_ 295 |
| 385 | ØØ 294 |
| 386 | ×ÝÐ 293 |
| 387 | ï, 292 |
| 388 | ï,_ 292 |
| 389 | _ßàÕ 291 |
| 390 | _Á 290 |
| 391 | ÝÞÙ_ 290 |
| 392 | Ðï_ 288 |
| 393 | _¾ 288 |
| 394 | ÐÛØ 286 |
| 395 | ÖÕ_ 285 |
| 396 | áß 285 |
| 397 | Øáâ 285 |
| 398 | Ýëå_ 285 |
| 399 | ×_ 283 |
| 400 | _¼ 283 |
-
diff --git a/xapian-applications/omega/langclass/russian-koi8_r.lm b/xapian-applications/omega/langclass/russian-koi8_r.lm
new file mode 100644
index 0000000..43dd334
-
|
+
|
|
| 1 | _ 28256 |
| 2 | Ï 8957 |
| 3 | Å 7206 |
| 4 | Á 6230 |
| 5 | É 5966 |
| 6 | Ô 5385 |
| 7 | Î 5338 |
| 8 | Ó 4464 |
| 9 | Ò 3984 |
| 10 | × 3504 |
| 11 | Ì 3335 |
| 12 | Ë 2700 |
| 13 | Í 2441 |
| 14 | Ð 2324 |
| 15 | Ä 2230 |
| 16 | Õ 1840 |
| 17 | Ñ 1562 |
| 18 | Ù 1540 |
| 19 | _Ð 1505 |
| 20 | , 1492 |
| 21 | ,_ 1492 |
| 22 | Ø 1437 |
| 23 | Ï_ 1423 |
| 24 | Å_ 1335 |
| 25 | ÓÔ 1286 |
| 26 | Ú 1283 |
| 27 | Ç 1233 |
| 28 | Â 1232 |
| 29 | É_ 1193 |
| 30 | ÔÏ 1175 |
| 31 | _Ó 1146 |
| 32 | Þ 1143 |
| 33 | _× 1118 |
| 34 | _Î 1062 |
| 35 | Á_ 1032 |
| 36 | . 999 |
| 37 | ÎÏ 996 |
| 38 | ÅÎ 920 |
| 39 | Ï× 911 |
| 40 | ._ 908 |
| 41 | Ê 907 |
| 42 | ÎÁ 893 |
| 43 | ÒÏ 869 |
| 44 | ÒÁ 851 |
| 45 | ÎÉ 844 |
| 46 | Ñ_ 808 |
| 47 | ÐÒ 786 |
| 48 | ÐÏ 775 |
| 49 | ËÏ 768 |
| 50 | Ö 763 |
| 51 | _Ï 735 |
| 52 | ÎÅ 730 |
| 53 | ÏÓ 723 |
| 54 | _É 721 |
| 55 | È 696 |
| 56 | ÒÅ 667 |
| 57 | _ÐÒ 629 |
| 58 | _Ë 626 |
| 59 | ×_ 621 |
| 60 | ÔÅ 620 |
| 61 | Ø_ 616 |
| 62 | ×Ï 599 |
| 63 | Ê_ 597 |
| 64 | _ÐÏ 592 |
| 65 | ÇÏ 589 |
| 66 | ÅÒ 584 |
| 67 | ÌÉ 579 |
| 68 | ÔÁ 576 |
| 69 | ÅÔ 566 |
| 70 | Í_ 562 |
| 71 | ÅÓ 548 |
| 72 | ÏÒ 545 |
| 73 | ÅÌ 545 |
| 74 | ÁÌ 544 |
| 75 | ×Á 535 |
| 76 | ÔØ 533 |
| 77 | ÌÅ 528 |
| 78 | _Ä 522 |
| 79 | ÏÍ 520 |
| 80 | ËÁ 520 |
| 81 | À 518 |
| 82 | Û 510 |
| 83 | _Ô 507 |
| 84 | ÏÔ 502 |
| 85 | ÏÌ 500 |
| 86 | ÁÎ 498 |
| 87 | ÌØ 486 |
| 88 | ÏÄ 476 |
| 89 | ÁÔ 475 |
| 90 | ÔÉ 471 |
| 91 | ÉÔ 458 |
| 92 | ÏÎ 457 |
| 93 | ÏÇ 450 |
| 94 | Ã 450 |
| 95 | ÌÏ 449 |
| 96 | Ô_ 448 |
| 97 | _ÎÁ 447 |
| 98 | ÎÙ 443 |
| 99 | ÄÅ 442 |
| 100 | _Í 438 |
| 101 | ÌÁ 431 |
| 102 | _×_ 430 |
| 103 | ÓË 428 |
| 104 | _ÎÅ 424 |
| 105 | ÒÉ 400 |
| 106 | ÅÍ 396 |
| 107 | _Â 392 |
| 108 | _Þ 388 |
| 109 | ÞÅ 388 |
| 110 | ÁË 388 |
| 111 | _É_ 385 |
| 112 | _Ò 382 |
| 113 | ÔÏ_ 381 |
| 114 | ÉÎ 380 |
| 115 | ÍÅ 374 |
| 116 | ÄÁ 362 |
| 117 | Ù_ 361 |
| 118 | ×Å 350 |
| 119 | ÔØ_ 348 |
| 120 | ÐÒÏ 347 |
| 121 | ÏÊ 346 |
| 122 | ÚÁ 338 |
| 123 | È_ 336 |
| 124 | ÁÓ 335 |
| 125 | ÏÂ 328 |
| 126 | ÓÑ 326 |
| 127 | ÉÍ 323 |
| 128 | ÔÒ 321 |
| 129 | ÎÁ_ 321 |
| 130 | ÓÏ 320 |
| 131 | ÅÄ 320 |
| 132 | ÏÓÔ 318 |
| 133 | ÉÅ 318 |
| 134 | ÍÏ 317 |
| 135 | ÎÎ 316 |
| 136 | ÓÅ 314 |
| 137 | - 314 |
| 138 | Á× 309 |
| 139 | ÖÅ 308 |
| 140 | Õ_ 307 |
| 141 | ËÉ 306 |
| 142 | ÎÅ_ 305 |
| 143 | ÇÏ_ 302 |
| 144 | _Ú 300 |
| 145 | ÉÌ 295 |
| 146 | _ÐÒÏ 291 |
| 147 | Ý 289 |
| 148 | Ô× 288 |
| 149 | ÉÓ 285 |
| 150 | _Õ 275 |
| 151 | _ËÏ 274 |
| 152 | ÅÎÉ 274 |
| 153 | Ü 273 |
| 154 | ÎÏ_ 272 |
| 155 | ÉÒ 271 |
| 156 | _Ç 271 |
| 157 | ÁÚ 267 |
| 158 | ÁÒ 266 |
| 159 | ÄÉ 265 |
| 160 | ÏÊ_ 264 |
| 161 | ÏÇÏ 263 |
| 162 | ÁÍ 260 |
| 163 | ÐÅ 258 |
| 164 | ×Ù 255 |
| 165 | ÍÉ 254 |
| 166 | ÄÏ 254 |
| 167 | ÓÔ× 247 |
| 168 | ÉÚ 246 |
| 169 | ÃÉ 246 |
| 170 | î 244 |
| 171 | Æ 240 |
| 172 | _Ü 239 |
| 173 | _Á 239 |
| 174 | Ë_ 238 |
| 175 | ó 237 |
| 176 | ÅÓÔ 237 |
| 177 | _ÎÅ_ 236 |
| 178 | ÂÙ 234 |
| 179 | ÉÑ 234 |
| 180 | ÒÕ 229 |
| 181 | ÅË 228 |
| 182 | ÓÉ 226 |
| 183 | ÔÅÌ 224 |
| 184 | ÉË 224 |
| 185 | ÌÉ_ 222 |
| 186 | ÅÊ 222 |
| 187 | ÓÑ_ 222 |
| 188 | ÎÔ 220 |
| 189 | ØÎ 218 |
| 190 | ÞÔ 216 |
| 191 | ÌØÎ 216 |
| 192 | ÏÅ 214 |
| 193 | ÍÁ 213 |
| 194 | _ÚÁ 211 |
| 195 | Ï×Á 211 |
| 196 | ÂÏ 210 |
| 197 | _Ì 210 |
| 198 | ÞÁ 209 |
| 199 | _î 208 |
| 200 | ÓÌ 205 |
| 201 | ÞÔÏ 204 |
| 202 | _ÞÔ 204 |
| 203 | ÅÔ_ 203 |
| 204 | _ÞÔÏ 203 |
| 205 | ÜÔ 202 |
| 206 | Ó_ 202 |
| 207 | ×É 201 |
| 208 | ÁÑ 199 |
| 209 | ÅÎÎ 199 |
| 210 | ÔÏÒ 199 |
| 211 | _ÎÁ_ 199 |
| 212 | ÓÔÁ 198 |
| 213 | ÏÖ 198 |
| 214 | ÉÅ_ 197 |
| 215 | _ó 197 |
| 216 | _ÓÏ 195 |
| 217 | ÐÒÉ 194 |
| 218 | ËÔ 193 |
| 219 | Ì_ 193 |
| 220 | ×Ó 191 |
| 221 | ÏÍ_ 190 |
| 222 | ÏÇÏ_ 187 |
| 223 | _Ö 187 |
| 224 | ÅÇ 186 |
| 225 | _ÔÏ 186 |
| 226 | ÁÄ 186 |
| 227 | _ÒÁ 185 |
| 228 | _ÜÔ 184 |
| 229 | _ËÁ 184 |
| 230 | ÓÔÉ 183 |
| 231 | _Å 182 |
| 232 | Å× 182 |
| 233 | ÔÓ 181 |
| 234 | _ÓÔ 180 |
| 235 | ÌÑ 180 |
| 236 | ÔÎ 180 |
| 237 | ÓÐ 173 |
| 238 | ÉÉ 172 |
| 239 | ÏÞ 172 |
| 240 | À_ 172 |
| 241 | ÐÒÅ 172 |
| 242 | ÏË 172 |
| 243 | ÁÔØ 171 |
| 244 | ÂÉ 171 |
| 245 | ÛÅ 170 |
| 246 | ÁÅ 169 |
| 247 | á 169 |
| 248 | ÏÒÏ 168 |
| 249 | ÙÈ 167 |
| 250 | Ï×Ï 166 |
| 251 | ÏÌØ 166 |
| 252 | ÁÎÉ 165 |
| 253 | ÞÔÏ_ 164 |
| 254 | _ÞÔÏ_ 163 |
| 255 | ÷ 163 |
| 256 | ÁÑ_ 163 |
| 257 | ÉÈ 162 |
| 258 | Ï,_ 160 |
| 259 | Ï, 160 |
| 260 | ÓÓ 160 |
| 261 | ÅÚ 159 |
| 262 | ÎÉÅ 158 |
| 263 | ÚÎ 157 |
| 264 | ÜÔÏ 157 |
| 265 | ÄÎ 156 |
| 266 | ÒÏ× 156 |
| 267 | ð 155 |
| 268 | Á, 155 |
| 269 | ÓËÏ 155 |
| 270 | _×Ï 155 |
| 271 | Á,_ 155 |
| 272 | _ÂÙ 154 |
| 273 | ÙÅ 154 |
| 274 | ÝÅ 154 |
| 275 | _ÏÂ 154 |
| 276 | É,_ 153 |
| 277 | É, 153 |
| 278 | ÎÎÏ 152 |
| 279 | _ÐÒÉ 151 |
| 280 | ÔÙ 150 |
| 281 | ÓÔÏ 150 |
| 282 | ÉÑ_ 150 |
| 283 | ÍÕ 150 |
| 284 | É× 150 |
| 285 | _- 149 |
| 286 | ÒÅÄ 148 |
| 287 | ÉÔÅ 147 |
| 288 | _ÜÔÏ 146 |
| 289 | ÍÅÎ 146 |
| 290 | ÏÐ 146 |
| 291 | _ÐÒÅ 145 |
| 292 | 145 |
| 293 | ÉÊ 145 |
| 294 | ÅÌØ 145 |
| 295 | _÷ 144 |
| 296 | _Ó_ 144 |
| 297 | ÉÞ 144 |
| 298 | __ 144 |
| 299 | _ 144 |
| 300 | _ 144 |
| 301 | ÈÏ 143 |
| 302 | ÅÇÏ 143 |
| 303 | ÑÔ 142 |
| 304 | ÔÓÑ 142 |
| 305 | ÐÅÒ 142 |
| 306 | ÙÊ 142 |
| 307 | ÐÁ 141 |
| 308 | ÏÔÏ 140 |
| 309 | -_ 140 |
| 310 | ÁÐ 139 |
| 311 | ÏÚ 139 |
| 312 | ÓËÉ 138 |
| 313 | ÙÈ_ 138 |
| 314 | ï 137 |
| 315 | ÓÁ 137 |
| 316 | ÙÅ_ 137 |
| 317 | _ð 136 |
| 318 | Å, 136 |
| 319 | Å,_ 136 |
| 320 | ÒÙ 136 |
| 321 | Î_ 135 |
| 322 | _á 135 |
| 323 | ÇÉ 134 |
| 324 | _×Ù 134 |
| 325 | ÓÔØ 134 |
| 326 | ÁÓÔ 133 |
| 327 | ÖÅ_ 133 |
| 328 | ÞÉ 133 |
| 329 | ÎÉÑ 133 |
| 330 | ÕÔ 133 |
| 331 | ÁÌÉ 132 |
| 332 | _-_ 131 |
| 333 | ËÕ 131 |
| 334 | ÓØ 131 |
| 335 | _ÄÏ 129 |
| 336 | ÄÕ 129 |
| 337 | ÒÁÚ 129 |
| 338 | ÖÎ 129 |
| 339 | ÁÅÔ 129 |
| 340 | _ÍÏ 127 |
| 341 | ÖÉ 127 |
| 342 | _ÄÅ 127 |
| 343 | ÓÔÒ 127 |
| 344 | ØÎÏ 126 |
| 345 | ÔÕ 126 |
| 346 | _×Ó 126 |
| 347 | ÌØÎÏ 126 |
| 348 | ÌÅÎ 125 |
| 349 | ÁÌØ 125 |
| 350 | Ï×_ 124 |
| 351 | ×Ì 123 |
| 352 | ÉÂ 123 |
| 353 | ÁË_ 123 |
| 354 | _ÐÅ 123 |
| 355 | ÔÅÌØ 123 |
| 356 | Ú_ 122 |
| 357 | ×Î 122 |
| 358 | ÁÀ 122 |
| 359 | ÅÎÔ 121 |
| 360 | ÐÏÌ 121 |
| 361 | ÉÈ_ 121 |
| 362 | ÙÊ_ 121 |
| 363 | ÓÎ 121 |
| 364 | ÎÏÇ 121 |
| 365 | ÃÅ 120 |
| 366 | _ÏÔ 120 |
| 367 | ÅÍ_ 119 |
| 368 | ÉÏ 119 |
| 369 | _ÔÅ 118 |
| 370 | ËÁË 118 |
| 371 | _ÎÏ 117 |
| 372 | ÉÉ_ 117 |
| 373 | ÉÔØ 117 |
| 374 | ë 116 |
| 375 | _ÉÚ 116 |
| 376 | ÕÓ 116 |
| 377 | ÎÏÓ 115 |
| 378 | ÂÅ 115 |
| 379 | ÕÄ 115 |
| 380 | ÅÒÅ 114 |
| 381 | ÂÌ 114 |
| 382 | ÉÔÅÌ 113 |
| 383 | ÅÇÏ_ 113 |
| 384 | ÙÍ 113 |
| 385 | ÁÂ 113 |
| 386 | ÎØ 113 |
| 387 | ÎÏÊ 112 |
| 388 | ÎÉÅ_ 112 |
| 389 | _Æ 112 |
| 390 | ÒÏÓ 111 |
| 391 | ËÒ 111 |
| 392 | _ËÁË 111 |
| 393 | ÛÉ 111 |
| 394 | Í, 111 |
| 395 | ÎÏ× 111 |
| 396 | Í,_ 111 |
| 397 | ÔÏÍ 110 |
| 398 | ÅÅ 110 |
| 399 | ÔÏ× 109 |
| 400 | Ó× 109 |
-
diff --git a/xapian-applications/omega/langclass/russian-windows1251.lm b/xapian-applications/omega/langclass/russian-windows1251.lm
new file mode 100644
index 0000000..6497c05
-
|
+
|
|
| 1 | _ 21836 |
| 2 | î 5818 |
| 3 | à 4506 |
| 4 | å 4258 |
| 5 | è 3769 |
| 6 | ò 3394 |
| 7 | í 3254 |
| 8 | ñ 2594 |
| 9 | ë 2470 |
| 10 | ð 2346 |
| 11 | â 2227 |
| 12 | ê 1798 |
| 13 | ì 1709 |
| 14 | ó 1673 |
| 15 | ä 1638 |
| 16 | ï 1377 |
| 17 | , 1187 |
| 18 | ,_ 1172 |
| 19 | î_ 1160 |
| 20 | ÿ 1059 |
| 21 | ü 1024 |
| 22 | û 1006 |
| 23 | å_ 966 |
| 24 | á 945 |
| 25 | _ï 937 |
| 26 | ç 910 |
| 27 | _í 907 |
| 28 | è_ 896 |
| 29 | òî 878 |
| 30 | ã 856 |
| 31 | _ñ 828 |
| 32 | _â 818 |
| 33 | à_ 801 |
| 34 | ÷ 795 |
| 35 | . 789 |
| 36 | ._ 761 |
| 37 | ñò 655 |
| 38 | íå 617 |
| 39 | _è 616 |
| 40 | íà 575 |
| 41 | ø 568 |
| 42 | é 565 |
| 43 | ïî 549 |
| 44 | ðà 526 |
| 45 | íî 524 |
| 46 | ÿ_ 522 |
| 47 | _ò 521 |
| 48 | _î 497 |
| 49 | _ä 496 |
| 50 | æ 484 |
| 51 | _ïî 477 |
| 52 | _ê 477 |
| 53 | ãî 472 |
| 54 | àë 470 |
| 55 | ü_ 461 |
| 56 | íè 456 |
| 57 | îâ 454 |
| 58 | êî 452 |
| 59 | ðî 432 |
| 60 | êà 431 |
| 61 | ëè 425 |
| 62 | õ 413 |
| 63 | òà 403 |
| 64 | åí 402 |
| 65 | _ì 396 |
| 66 | _á 396 |
| 67 | âî 395 |
| 68 | îë 388 |
| 69 | òî_ 384 |
| 70 | é_ 383 |
| 71 | _íå 378 |
| 72 | _è_ 375 |
| 73 | îð 369 |
| 74 | âà 367 |
| 75 | _íà 365 |
| 76 | ì_ 364 |
| 77 | îì 363 |
| 78 | ïð 357 |
| 79 | ó_ 357 |
| 80 | åë 353 |
| 81 | åð 352 |
| 82 | ëà 350 |
| 83 | èò 344 |
| 84 | îñ 344 |
| 85 | ë_ 343 |
| 86 | òü 338 |
| 87 | äå 337 |
| 88 | ëî 334 |
| 89 | ðè 330 |
| 90 | ðå 328 |
| 91 | èë 326 |
| 92 | àê 326 |
| 93 | îò 323 |
| 94 | - 322 |
| 95 | îí 320 |
| 96 | åò 317 |
| 97 | þ 312 |
| 98 | àí 308 |
| 99 | âå 308 |
| 100 | â_ 302 |
| 101 | _÷ 299 |
| 102 | ò_ 296 |
| 103 | çà 292 |
| 104 | íå_ 292 |
| 105 | òå 291 |
| 106 | äà 291 |
| 107 | _ç 291 |
| 108 | îä 286 |
| 109 | àð 283 |
| 110 | àò 283 |
| 111 | _ïð 274 |
| 112 | åñ 264 |
| 113 | ìî 263 |
| 114 | û_ 260 |
| 115 | îã 253 |
| 116 | _- 245 |
| 117 | _ó 245 |
| 118 | ê_ 244 |
| 119 | îá 240 |
| 120 | ãî_ 238 |
| 121 | ñÿ 235 |
| 122 | _íå_ 235 |
| 123 | ìå 233 |
| 124 | ëü 229 |
| 125 | -_ 225 |
| 126 | ñê 219 |
| 127 | _â_ 217 |
| 128 | åì 217 |
| 129 | íû 215 |
| 130 | îé 215 |
| 131 | ñå 211 |
| 132 | áû 211 |
| 133 | êè 211 |
| 134 | ÷ò 209 |
| 135 | òü_ 209 |
| 136 | ֌ 208 |
| 137 | _ã 206 |
| 138 | Í 206 |
| 139 | äî 205 |
| 140 | ö 205 |
| 141 | ÷òî 203 |
| 142 | èí 203 |
| 143 | àñ 202 |
| 144 | _-_ 202 |
| 145 | _÷ò 199 |
| 146 | _÷òî 199 |
| 147 | òè 197 |
| 148 | àâ 197 |
| 149 | íà_ 197 |
| 150 | _å 195 |
| 151 | åä 192 |
| 152 | _ð 191 |
| 153 | æå 190 |
| 154 | àì 190 |
| 155 | åã 188 |
| 156 | àç 188 |
| 157 | òð 185 |
| 158 | _òî 184 |
| 159 | _Í 183 |
| 160 | èì 182 |
| 161 | _çà 181 |
| 162 | äè 175 |
| 163 | Â 174 |
| 164 | _êî 172 |
| 165 | ëå 170 |
| 166 | _Â 169 |
| 167 | îé_ 168 |
| 168 | ! 168 |
| 169 | ÷òî_ 165 |
| 170 | ìó 165 |
| 171 | íî_ 164 |
| 172 | èñ 164 |
| 173 | í_ 164 |
| 174 | ìè 163 |
| 175 | ñòà 163 |
| 176 | ù 162 |
| 177 | _÷òî_ 162 |
| 178 | _áû 159 |
| 179 | ìà 158 |
| 180 | ðó 157 |
| 181 | âû 156 |
| 182 | âè 154 |
| 183 | âñ 154 |
| 184 | åãî 153 |
| 185 | èê 153 |
| 186 | èç 153 |
| 187 | ñü 153 |
| 188 | àø 152 |
| 189 | ûë 150 |
| 190 | _íà_ 149 |
| 191 | ñ_ 148 |
| 192 | òâ 148 |
| 193 | _ñò 146 |
| 194 | ñÿ_ 146 |
| 195 | ëè_ 146 |
| 196 | _ÿ 144 |
| 197 | îãî 143 |
| 198 | øå 141 |
| 199 | õ_ 141 |
| 200 | àê_ 141 |
| 201 | !_ 139 |
| 202 | ñî 139 |
| 203 | êó 139 |
| 204 | áî 137 |
| 205 | ïðî 136 |
| 206 | àòü 136 |
| 207 | åãî_ 135 |
| 208 | _êà 134 |
| 209 | _ìî 131 |
| 210 | èå 131 |
| 211 | îñò 130 |
| 212 | îì_ 129 |
| 213 | î, 128 |
| 214 | _ãî 128 |
| 215 | î,_ 127 |
| 216 | èâ 127 |
| 217 | ñë 126 |
| 218 | óò 126 |
| 219 | _òà 126 |
| 220 | íí 125 |
| 221 | àä 125 |
| 222 | Ñ 124 |
| 223 | íó 123 |
| 224 | î÷ 122 |
| 225 | _âû 122 |
| 226 | _âñ 121 |
| 227 | _îí 120 |
| 228 | áûë 120 |
| 229 | ïà 120 |
| 230 | óä 120 |
| 231 | _Ñ 119 |
| 232 | ïðè 119 |
| 233 | îâî 119 |
| 234 | õî 118 |
| 235 | ý 117 |
| 236 | _áûë 117 |
| 237 | è, 115 |
| 238 | åé 115 |
| 239 | _äî 115 |
| 240 | àë_ 114 |
| 241 | _æ 114 |
| 242 | è,_ 114 |
| 243 | _à 113 |
| 244 | _ý 113 |
| 245 | îå 112 |
| 246 | åñò 112 |
| 247 | îðî 110 |
| 248 | çí 110 |
| 249 | _äå 110 |
| 250 | øè 109 |
| 251 | èä 109 |
| 252 | ñòî 109 |
| 253 | _ïðî 109 |
| 254 | _âî 108 |
| 255 | àëè 108 |
| 256 | íÿ 107 |
| 257 | øà 107 |
| 258 | ýò 107 |
| 259 | îæ 106 |
| 260 | à, 106 |
| 261 | Ï 106 |
| 262 | ûé 105 |
| 263 | îëü 105 |
| 264 | óñ 105 |
| 265 | _ýò 105 |
| 266 | _ïðè 104 |
| 267 | èõ 104 |
| 268 | ñü_ 104 |
| 269 | ñè 104 |
| 270 | îï 104 |
| 271 | Ì 103 |
| 272 | à,_ 103 |
| 273 | àÿ 103 |
| 274 | åç 102 |
| 275 | äåë 102 |
| 276 | _ðà 102 |
| 277 | _Ï 101 |
| 278 | îâà 101 |
| 279 | óæ 101 |
| 280 | âàë 101 |
| 281 | þ_ 101 |
| 282 | êàê 100 |
| 283 | _ÿ_ 100 |
| 284 | ó, 100 |
| 285 | æå_ 100 |
| 286 | ñòâ 100 |
| 287 | äí 100 |
| 288 | ֏ 100 |
| 289 | òó 100 |
| 290 | _õ 100 |
| 291 | _Ì 99 |
| 292 | ò, 99 |
| 293 | îê 99 |
| 294 | èòü 99 |
| 295 | åò_ 99 |
| 296 | _ñî 99 |
| 297 | ëÿ 98 |
| 298 | àå 98 |
| 299 | _îò 98 |
| 300 | îãî_ 98 |
| 301 | Ê 97 |
| 302 | âîð 97 |
| 303 | òí 97 |
| 304 | _ë 97 |
| 305 | ó,_ 97 |
| 306 | äó 96 |
| 307 | _äà 96 |
| 308 | ò,_ 96 |
| 309 | áà 96 |
| 310 | ïå 96 |
| 311 | ëà_ 95 |
| 312 | áó 95 |
| 313 | áå 94 |
| 314 | ìó_ 94 |
| 315 | _ìå 94 |
| 316 | ֈ 94 |
| 317 | âñå 93 |
| 318 | òû 93 |
| 319 | êà_ 92 |
| 320 | _êàê 91 |
| 321 | ìí 91 |
| 322 | òñ 90 |
| 323 | íü 90 |
| 324 | _îá 90 |
| 325 | ? 90 |
| 326 | ðàç 89 |
| 327 | òàê 89 |
| 328 | _ñ_ 89 |
| 329 | ñà 89 |
| 330 | å, 89 |
| 331 | æè 88 |
| 332 | ô 88 |
| 333 | _òàê 88 |
| 334 | èÿ 88 |
| 335 | è÷ 88 |
| 336 | îí_ 87 |
| 337 | ìåí 87 |
| 338 | å,_ 87 |
| 339 | ûé_ 86 |
| 340 | ë, 86 |
| 341 | ë,_ 86 |
| 342 | òîì 86 |
| 343 | ðàí 86 |
| 344 | ýòî 86 |
| 345 | Íó 86 |
| 346 | È 86 |
| 347 | öè 85 |
| 348 | èå_ 85 |
| 349 | âåð 85 |
| 350 | ñâ 84 |
| 351 | _îí_ 84 |
| 352 | _âñå 84 |
| 353 | ðû 84 |
| 354 | _ýòî 84 |
| 355 | _È 84 |
| 356 | ç_ 83 |
| 357 | ïè 83 |
| 358 | ëó 83 |
| 359 | À 83 |
| 360 | åíè 82 |
| 361 | èë_ 82 |
| 362 | " 82 |
| 363 | ïîë 82 |
| 364 | àòü_ 81 |
| 365 | òîð 81 |
| 366 | èëè 81 |
| 367 | êð 81 |
| 368 | ëñ 81 |
| 369 | òåë 81 |
| 370 | åíí 81 |
| 371 | îòî 80 |
| 372 | äà_ 79 |
| 373 | èòå 79 |
| 374 | ñòð 79 |
| 375 | îâîð 79 |
| 376 | ãîâ 79 |
| 377 | îðè 79 |
| 378 | àíè 79 |
| 379 | _Íó 78 |
| 380 | ëñÿ 78 |
| 381 | _íè 78 |
| 382 | è. 77 |
| 383 | _ñâ 77 |
| 384 | è._ 76 |
| 385 | à. 76 |
| 386 | _èç 76 |
| 387 | èø 76 |
| 388 | ùå 76 |
| 389 | ÿò 76 |
| 390 | îëî 76 |
| 391 | ü, 75 |
| 392 | Î 75 |
| 393 | êè_ 75 |
| 394 | ü,_ 75 |
| 395 | ëî_ 75 |
| 396 | îç 74 |
| 397 | _åã 74 |
| 398 | ëüí 74 |
| 399 | üí 74 |
| 400 | ûå 74 |
-
diff --git a/xapian-applications/omega/langclass/spanish.lm b/xapian-applications/omega/langclass/spanish.lm
new file mode 100644
index 0000000..e40317f
-
|
+
|
|
| 1 | _ 25044 |
| 2 | e 7830 |
| 3 | a 7437 |
| 4 | o 5102 |
| 5 | s 4394 |
| 6 | n 4358 |
| 7 | i 4065 |
| 8 | r 3998 |
| 9 | l 3634 |
| 10 | d 3118 |
| 11 | c 2931 |
| 12 | t 2834 |
| 13 | u 2316 |
| 14 | a_ 2269 |
| 15 | e_ 2211 |
| 16 | s_ 1862 |
| 17 | de 1679 |
| 18 | p 1673 |
| 19 | _d 1644 |
| 20 | m 1447 |
| 21 | _de 1443 |
| 22 | n_ 1332 |
| 23 | o_ 1301 |
| 24 | en 1295 |
| 25 | _e 1216 |
| 26 | es 1177 |
| 27 | _l 1132 |
| 28 | de_ 1080 |
| 29 | la 1060 |
| 30 | os 1028 |
| 31 | _de_ 1027 |
| 32 | _p 963 |
| 33 | l_ 910 |
| 34 | ci 890 |
| 35 | _c 866 |
| 36 | _a 866 |
| 37 | os_ 801 |
| 38 | ar 777 |
| 39 | er 775 |
| 40 | as 768 |
| 41 | ra 746 |
| 42 | nt 736 |
| 43 | _la 727 |
| 44 | re 726 |
| 45 | ,_ 724 |
| 46 | , 724 |
| 47 | el 722 |
| 48 | ta 708 |
| 49 | ue 701 |
| 50 | g 678 |
| 51 | on 674 |
| 52 | al 670 |
| 53 | _s 666 |
| 54 | co 653 |
| 55 | b 637 |
| 56 | an 622 |
| 57 | v 616 |
| 58 | la_ 616 |
| 59 | or 612 |
| 60 | te 599 |
| 61 | st 596 |
| 62 | el_ 580 |
| 63 | _la_ 573 |
| 64 | y 545 |
| 65 | to 543 |
| 66 | r_ 517 |
| 67 | ad 512 |
| 68 | ó 511 |
| 69 | do 504 |
| 70 | ro 504 |
| 71 | se 488 |
| 72 | as_ 488 |
| 73 | q 487 |
| 74 | qu 487 |
| 75 | . 479 |
| 76 | ._ 478 |
| 77 | en_ 475 |
| 78 | ca 460 |
| 79 | in 459 |
| 80 | un 456 |
| 81 | _co 450 |
| 82 | es_ 449 |
| 83 | ic 449 |
| 84 | _en 440 |
| 85 | ac 440 |
| 86 | que 439 |
| 87 | na 439 |
| 88 | lo 430 |
| 89 | _m 430 |
| 90 | f 429 |
| 91 | ent 428 |
| 92 | da 412 |
| 93 | ue_ 411 |
| 94 | po 405 |
| 95 | le 399 |
| 96 | _q 399 |
| 97 | _qu 399 |
| 98 | que_ 393 |
| 99 | _que 388 |
| 100 | ie 386 |
| 101 | h 385 |
| 102 | pa 382 |
| 103 | y_ 371 |
| 104 | ti 367 |
| 105 | _que_ 365 |
| 106 | _en_ 365 |
| 107 | _y 361 |
| 108 | tr 358 |
| 109 | _el 353 |
| 110 | ri 349 |
| 111 | ia 342 |
| 112 | _el_ 333 |
| 113 | _se 330 |
| 114 | ió 330 |
| 115 | _y_ 330 |
| 116 | io 329 |
| 117 | pr 320 |
| 118 | ón 317 |
| 119 | ec 317 |
| 120 | no 314 |
| 121 | id 301 |
| 122 | í 300 |
| 123 | mi 299 |
| 124 | _t 299 |
| 125 | ión 292 |
| 126 | nte 292 |
| 127 | me 286 |
| 128 | aci 283 |
| 129 | do_ 279 |
| 130 | li 276 |
| 131 | con 276 |
| 132 | nd 273 |
| 133 | est 272 |
| 134 | ni 272 |
| 135 | á 271 |
| 136 | di 270 |
| 137 | _es 268 |
| 138 | _lo 267 |
| 139 | ció 265 |
| 140 | ma 265 |
| 141 | ón_ 264 |
| 142 | _pr 263 |
| 143 | _r 261 |
| 144 | ción 255 |
| 145 | z 254 |
| 146 | ra_ 251 |
| 147 | si 247 |
| 148 | ión_ 246 |
| 149 | oc 245 |
| 150 | nc 244 |
| 151 | _u 244 |
| 152 | _po 243 |
| 153 | los 243 |
| 154 | or_ 242 |
| 155 | _con 241 |
| 156 | is 239 |
| 157 | del 238 |
| 158 | _del 237 |
| 159 | ado 236 |
| 160 | se_ 233 |
| 161 | _i 233 |
| 162 | los_ 231 |
| 163 | _re 231 |
| 164 | por 229 |
| 165 | _del_ 228 |
| 166 | sta 228 |
| 167 | del_ 228 |
| 168 | al_ 228 |
| 169 | ne 226 |
| 170 | _h 226 |
| 171 | cu 225 |
| 172 | _n 225 |
| 173 | _a_ 224 |
| 174 | _v 224 |
| 175 | _un 223 |
| 176 | ce 222 |
| 177 | so 220 |
| 178 | ción_ 218 |
| 179 | res 218 |
| 180 | vi 217 |
| 181 | om 216 |
| 182 | te_ 212 |
| 183 | _pa 211 |
| 184 | ien 210 |
| 185 | j 209 |
| 186 | E 208 |
| 187 | _los 207 |
| 188 | _los_ 207 |
| 189 | to_ 206 |
| 190 | ol 204 |
| 191 | it 203 |
| 192 | am 202 |
| 193 | ació 201 |
| 194 | rt 201 |
| 195 | ación 201 |
| 196 | pe 197 |
| 197 | ha 190 |
| 198 | _se_ 189 |
| 199 | nto 188 |
| 200 | _o 184 |
| 201 | _E 184 |
| 202 | on_ 184 |
| 203 | sa 183 |
| 204 | na_ 182 |
| 205 | ta_ 181 |
| 206 | su 180 |
| 207 | cia 180 |
| 208 | mo 180 |
| 209 | ct 178 |
| 210 | par 178 |
| 211 | _f 177 |
| 212 | _por 176 |
| 213 | eg 172 |
| 214 | _in 172 |
| 215 | ur 170 |
| 216 | L 168 |
| 217 | ve 166 |
| 218 | im 164 |
| 219 | ga 163 |
| 220 | _est 161 |
| 221 | ar_ 161 |
| 222 | ab 160 |
| 223 | _L 159 |
| 224 | tu 158 |
| 225 | at 158 |
| 226 | no_ 157 |
| 227 | s, 157 |
| 228 | s,_ 157 |
| 229 | _por_ 156 |
| 230 | por_ 156 |
| 231 | las 156 |
| 232 | ba 154 |
| 233 | o,_ 154 |
| 234 | o, 154 |
| 235 | ento 151 |
| 236 | et 150 |
| 237 | C 150 |
| 238 | _ha 149 |
| 239 | A 149 |
| 240 | tra 148 |
| 241 | ient 148 |
| 242 | _al 147 |
| 243 | a,_ 146 |
| 244 | ica 146 |
| 245 | a, 146 |
| 246 | pro 146 |
| 247 | ado_ 145 |
| 248 | ici 144 |
| 249 | _ca 144 |
| 250 | an_ 144 |
| 251 | las_ 143 |
| 252 | ara 143 |
| 253 | nci 143 |
| 254 | ente 142 |
| 255 | ú 142 |
| 256 | rr 142 |
| 257 | ir 142 |
| 258 | da_ 141 |
| 259 | em 141 |
| 260 | ll 140 |
| 261 | il 139 |
| 262 | ía 138 |
| 263 | iv 138 |
| 264 | _su 138 |
| 265 | _par 136 |
| 266 | ul 136 |
| 267 | ant 136 |
| 268 | _A 135 |
| 269 | mp 135 |
| 270 | _las_ 134 |
| 271 | _las 134 |
| 272 | _C 134 |
| 273 | _pro 133 |
| 274 | men 132 |
| 275 | P 132 |
| 276 | des 131 |
| 277 | com 130 |
| 278 | ion 130 |
| 279 | era 130 |
| 280 | ed 129 |
| 281 | ida 129 |
| 282 | sp 128 |
| 283 | gu 127 |
| 284 | nte_ 127 |
| 285 | ns 127 |
| 286 | za 126 |
| 287 | dos 125 |
| 288 | M 125 |
| 289 | cio 125 |
| 290 | les 125 |
| 291 | _P 124 |
| 292 | bl 124 |
| 293 | _com 122 |
| 294 | s._ 122 |
| 295 | s. 122 |
| 296 | _M 121 |
| 297 | ua 120 |
| 298 | nta 120 |
| 299 | mu 119 |
| 300 | _no 118 |
| 301 | dad 118 |
| 302 | ñ 117 |
| 303 | é 116 |
| 304 | un_ 116 |
| 305 | va 116 |
| 306 | ist 116 |
| 307 | nes 116 |
| 308 | iento 115 |
| 309 | one 114 |
| 310 | ara_ 113 |
| 311 | S 113 |
| 312 | ada 113 |
| 313 | _un_ 113 |
| 314 | fi 111 |
| 315 | pre 110 |
| 316 | tos 110 |
| 317 | ter 109 |
| 318 | ot 109 |
| 319 | esta 108 |
| 320 | _me 107 |
| 321 | ido 107 |
| 322 | ob 107 |
| 323 | _g 105 |
| 324 | br 105 |
| 325 | go 105 |
| 326 | ea 104 |
| 327 | nto_ 104 |
| 328 | ona 103 |
| 329 | pu 103 |
| 330 | dos_ 103 |
| 331 | tro 103 |
| 332 | ier 103 |
| 333 | para 102 |
| 334 | ment 101 |
| 335 | ag 101 |
| 336 | ero 101 |
| 337 | gr 101 |
| 338 | rec 101 |
| 339 | bi 101 |
| 340 | ia_ 100 |
| 341 | una 100 |
| 342 | nic 99 |
| 343 | ncia 99 |
| 344 | ía_ 98 |
| 345 | a._ 98 |
| 346 | tos_ 98 |
| 347 | a. 98 |
| 348 | ran 98 |
| 349 | lo_ 97 |
| 350 | ones 97 |
| 351 | rm 96 |
| 352 | lu 96 |
| 353 | ron 95 |
| 354 | con_ 95 |
| 355 | ó_ 95 |
| 356 | nes_ 95 |
| 357 | _ci 95 |
| 358 | ante 94 |
| 359 | ch 94 |
| 360 | _con_ 94 |
| 361 | _para 94 |
| 362 | ntr 93 |
| 363 | una_ 93 |
| 364 | para_ 93 |
| 365 | mie 92 |
| 366 | ico 92 |
| 367 | fe 92 |
| 368 | les_ 92 |
| 369 | uc 92 |
| 370 | ip 91 |
| 371 | sto 91 |
| 372 | _ma 91 |
| 373 | ui 91 |
| 374 | sta_ 91 |
| 375 | _ve 90 |
| 376 | cion 90 |
| 377 | " 90 |
| 378 | op 90 |
| 379 | cal 89 |
| 380 | _mu 89 |
| 381 | _S 89 |
| 382 | ro_ 89 |
| 383 | _pe 88 |
| 384 | ste 88 |
| 385 | ras 88 |
| 386 | pl 88 |
| 387 | _una 88 |
| 388 | _di 87 |
| 389 | ento_ 86 |
| 390 | ita 86 |
| 391 | ione 85 |
| 392 | ect 85 |
| 393 | _una_ 85 |
| 394 | mien 85 |
| 395 | tan 85 |
| 396 | du 84 |
| 397 | den 84 |
| 398 | ndo 84 |
| 399 | per 84 |
| 400 | eri 84 |
-
diff --git a/xapian-applications/omega/langclass/swedish.lm b/xapian-applications/omega/langclass/swedish.lm
new file mode 100644
index 0000000..1c02124
-
|
+
|
|
| 1 | _ 33494 |
| 2 | e 8992 |
| 3 | n 7900 |
| 4 | t 7859 |
| 5 | a 7781 |
| 6 | r 7251 |
| 7 | s 6435 |
| 8 | i 5649 |
| 9 | l 4541 |
| 10 | d 4079 |
| 11 | o 3724 |
| 12 | m 3203 |
| 13 | k 3058 |
| 14 | g 2478 |
| 15 | en 2403 |
| 16 | n_ 2389 |
| 17 | t_ 2073 |
| 18 | de 1939 |
| 19 | r_ 1910 |
| 20 | v 1890 |
| 21 | h 1789 |
| 22 | u 1782 |
| 23 | _s 1768 |
| 24 | ä 1724 |
| 25 | er 1709 |
| 26 | f 1597 |
| 27 | en_ 1537 |
| 28 | a_ 1526 |
| 29 | an 1357 |
| 30 | p 1320 |
| 31 | et 1317 |
| 32 | ö 1278 |
| 33 | å 1261 |
| 34 | st 1236 |
| 35 | ar 1226 |
| 36 | c 1191 |
| 37 | _d 1158 |
| 38 | e_ 1116 |
| 39 | in 1045 |
| 40 | _f 1027 |
| 41 | te 1000 |
| 42 | b 997 |
| 43 | _a 978 |
| 44 | s_ 974 |
| 45 | ra 958 |
| 46 | . 956 |
| 47 | tt 935 |
| 48 | _i 898 |
| 49 | _m 890 |
| 50 | ._ 886 |
| 51 | ll 870 |
| 52 | ta 844 |
| 53 | _o 842 |
| 54 | _e 839 |
| 55 | nd 820 |
| 56 | ti 804 |
| 57 | sk 798 |
| 58 | re 779 |
| 59 | at 769 |
| 60 | _de 754 |
| 61 | om 743 |
| 62 | m_ 739 |
| 63 | ör 720 |
| 64 | , 697 |
| 65 | ,_ 695 |
| 66 | ng 686 |
| 67 | li 673 |
| 68 | ka 666 |
| 69 | oc 662 |
| 70 | _h 654 |
| 71 | on 652 |
| 72 | et_ 647 |
| 73 | ch 645 |
| 74 | ns 643 |
| 75 | is 642 |
| 76 | er_ 630 |
| 77 | är 625 |
| 78 | _v 614 |
| 79 | _t 614 |
| 80 | ni 611 |
| 81 | i_ 609 |
| 82 | _oc 592 |
| 83 | tt_ 587 |
| 84 | na 586 |
| 85 | y 586 |
| 86 | la 579 |
| 87 | _b 579 |
| 88 | h_ 577 |
| 89 | kt 575 |
| 90 | ch_ 568 |
| 91 | ig 564 |
| 92 | fö 563 |
| 93 | och 555 |
| 94 | or 555 |
| 95 | _och 554 |
| 96 | och_ 554 |
| 97 | _och_ 553 |
| 98 | me 548 |
| 99 | den 548 |
| 100 | om_ 535 |
| 101 | _i_ 531 |
| 102 | d_ 530 |
| 103 | j 529 |
| 104 | ik 520 |
| 105 | de_ 520 |
| 106 | för 518 |
| 107 | ge 498 |
| 108 | ad 497 |
| 109 | _k 491 |
| 110 | _fö 487 |
| 111 | ri 484 |
| 112 | el 482 |
| 113 | il 481 |
| 114 | so 480 |
| 115 | al 474 |
| 116 | g_ 469 |
| 117 | le 464 |
| 118 | an_ 461 |
| 119 | _för 447 |
| 120 | si 437 |
| 121 | ar_ 437 |
| 122 | att 435 |
| 123 | _p 434 |
| 124 | es 420 |
| 125 | ing 413 |
| 126 | se 407 |
| 127 | to 404 |
| 128 | _u 403 |
| 129 | _en 403 |
| 130 | and 398 |
| 131 | den_ 395 |
| 132 | nde 393 |
| 133 | nn 393 |
| 134 | _l 391 |
| 135 | å_ 391 |
| 136 | D 385 |
| 137 | än 383 |
| 138 | nt 382 |
| 139 | l_ 381 |
| 140 | tr 378 |
| 141 | _D 372 |
| 142 | va 370 |
| 143 | am 369 |
| 144 | sa 367 |
| 145 | _so 365 |
| 146 | ga 364 |
| 147 | _en_ 361 |
| 148 | är_ 358 |
| 149 | ck 357 |
| 150 | av 354 |
| 151 | v_ 351 |
| 152 | ed 347 |
| 153 | ma 346 |
| 154 | da 346 |
| 155 | som 346 |
| 156 | rs 344 |
| 157 | som_ 344 |
| 158 | ve 342 |
| 159 | ter 341 |
| 160 | att_ 341 |
| 161 | ha 338 |
| 162 | ne 337 |
| 163 | ut 335 |
| 164 | as 332 |
| 165 | ska 329 |
| 166 | _at 327 |
| 167 | _att 326 |
| 168 | _som 324 |
| 169 | _att_ 324 |
| 170 | _som_ 323 |
| 171 | vi 322 |
| 172 | ikt 317 |
| 173 | _av 316 |
| 174 | det 316 |
| 175 | _den 315 |
| 176 | he 315 |
| 177 | ss 314 |
| 178 | un 307 |
| 179 | ke 304 |
| 180 | _g 303 |
| 181 | us 302 |
| 182 | di 302 |
| 183 | _st 300 |
| 184 | rn 297 |
| 185 | _me 296 |
| 186 | _ä 295 |
| 187 | ade 294 |
| 188 | " 290 |
| 189 | _ha 290 |
| 190 | av_ 289 |
| 191 | ill 288 |
| 192 | _n 286 |
| 193 | _in 279 |
| 194 | io 275 |
| 195 | _r 275 |
| 196 | der 275 |
| 197 | it 274 |
| 198 | _av_ 274 |
| 199 | sta 274 |
| 200 | gen 272 |
| 201 | isk 270 |
| 202 | _ti 269 |
| 203 | id 265 |
| 204 | na_ 265 |
| 205 | ns_ 264 |
| 206 | ko 262 |
| 207 | _den_ 261 |
| 208 | ag 258 |
| 209 | det_ 257 |
| 210 | lig 257 |
| 211 | era 256 |
| 212 | ll_ 255 |
| 213 | _det 252 |
| 214 | _är 251 |
| 215 | be 249 |
| 216 | _är_ 248 |
| 217 | ra_ 247 |
| 218 | ion 244 |
| 219 | - 241 |
| 220 | pr 240 |
| 221 | oni 233 |
| 222 | til 231 |
| 223 | ten 228 |
| 224 | _si 225 |
| 225 | k_ 222 |
| 226 | på 222 |
| 227 | fr 221 |
| 228 | ro 219 |
| 229 | till 219 |
| 230 | iv 216 |
| 231 | ls 216 |
| 232 | ande 215 |
| 233 | ör_ 214 |
| 234 | _det_ 213 |
| 235 | äl 212 |
| 236 | _på 211 |
| 237 | ts 210 |
| 238 | ens 209 |
| 239 | med 209 |
| 240 | mm 208 |
| 241 | rt 208 |
| 242 | _till 208 |
| 243 | _til 208 |
| 244 | _va 207 |
| 245 | _fr 205 |
| 246 | _sk 205 |
| 247 | var 205 |
| 248 | nin 204 |
| 249 | ning 203 |
| 250 | ol 201 |
| 251 | ka_ 200 |
| 252 | lle 198 |
| 253 | ett 198 |
| 254 | rd 197 |
| 255 | em 196 |
| 256 | på_ 195 |
| 257 | x 195 |
| 258 | rk 194 |
| 259 | _ut 194 |
| 260 | ste 194 |
| 261 | ds 193 |
| 262 | _vi 192 |
| 263 | år 192 |
| 264 | S 192 |
| 265 | nde_ 191 |
| 266 | are 191 |
| 267 | ver 190 |
| 268 | _på_ 190 |
| 269 | nis 189 |
| 270 | kr 189 |
| 271 | _med 188 |
| 272 | all 188 |
| 273 | ån 187 |
| 274 | nge 185 |
| 275 | mo 184 |
| 276 | os 183 |
| 277 | ld 182 |
| 278 | ade_ 181 |
| 279 | _S 181 |
| 280 | ed_ 180 |
| 281 | rä 176 |
| 282 | De 175 |
| 283 | _- 175 |
| 284 | kan 174 |
| 285 | ta_ 173 |
| 286 | ng_ 172 |
| 287 | vä 171 |
| 288 | för_ 170 |
| 289 | ill_ 170 |
| 290 | han 170 |
| 291 | _De 170 |
| 292 | pp 169 |
| 293 | lt 169 |
| 294 | sam 168 |
| 295 | nte 167 |
| 296 | ans 167 |
| 297 | ton 166 |
| 298 | ur 165 |
| 299 | mi 165 |
| 300 | ess 165 |
| 301 | kl 164 |
| 302 | ig_ 164 |
| 303 | ks 164 |
| 304 | as_ 163 |
| 305 | und 163 |
| 306 | men 162 |
| 307 | med_ 161 |
| 308 | _med_ 161 |
| 309 | ak 161 |
| 310 | Di 160 |
| 311 | ot 159 |
| 312 | rna 159 |
| 313 | ul 159 |
| 314 | _var 159 |
| 315 | te_ 158 |
| 316 | gen_ 158 |
| 317 | het 157 |
| 318 | kto 157 |
| 319 | str 156 |
| 320 | _Di 155 |
| 321 | tad 155 |
| 322 | lan 154 |
| 323 | ga_ 154 |
| 324 | iska 154 |
| 325 | fa 154 |
| 326 | fi 154 |
| 327 | så 154 |
| 328 | Dikt 153 |
| 329 | Dik 153 |
| 330 | pe 153 |
| 331 | ska_ 152 |
| 332 | ja 152 |
| 333 | H 151 |
| 334 | res 151 |
| 335 | ku 151 |
| 336 | iu 150 |
| 337 | ande_ 150 |
| 338 | till_ 150 |
| 339 | t. 150 |
| 340 | ern 150 |
| 341 | rm 149 |
| 342 | _Dikt 149 |
| 343 | _Dik 149 |
| 344 | ie 149 |
| 345 | bl 148 |
| 346 | -_ 147 |
| 347 | od 147 |
| 348 | _H 147 |
| 349 | n. 147 |
| 350 | ist 147 |
| 351 | _di 146 |
| 352 | ius 146 |
| 353 | _" 145 |
| 354 | la_ 145 |
| 355 | sl 145 |
| 356 | man 145 |
| 357 | ren 145 |
| 358 | _för_ 145 |
| 359 | toni 144 |
| 360 | kton 144 |
| 361 | n._ 144 |
| 362 | ktoni 144 |
| 363 | ikton 144 |
| 364 | I 144 |
| 365 | ikto 144 |
| 366 | nius 143 |
| 367 | ten_ 143 |
| 368 | onius 143 |
| 369 | oniu 143 |
| 370 | toniu 143 |
| 371 | ing_ 143 |
| 372 | Dikto 143 |
| 373 | niu 143 |
| 374 | _ko 143 |
| 375 | ic 142 |
| 376 | _sa 142 |
| 377 | _han 142 |
| 378 | ett_ 142 |
| 379 | sm 141 |
| 380 | ba 141 |
| 381 | M 141 |
| 382 | gr 140 |
| 383 | lä 140 |
| 384 | ex 138 |
| 385 | t._ 138 |
| 386 | sp 137 |
| 387 | lla 137 |
| 388 | _et 137 |
| 389 | _M 137 |
| 390 | dr 137 |
| 391 | rö 136 |
| 392 | rad 136 |
| 393 | ek 136 |
| 394 | _be 135 |
| 395 | tar 135 |
| 396 | _-_ 135 |
| 397 | _om 134 |
| 398 | rl 134 |
| 399 | E 134 |
| 400 | mä 133 |
-
diff --git a/xapian-applications/omega/omindex.cc b/xapian-applications/omega/omindex.cc
index 19867a0..116d581 100644
a
|
b
|
|
4 | 4 | * Copyright 2001,2005 James Aylett |
5 | 5 | * Copyright 2001,2002 Ananova Ltd |
6 | 6 | * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015 Olly Betts |
| 7 | * Copyright 2006,2007,2008 AVL List GesmbH |
7 | 8 | * Copyright 2009 Frank J Bruzzaniti |
8 | 9 | * Copyright 2012 Mihai Bivol |
9 | 10 | * |
… |
… |
|
71 | 72 | |
72 | 73 | #include "gnu_getopt.h" |
73 | 74 | |
| 75 | #ifdef HAVE_LIBTEXTCAT |
| 76 | # include <textcat.h> |
| 77 | # ifndef LANGCLASS_CONF |
| 78 | # define LANGCLASS_CONF "/var/lib/omega/langclass/langclass.conf" |
| 79 | # endif |
| 80 | #endif |
| 81 | |
74 | 82 | using namespace std; |
75 | 83 | |
76 | 84 | #define TITLE_SIZE 128 |
… |
… |
static bool use_ctime = false;
|
96 | 104 | static string root; |
97 | 105 | static string site_term, host_term; |
98 | 106 | static Xapian::WritableDatabase db; |
99 | | static Xapian::Stem stemmer("english"); |
| 107 | static string curr_lang = "english"; |
| 108 | static Xapian::Stem stemmer(curr_lang); |
100 | 109 | static Xapian::TermGenerator indexer; |
101 | 110 | |
102 | 111 | static Xapian::doccount old_docs_not_seen; |
103 | 112 | static Xapian::docid old_lastdocid; |
104 | 113 | static vector<bool> updated; |
105 | 114 | |
| 115 | #ifdef HAVE_LIBTEXTCAT |
| 116 | static void *textcat; |
| 117 | #endif |
| 118 | |
106 | 119 | // The longest string after a '.' to treat as an extension. If there's a |
107 | 120 | // longer entry in the mime_map, we set this to that length instead. |
108 | 121 | static size_t max_ext_len = 7; |
… |
… |
index_mimetype(const string & file, const string & url, const string & ext,
|
413 | 426 | if (verbose) cout << flush; |
414 | 427 | |
415 | 428 | string author, title, sample, keywords, topic, dump; |
| 429 | string language; |
416 | 430 | string md5; |
417 | 431 | time_t created = time_t(-1); |
418 | 432 | |
… |
… |
index_mimetype(const string & file, const string & url, const string & ext,
|
934 | 948 | } |
935 | 949 | record += "\nsize="; |
936 | 950 | record += str(d.get_size()); |
| 951 | #ifdef HAVE_LIBTEXTCAT |
| 952 | if (language.empty()) { |
| 953 | const char * lang = |
| 954 | textcat_Classify(textcat, sample.data(), sample.size()); |
| 955 | if (lang[0] == '[') { |
| 956 | // There may be multiple codes, each in square brackets - |
| 957 | // we just use the first. |
| 958 | const char * end = strchr(++lang, ']'); |
| 959 | if (end) language.assign(lang, end - lang); |
| 960 | } |
| 961 | } |
| 962 | #endif |
| 963 | if (!language.empty()) { |
| 964 | record += "\nlanguage="; |
| 965 | record += language; |
| 966 | } |
| 967 | if (language != curr_lang) { |
| 968 | stemmer = Xapian::Stem(language); |
| 969 | curr_lang = language; |
| 970 | } |
937 | 971 | newdocument.set_data(record); |
938 | 972 | |
939 | 973 | // Index the title, document text, keywords and topic. |
… |
… |
main(int argc, char **argv)
|
1684 | 1718 | return 1; |
1685 | 1719 | } |
1686 | 1720 | |
| 1721 | #ifdef HAVE_LIBTEXTCAT |
| 1722 | textcat = textcat_Init(LANGCLASS_CONF); |
| 1723 | #endif |
| 1724 | |
1687 | 1725 | root = argv[optind]; |
1688 | 1726 | if (!endswith(root, '/')) { |
1689 | 1727 | root += '/'; |
… |
… |
main(int argc, char **argv)
|
1789 | 1827 | // If we created a temporary directory then delete it. |
1790 | 1828 | remove_tmpdir(); |
1791 | 1829 | |
| 1830 | #ifdef HAVE_LIBTEXTCAT |
| 1831 | textcat_Done(textcat); |
| 1832 | #endif |
1792 | 1833 | return exitcode; |
1793 | 1834 | } |
-
diff --git a/xapian-applications/omega/scriptindex.cc b/xapian-applications/omega/scriptindex.cc
index e6b5b61..3192baf 100644
a
|
b
|
|
4 | 4 | * Copyright 2001 Sam Liddicott |
5 | 5 | * Copyright 2001,2002 Ananova Ltd |
6 | 6 | * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2014,2015 Olly Betts |
| 7 | * Copyright 2006,2007 AVL List GesmbH |
7 | 8 | * |
8 | 9 | * This program is free software; you can redistribute it and/or |
9 | 10 | * modify it under the terms of the GNU General Public License as |
… |
… |
|
50 | 51 | |
51 | 52 | #include "gnu_getopt.h" |
52 | 53 | |
| 54 | #ifdef HAVE_LIBTEXTCAT |
| 55 | # include <textcat.h> |
| 56 | # ifndef LANGCLASS_CONF |
| 57 | # define LANGCLASS_CONF "/var/lib/omega/langclass/langclass.conf" |
| 58 | # endif |
| 59 | #endif |
| 60 | |
53 | 61 | using namespace std; |
54 | 62 | |
55 | 63 | #define PROG_NAME "scriptindex" |
… |
… |
static bool verbose;
|
59 | 67 | static int addcount; |
60 | 68 | static int repcount; |
61 | 69 | static int delcount; |
| 70 | static string curr_lang = "english"; |
| 71 | #ifdef HAVE_LIBTEXTCAT |
| 72 | static void *textcat; |
| 73 | #endif |
62 | 74 | |
63 | 75 | inline static bool |
64 | 76 | p_space(unsigned int c) |
… |
… |
static bool
|
423 | 435 | index_file(const char *fname, istream &stream, |
424 | 436 | Xapian::WritableDatabase &database, Xapian::TermGenerator &indexer) |
425 | 437 | { |
| 438 | string language; |
426 | 439 | string line; |
427 | 440 | size_t line_no = 0; |
428 | 441 | while (!stream.eof() && getline(stream, line)) { |
… |
… |
index_file(const char *fname, istream &stream,
|
465 | 478 | // Default to not indexing spellings. |
466 | 479 | indexer.set_flags(Xapian::TermGenerator::flags(0)); |
467 | 480 | |
| 481 | #ifdef HAVE_LIBTEXTCAT |
| 482 | language.resize(0); |
| 483 | const char * lang = |
| 484 | textcat_Classify(textcat, value.data(), value.size()); |
| 485 | if (lang[0] == '[') { |
| 486 | // There may be multiple codes, each in square brackets - |
| 487 | // we just use the first. |
| 488 | const char * end = strchr(++lang, ']'); |
| 489 | if (end) language.assign(lang, end - lang); |
| 490 | } |
| 491 | #endif |
| 492 | |
468 | 493 | const vector<Action> &v = index_spec[field]; |
469 | 494 | string old_value = value; |
470 | 495 | vector<Action>::const_iterator i; |
… |
… |
again:
|
660 | 685 | string data; |
661 | 686 | map<string, list<string> >::const_iterator i; |
662 | 687 | for (i = fields.begin(); i != fields.end(); ++i) { |
| 688 | // If there's an explicitly set language, discard any |
| 689 | // detected one. FIXME: Don't waste effort detecting |
| 690 | // in this case. |
| 691 | if (i->first == "language") language.resize(0); |
663 | 692 | list<string>::const_iterator j; |
664 | 693 | for (j = i->second.begin(); j != i->second.end(); j++) { |
665 | 694 | data += i->first; |
… |
… |
again:
|
668 | 697 | data += '\n'; |
669 | 698 | } |
670 | 699 | } |
| 700 | #ifdef HAVE_LIBTEXTCAT |
| 701 | if (!language.empty()) { |
| 702 | // Auto-detected language. |
| 703 | data += "language="; |
| 704 | data += language; |
| 705 | data += '\n'; |
| 706 | } |
| 707 | #endif |
671 | 708 | |
672 | 709 | // Put the data in the document |
673 | 710 | doc.set_data(data); |
… |
… |
try {
|
707 | 744 | // If the database already exists, default to updating not overwriting. |
708 | 745 | int database_mode = Xapian::DB_CREATE_OR_OPEN; |
709 | 746 | verbose = false; |
710 | | Xapian::Stem stemmer("english"); |
| 747 | Xapian::Stem stemmer(curr_lang); |
711 | 748 | |
712 | 749 | static const struct option longopts[] = { |
713 | 750 | { "help", no_argument, NULL, 'h' }, |
… |
… |
try {
|
772 | 809 | exit(show_help ? 0 : 1); |
773 | 810 | } |
774 | 811 | |
| 812 | #ifdef HAVE_LIBTEXTCAT |
| 813 | textcat = textcat_Init(LANGCLASS_CONF); |
| 814 | #endif |
| 815 | |
775 | 816 | parse_index_script(argv[1]); |
776 | 817 | |
777 | 818 | // Open the database. |