1 | #!/usr/bin/env python
|
---|
2 |
|
---|
3 | import sys
|
---|
4 | import logging
|
---|
5 | import getopt
|
---|
6 | import xapian
|
---|
7 | import os
|
---|
8 |
|
---|
9 |
|
---|
def get_memory():
    """Return a one-line summary of this process's memory usage.

    Runs ``ps`` for the current process id asking for the ``rss`` and
    ``vsz`` columns, keeps only the last line of output (which drops the
    header row) and returns it prefixed with ``"memory used = "``.
    """
    command = 'ps -p %d -o rss -o vsz | tail -1' % os.getpid()
    # Close the pipe explicitly so the child process is waited on right away
    # instead of lingering until the pipe object is garbage collected.
    with os.popen(command) as pipe:
        usage = pipe.read().strip()
    return "memory used = %s" % usage
12 |
|
---|
13 |
|
---|
def test(name, get_data):
    """Walk every document of a Xapian database while logging memory use.

    Opens the database ``name``, runs a match-all query sized to the
    database's document count, and iterates over the resulting matches.
    When ``get_data`` is true, each match's document data is fetched (and
    discarded). The memory summary from ``get_memory`` is logged once after
    the query and again after every 10000 matches read.
    """
    logging.info('version: %s', xapian.version_string())

    db = xapian.Database(name)
    enquire = xapian.Enquire(db)
    enquire.set_query(xapian.Query.MatchAll)
    mset = enquire.get_mset(0, db.get_doccount(), None, None)
    logging.info('matches: %s %s', mset.get_matches_estimated(), get_memory())

    seen = 0
    for hit in mset:
        seen += 1
        if get_data:
            # Only touch the document when asked to, so the "-d" run measures
            # plain iteration over the match set.
            hit.document.get_data()

        if seen % 10000:
            continue
        logging.info('read: %s %s', seen, get_memory())
29 |
|
---|
30 |
|
---|
def main(arguments):
    """Parse command-line options and run the database read test.

    Options:
        -l LEVEL  name of a ``logging`` level constant, case-insensitive
                  (default: INFO).
        -d        do not fetch each document's data while iterating.

    The first positional argument is passed to ``test`` as the database
    name.

    Raises:
        SystemExit: with a usage message when an option is not recognised,
            the log level name is not a numeric ``logging`` level, or no
            database argument is given.
    """
    usage = 'usage: [-l LEVEL] [-d] DATABASE'
    level = logging.INFO
    get_data = True

    try:
        options, arguments = getopt.getopt(arguments, "l:d")
    except getopt.GetoptError as error:
        raise SystemExit('%s\n%s' % (error, usage))

    for option, argument in options:
        if option == '-l':
            # Accept only names that resolve to a numeric logging level
            # rather than failing with an AttributeError traceback.
            level = getattr(logging, argument.upper(), None)
            if not isinstance(level, int):
                raise SystemExit(
                    'unknown log level: %s\n%s' % (argument, usage))
        elif option == '-d':
            get_data = False

    # Without this check a missing positional argument surfaced as a bare
    # IndexError from arguments[0].
    if not arguments:
        raise SystemExit('missing database argument\n%s' % usage)

    logging.basicConfig(level=level)
    test(arguments[0], get_data)
43 |
|
---|
44 |
|
---|
# Script entry point: when executed directly, hand everything after the
# program name to main().
if '__main__' == __name__:
    main(sys.argv[1:])