1 | package org.xapian.examples;
|
---|
2 |
|
---|
3 | import java.io.File;
|
---|
4 | import java.io.FileNotFoundException;
|
---|
5 | import java.util.Scanner;
|
---|
6 | import java.util.logging.Level;
|
---|
7 | import java.util.logging.Logger;
|
---|
8 | import org.xapian.Document;
|
---|
9 | import org.xapian.Stem;
|
---|
10 | import org.xapian.TermGenerator;
|
---|
11 | import org.xapian.WritableDatabase;
|
---|
12 | import org.xapian.XapianConstants;
|
---|
13 | import org.xapian.XapianJNI;
|
---|
14 |
|
---|
15 | public class Index1 {
|
---|
16 |
|
---|
17 | // Command line args - dbpath datapath
|
---|
18 | public static void main(String[] args)
|
---|
19 | {
|
---|
20 | if(args.length < 2)
|
---|
21 | {
|
---|
22 | System.out.println("Insufficient number of arguments (should be dbpath datapath)");
|
---|
23 | return;
|
---|
24 | }
|
---|
25 | index(args[1], args[0]);
|
---|
26 | }
|
---|
27 |
|
---|
28 | public static void index(String datapath, String dbpath)
|
---|
29 | {
|
---|
30 | // Create or open the database we're goign to be writing to.
|
---|
31 | WritableDatabase db = new WritableDatabase(dbpath, XapianConstants.DB_CREATE_OR_OPEN);
|
---|
32 |
|
---|
33 | // Set up a TermGenerator that we'll use in indexing.
|
---|
34 | TermGenerator termGenerator = new TermGenerator();
|
---|
35 | termGenerator.setStemmer(new Stem("en"));
|
---|
36 |
|
---|
37 | //Parsing the CSV input file
|
---|
38 | Scanner csvScanner,lineScanner;
|
---|
39 | csvScanner = lineScanner = null;
|
---|
40 |
|
---|
41 | try {
|
---|
42 | File csv = new File(datapath);
|
---|
43 | csvScanner = new Scanner(csv);
|
---|
44 | } catch (FileNotFoundException ex) {
|
---|
45 | Logger.getLogger(Index1.class.getName()).log(Level.SEVERE, null, ex);
|
---|
46 | }
|
---|
47 |
|
---|
48 | //Ignoring first line (contains descriptors)
|
---|
49 | csvScanner.nextLine();
|
---|
50 |
|
---|
51 | while(csvScanner.hasNextLine())
|
---|
52 | {
|
---|
53 | String currentLine = csvScanner.nextLine();
|
---|
54 | lineScanner = new Scanner(currentLine);
|
---|
55 | lineScanner.useDelimiter(",");
|
---|
56 |
|
---|
57 | /* Parsing each line for identifier, title, and description */
|
---|
58 |
|
---|
59 | //Identifier is the first comma seperated value (according to CSV file)
|
---|
60 | String identifier = lineScanner.next();
|
---|
61 |
|
---|
62 | //Title is third comma seperated value
|
---|
63 | lineScanner.next();
|
---|
64 | String title = lineScanner.next();
|
---|
65 |
|
---|
66 | //Description is ninth comma sperated value
|
---|
67 | for(int i=0;i<5;i++)
|
---|
68 | lineScanner.next();
|
---|
69 | String description = lineScanner.next();
|
---|
70 |
|
---|
71 | /* Finished Parsing line */
|
---|
72 |
|
---|
73 | // We make a document and tell the term generator to use this.
|
---|
74 | Document doc = new Document();
|
---|
75 | termGenerator.setDocument(doc);
|
---|
76 |
|
---|
77 | // Index each field with a suitable prefix.
|
---|
78 | termGenerator.indexText(title, 1, "S");
|
---|
79 | termGenerator.indexText(description, 1, "XD");
|
---|
80 |
|
---|
81 | // Index fields without prefixes for general search.
|
---|
82 | termGenerator.indexText(title);
|
---|
83 | termGenerator.increaseTermpos();
|
---|
84 | termGenerator.indexText(description);
|
---|
85 |
|
---|
86 | // Store all fields for display purposes
|
---|
87 | doc.setData(currentLine);
|
---|
88 | doc.addValue(0, title);
|
---|
89 |
|
---|
90 | // We use the identifier to ensure each object ends up in the
|
---|
91 | // database only once no matter how many times we run the
|
---|
92 | // indexer.
|
---|
93 | String idterm = "Q"+identifier;
|
---|
94 | doc.addBooleanTerm(idterm);
|
---|
95 | db.replaceDocument(idterm, doc);
|
---|
96 | }
|
---|
97 |
|
---|
98 | // Commit to write documents to disk
|
---|
99 | db.commit();
|
---|
100 |
|
---|
101 | lineScanner.close();
|
---|
102 | csvScanner.close();
|
---|
103 | }
|
---|
104 | }
|
---|