Ticket #716: Index1.java

File Index1.java, 3.5 KB (added by Aakash, 8 years ago)

Java implementation of index1.py

Line 
1package org.xapian.examples;
2
3import java.io.File;
4import java.io.FileNotFoundException;
5import java.util.Scanner;
6import java.util.logging.Level;
7import java.util.logging.Logger;
8import org.xapian.Document;
9import org.xapian.Stem;
10import org.xapian.TermGenerator;
11import org.xapian.WritableDatabase;
12import org.xapian.XapianConstants;
13import org.xapian.XapianJNI;
14
15public class Index1 {
16
17 // Command line args - dbpath datapath
18 public static void main(String[] args)
19 {
20 if(args.length < 2)
21 {
22 System.out.println("Insufficient number of arguments (should be dbpath datapath)");
23 return;
24 }
25 index(args[1], args[0]);
26 }
27
28 public static void index(String datapath, String dbpath)
29 {
30 // Create or open the database we're goign to be writing to.
31 WritableDatabase db = new WritableDatabase(dbpath, XapianConstants.DB_CREATE_OR_OPEN);
32
33 // Set up a TermGenerator that we'll use in indexing.
34 TermGenerator termGenerator = new TermGenerator();
35 termGenerator.setStemmer(new Stem("en"));
36
37 //Parsing the CSV input file
38 Scanner csvScanner,lineScanner;
39 csvScanner = lineScanner = null;
40
41 try {
42 File csv = new File(datapath);
43 csvScanner = new Scanner(csv);
44 } catch (FileNotFoundException ex) {
45 Logger.getLogger(Index1.class.getName()).log(Level.SEVERE, null, ex);
46 }
47
48 //Ignoring first line (contains descriptors)
49 csvScanner.nextLine();
50
51 while(csvScanner.hasNextLine())
52 {
53 String currentLine = csvScanner.nextLine();
54 lineScanner = new Scanner(currentLine);
55 lineScanner.useDelimiter(",");
56
57 /* Parsing each line for identifier, title, and description */
58
59 //Identifier is the first comma seperated value (according to CSV file)
60 String identifier = lineScanner.next();
61
62 //Title is third comma seperated value
63 lineScanner.next();
64 String title = lineScanner.next();
65
66 //Description is ninth comma sperated value
67 for(int i=0;i<5;i++)
68 lineScanner.next();
69 String description = lineScanner.next();
70
71 /* Finished Parsing line */
72
73 // We make a document and tell the term generator to use this.
74 Document doc = new Document();
75 termGenerator.setDocument(doc);
76
77 // Index each field with a suitable prefix.
78 termGenerator.indexText(title, 1, "S");
79 termGenerator.indexText(description, 1, "XD");
80
81 // Index fields without prefixes for general search.
82 termGenerator.indexText(title);
83 termGenerator.increaseTermpos();
84 termGenerator.indexText(description);
85
86 // Store all fields for display purposes
87 doc.setData(currentLine);
88 doc.addValue(0, title);
89
90 // We use the identifier to ensure each object ends up in the
91 // database only once no matter how many times we run the
92 // indexer.
93 String idterm = "Q"+identifier;
94 doc.addBooleanTerm(idterm);
95 db.replaceDocument(idterm, doc);
96 }
97
98 // Commit to write documents to disk
99 db.commit();
100
101 lineScanner.close();
102 csvScanner.close();
103 }
104}