Context Navigation

Back to Ticket #716

Ticket #716: Index1.java

File Index1.java, 3.5 KB (added by Aakash, 8 years ago)
Java implementation of index1.py

Line
1	package org.xapian.examples;
2
3	import java.io.File;
4	import java.io.FileNotFoundException;
5	import java.util.Scanner;
6	import java.util.logging.Level;
7	import java.util.logging.Logger;
8	import org.xapian.Document;
9	import org.xapian.Stem;
10	import org.xapian.TermGenerator;
11	import org.xapian.WritableDatabase;
12	import org.xapian.XapianConstants;
13	import org.xapian.XapianJNI;
14
15	public class Index1 {
16
17	// Command line args - dbpath datapath
18	public static void main(String[] args)
19	{
20	if(args.length < 2)
21	{
22	System.out.println("Insufficient number of arguments (should be dbpath datapath)");
23	return;
24	}
25	index(args[1], args[0]);
26	}
27
28	public static void index(String datapath, String dbpath)
29	{
30	// Create or open the database we're goign to be writing to.
31	WritableDatabase db = new WritableDatabase(dbpath, XapianConstants.DB_CREATE_OR_OPEN);
32
33	// Set up a TermGenerator that we'll use in indexing.
34	TermGenerator termGenerator = new TermGenerator();
35	termGenerator.setStemmer(new Stem("en"));
36
37	//Parsing the CSV input file
38	Scanner csvScanner,lineScanner;
39	csvScanner = lineScanner = null;
40
41	try {
42	File csv = new File(datapath);
43	csvScanner = new Scanner(csv);
44	} catch (FileNotFoundException ex) {
45	Logger.getLogger(Index1.class.getName()).log(Level.SEVERE, null, ex);
46	}
47
48	//Ignoring first line (contains descriptors)
49	csvScanner.nextLine();
50
51	while(csvScanner.hasNextLine())
52	{
53	String currentLine = csvScanner.nextLine();
54	lineScanner = new Scanner(currentLine);
55	lineScanner.useDelimiter(",");
56
57	/* Parsing each line for identifier, title, and description */
58
59	//Identifier is the first comma seperated value (according to CSV file)
60	String identifier = lineScanner.next();
61
62	//Title is third comma seperated value
63	lineScanner.next();
64	String title = lineScanner.next();
65
66	//Description is ninth comma sperated value
67	for(int i=0;i<5;i++)
68	lineScanner.next();
69	String description = lineScanner.next();
70
71	/* Finished Parsing line */
72
73	// We make a document and tell the term generator to use this.
74	Document doc = new Document();
75	termGenerator.setDocument(doc);
76
77	// Index each field with a suitable prefix.
78	termGenerator.indexText(title, 1, "S");
79	termGenerator.indexText(description, 1, "XD");
80
81	// Index fields without prefixes for general search.
82	termGenerator.indexText(title);
83	termGenerator.increaseTermpos();
84	termGenerator.indexText(description);
85
86	// Store all fields for display purposes
87	doc.setData(currentLine);
88	doc.addValue(0, title);
89
90	// We use the identifier to ensure each object ends up in the
91	// database only once no matter how many times we run the
92	// indexer.
93	String idterm = "Q"+identifier;
94	doc.addBooleanTerm(idterm);
95	db.replaceDocument(idterm, doc);
96	}
97
98	// Commit to write documents to disk
99	db.commit();
100
101	lineScanner.close();
102	csvScanner.close();
103	}
104	}

Download in other formats:

Original Format