Date start = new Date();try { IndexWriter writer = new IndexWriter(FSDirectory.open(INDEX_DIR), new SmartChineseAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED); indexDocs(writer, docDir); System.out.PRintln("Indexing to directory '" +INDEX_DIR+ "'..."); System.out.println("Optimizing..."); //writer.optimize(); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } SearchChinese.java
Analyzer analyzer = new SmartChineseAnalyzer(Version.LUCENE_CURRENT); BufferedReader in = null;if (queries != null) { in = new BufferedReader(new FileReader(queries));} else { in = new BufferedReader(new InputStreamReader(System.in, "GBK"));} 在这里, 我制定了输入的查询是采用GBK编码的.
static void indexDocs(IndexWriter writer, File file) throws IOException { // do not try to index files that cannot be read if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { indexDocs(writer, new File(file, files[i])); } } } else { System.out.println("adding " + file); try { writer.addDocument(FileDocument.Document(file)); } // at least on windows, some temporary files raise this exception with an "access denied" message // checking if the file can be read doesn't help catch (FileNotFoundException fnfe) { ; } } } 重点在于这一句: