package com.npf.lucene;import java.io.File;import java.util.ArrayList;import java.util.List;import org.apache.commons.io.FileUtils;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field.Store;import org.apache.lucene.document.TextField;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.util.Version;public class IndexManager { public static ClassLoader manager = IndexManager.class.getClassLoader(); public static final String dirPath = manager.getResource("com/npf/lucene/file").getPath(); public static final String indexPath = manager.getResource("com/npf/lucene/index").getPath(); public static void main(String[] args) throws Exception{ //1. 创建文档列表,用来保存多个Document List<Document> docList = createDocumentList(); //2.获取源文件所在目录 File dir = getSourceFileDirectory(); //3.遍历源文件所在目录下的所有文件,并将每个具体文件封装成Document,加入Document列表中 docList = lookupFileIntoDocument(docList,dir); //4.创建分词器Analyzer Analyzer analyzer = createAnalyzer(); //5.指定索引存储的目录 Directory directory = createDirectory(); //6.创建索引写对象 IndexWriter indexWriter = createIndexWriter(analyzer,directory); //7.将文档加入索引写对象中 addDocumentToIndexWriter(indexWriter,docList); //8.提交索引写对象 indexWriterCommit(indexWriter); //9.关闭索引写对象 indexWriterClose(indexWriter); } /** * 创建文档列表,保存多个Document */ public static List<Document> createDocumentList(){ List<Document> docList = new ArrayList<Document>(); return docList; } /** * 获取源文件所在目录 */ public static File getSourceFileDirectory(){ File dir = new File(dirPath); return dir; } /** * 遍历源文件所在目录下的所有文件,并将每个具体文件封装成Document,加入Document列表中 */ public static List<Document> lookupFileIntoDocument(final List<Document> docList,File dir) throws Exception{ for(File file : dir.listFiles()){ String fileName = file.getName(); String 
fileContext = FileUtils.readFileToString(file); Long fileSize = FileUtils.sizeOf(file); Document doc = new Document(); //第一个参数:域名 //第二个参数:域值 //第三个参数:是否存储,是为yes,不存储为no TextField nameFiled = new TextField("fileName", fileName, Store.YES); TextField contextFiled = new TextField("fileContext", fileContext, Store.YES); TextField sizeFiled = new TextField("fileSize", fileSize.toString(), Store.YES); //将所有的域都存入文档中 doc.add(nameFiled); doc.add(contextFiled); doc.add(sizeFiled); docList.add(doc); } return docList; } /** * 创建分词器,StandardAnalyzer标准分词器,标准分词器对英文分词效果很好,对中文是单字分词 */ public static Analyzer createAnalyzer(){ Analyzer analyzer = new StandardAnalyzer(); return analyzer; } /** * 指定索引和文档存储的目录 */ public static Directory createDirectory() throws Exception{ Directory directory = FSDirectory.open(new File(indexPath)); return directory; } /** * 创建索引写对象 */ public static IndexWriter createIndexWriter(Analyzer analyzer,Directory directory) throws Exception{ //创建索引和文档的写对象的初始化对象 IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer); //创建索引和文档的写对象 IndexWriter indexWriter = new IndexWriter(directory, config); return indexWriter; } /** * 将文档加入到索引写对象中 */ public static void addDocumentToIndexWriter(IndexWriter indexWriter,List<Document> docList) throws Exception{ for(Document doc1 : docList){ indexWriter.addDocument(doc1); } } /** * 索引写对象的提交 */ public static void indexWriterCommit(IndexWriter indexWriter) throws Exception{ indexWriter.commit(); } /** * 索引写对象的关闭 */ public static void indexWriterClose(IndexWriter indexWriter) throws Exception{ indexWriter.close(); }}六、查询索引
6.1 实现步骤
第一步:创建分词器,创建索引和搜索时所用的分词器必须一致。
第二步:创建查询对象。
第三步:指定索引的目录,也就是索引库存放的位置。
第四步:创建索引的读取对象。
第五步:创建索引的搜索对象。
第六步:执行查询。
6.2 IndexSearcher搜索方法
| 方法 | 说明 |
| --- | --- |
| indexSearcher.search(query, n) | 根据Query搜索,返回评分最高的n条记录 |
| indexSearcher.search(query, filter, n) | 根据Query搜索,添加过滤策略,返回评分最高的n条记录 |
| indexSearcher.search(query, n, sort) | 根据Query搜索,添加排序策略,返回按排序规则排在最前的n条记录 |
| indexSearcher.search(query, filter, n, sort) | 根据Query搜索,添加过滤策略,添加排序策略,返回按排序规则排在最前的n条记录 |
package com.npf.lucene;import java.io.File;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.index.IndexReader;import org.apache.lucene.queryparser.classic.QueryParser;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TopDocs;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;public class IndexSearch { public static ClassLoader manager = IndexManager.class.getClassLoader(); public static final String indexPath = manager.getResource("com/npf/lucene/index").getPath(); public static void main(String[] args) throws Exception{ //1.创建分词器(创建索引和使用时所用的分词器必须一致) Analyzer analyzer = createAnalyzer(); //2.创建查询对象 Query query = createQuery(analyzer); //3.指定索引的目录 Directory dir = createDirectory(); //4.创建索引的读取对象 IndexReader indexReader = createIndexReader(dir); //5.创建索引的搜索对象 IndexSearcher indexSearcher = createIndexSearcher(indexReader); //搜索:第一个参数为查询语句对象, 第二个参数:指定显示多少条 TopDocs topdocs = indexSearcher.search(query, 2); //一共搜索到多少条记录 System.out.println("=====count=====" + topdocs.totalHits); //从搜索结果对象中获取结果集 ScoreDoc[] scoreDocs = topdocs.scoreDocs; for(ScoreDoc scoreDoc : scoreDocs){ //获取docID int docID = scoreDoc.doc; //通过文档ID从硬盘中读取出对应的文档 Document document = indexReader.document(docID); //get域名可以取出值 打印 System.out.println("fileName:" + document.get("fileName")); System.out.println("fileSize:" + document.get("fileSize")); System.out.println("============================================================"); } } /** * 创建索引的搜索对象 */ public static IndexSearcher createIndexSearcher(IndexReader indexReader) { IndexSearcher indexSearcher = new IndexSearcher(indexReader); return indexSearcher; } /** * 创建分词器,StandardAnalyzer标准分词器,标准分词器对英文分词效果很好,对中文是单字分词 */ public static Analyzer createAnalyzer(){ Analyzer analyzer = new StandardAnalyzer(); 
return analyzer; } /** * 创建查询对象,第一个参数:默认搜索域, 第二个参数:分词器 * 默认搜索域作用:如果搜索语法中指定了域名,从指定域中搜索,如果搜索时只写了查询关键字,则从默认搜索域中进行搜索 */ public static Query createQuery(Analyzer analyzer) throws Exception{ QueryParser queryParser = new QueryParser("fileContext", analyzer); //查询语法=域名:搜索的关键字 Query query = queryParser.parse("fileContext:recommended"); return query; } /** * 指定索引的目录 */ public static Directory createDirectory() throws Exception{ Directory directory = FSDirectory.open(new File(indexPath)); return directory; } /** * 索引的读取对象 */ public static IndexReader createIndexReader(Directory dir) throws Exception{ IndexReader indexReader = IndexReader.open(dir); return indexReader; }}6.4 TopDocsLucene搜索结果可通过TopDocs遍历,TopDocs类提供了少量的属性,如下:
| 方法或属性 | 说明 |
| --- | --- |
| totalHits | 匹配搜索条件的总记录数 |
| scoreDocs | 排在最前面的匹配记录数组(ScoreDoc[]) |
源代码的github地址:https://github.com/lucene-in-action/lucene-index-manager-search
新闻热点
疑难解答