|
package com.java_min.test;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter.MaxFieldLength; import org.apache.lucene.queryParser.MultiFieldQueryParser; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.Filter; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.junit.Test;
import com.java_min.util.File2DocumentUtils;
public class HelloWorld {
private String filePath = "D://workspace//luceneDemo//luceneDateSource//test.txt"; private String indexPath = "D://workspace//luceneDemo//luceneIndex"; private Analyzer analyzer = new StandardAnalyzer(); /** * * @throws Exception * Administrator Jun 15, 2010 * * 创建索引 * IndexWriter是用来操作(增、删、改)索引库的 * */ @Test public void createIndex() throws Exception{ /** * 文件系统路径(将索引放入磁盘) */ Directory directory = FSDirectory.getDirectory(indexPath); /** * 内存路径(将索引放入内存) */ //Directory directory = new RAMDirectory(); Document doc = File2DocumentUtils.file2Document(filePath); IndexWriter indexWriter = new IndexWriter(directory,analyzer,true,MaxFieldLength.LIMITED); indexWriter.addDocument(doc); indexWriter.close(); } //搜索 @Test public void searchWord() throws Exception { String queryStr = "中国足球"; //1.把要搜索的文本解析为Query String[] fields = {"name","context"}; QueryParser queryParser = new MultiFieldQueryParser(fields,analyzer); Query query = queryParser.parse(queryStr); //2.进行查询 IndexSearcher indexSearcher = new IndexSearcher(indexPath); Filter filter = null; TopDocs topDocs = indexSearcher.search(query, filter,100000); System.out.println("当前共搜索到【"+topDocs.totalHits+"】条匹配结果"); //3.打印结果 for(ScoreDoc scoreDoc:topDocs.scoreDocs){ int doc = scoreDoc.doc; //文档内部编号 Document document = indexSearcher.doc(doc); //根据编号取出相应文档 File2DocumentUtils.printDocumentInfo(document); //打印出文档信息 } } }
package com.java_min.util;
import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.InputStreamReader;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.NumberTools; import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store;
public class File2DocumentUtils {
/** * * @param path * @return * Administrator Jun 16, 2010 * * file到document的转换 */ public static Document file2Document(String path){
File file = new File(path); Document document = new Document(); document.add(new Field("name",file.getName(),Store.YES,Index.ANALYZED)); document.add(new Field("context",getFileContext(file),Store.YES,Index.ANALYZED)); document.add(new Field("size", NumberTools.longToString(file.length()),Store.YES,Index.NOT_ANALYZED)); document.add(new Field("path",file.getAbsolutePath(),Store.YES,Index.NO)); return document; } /** * * @param doc * Administrator Jun 16, 2010 * * document到file的转换 */ public static void document2File(Document doc){ } /** * * @param file * @return * Administrator Jun 16, 2010 * * 得到文件内容 */ public static String getFileContext(File file){ StringBuffer context = new StringBuffer(); try { BufferedReader bf = new BufferedReader(new InputStreamReader(new FileInputStream(file))); for(String line = null;(line = bf.readLine()) != null;){ context.append(line).append("/n"); } } catch (Exception e) { throw new RuntimeException(e); } return context.toString() ; } /** * * @param document * Administrator Jun 16, 2010 * * 打印document * * 获取name属性的值的两种方法: * 1. Field f = doc.getField("name"); * f.stringValue(); * * 2. doc.get("name") */ public static void printDocumentInfo(Document document){ System.out.println("文件名为--------------" + document.get("name")); System.out.println("文件内容为-------------" + document.get("context")); System.out.println("文件大小为-------------" + NumberTools.stringToLong(document.get("size"))); System.out.println("文件路径为-------------" + document.get("path")); } } |