Lucene截止目前,最新版本为v7.4.0。它是一个开放源代码的全文检索引擎工具包,但它不是一个完整的全文检索引擎,而是一个全文检索引擎的架构,提供了完整的查询引擎和索引引擎,部分文本分析引擎(英文与德文两种西方语言)。
目录:
1 Lucene环境相关
$ uname -a
Linux ubuntu 4.15.0-32-generic #35-Ubuntu SMP Fri Aug 10 17:58:07 UTC 2018 x86_64 x86_64 x86_64 GNU/Linux
$ java -version
java version “1.8.0_181”
Java(TM) SE Runtime Environment (build 1.8.0_181-b13)
Java HotSpot(TM) 64-Bit Server VM (build 25.181-b13, mixed mode)
补充 :对应的安装包名称为jdk-8u181-linux-x64.tar.gz,Ubuntu 18.04.1 LTS,MyEclipse CI 2018.8.0
References:
[1] https://www.genuitec.com/products/myeclipse/download/
[2] http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html
[3] http://www.apache.org/dyn/closer.lua/lucene/java/7.4.0
2 Add External JARs…(右击项目->Build Path)
下载Lucene:http://www.apache.org/dyn/closer.lua/lucene/java/7.4.0
3 配置git
$ sudo apt install git
$ git config --global user.name "qingdujun"
$ git config --global user.email "123456@gmail.com"
$ ssh-keygen -t rsa -C "123456@gmail.com"
Generating public/private rsa key pair.
Enter file in which to save the key (/home/qingdujun/.ssh/id_rsa):
Created directory ‘/home/qingdujun/.ssh’.
Enter passphrase (empty for no passphrase):
Enter same passphrase again:
Your identification has been saved in /home/qingdujun/.ssh/id_rsa.
Your public key has been saved in /home/qingdujun/.ssh/id_rsa.pub.
$ vim /home/qingdujun/.ssh/id_rsa.pub
拷贝里面的内容至github.com->settings->SSH and GPG keys->New SSH key
References:
[1] https://www.liaoxuefeng.com/wiki/0013739516305929606dd18361248578c67b8067c8c017b000
[2] https://git-scm.com/book/zh/v1/Git-%E5%9F%BA%E7%A1%80-%E6%89%93%E6%A0%87%E7%AD%BE
[3] http://www.ruanyifeng.com/blog/2014/06/git_remote.html
4 Lucene v7.4.0 source code
git clone https://github.com/qingdujun/LuceneRes/releases/tag/v0.1
4.1 索引目录
~/Documents/LuceneRes/Index$ ls
_0.cfe _0.cfs _0.si segments_2 write.lock
4.2 运行结果
Console
Indexing /home/qingdujun/Documents/LuceneRes/Data/LuceneConstants.java
Indexing /home/qingdujun/Documents/LuceneRes/Data/Searcher.java
Indexing /home/qingdujun/Documents/LuceneRes/Data/LuceneTester.java
Indexing /home/qingdujun/Documents/LuceneRes/Data/TextFileFilter.java
Indexing /home/qingdujun/Documents/LuceneRes/Data/Indexer.java
5 File indexed, time taken: 128 ms
3 documents found. Time: 40 ms
File: /home/qingdujun/Documents/LuceneRes/Data/Indexer.java
File: /home/qingdujun/Documents/LuceneRes/Data/Searcher.java
File: /home/qingdujun/Documents/LuceneRes/Data/LuceneTester.java
5 附录(可运行源代码):
5.1 TextFileFilter.java
package com.cfs.lucene;
import java.io.File;
import java.io.FileFilter;
public class TextFileFilter implements FileFilter {
public static final String TEXT_TYPE = ".java" ;
@Override
public boolean accept (File pathname) {
return pathname.getName().toLowerCase().endsWith(TEXT_TYPE);
}
}
5.2 LuceneConstants.java
package com.cfs.lucene;
public class LuceneConstants {
public static final String CONTENTS = "contents" ;
public static final String FILE_NAME = "filename" ;
public static final String FILE_PATH = "filepath" ;
public static final int MAX_SEARCH = 10 ;
}
5.3 Indexer.java
package com.cfs.lucene;
import java.io.File;
import java.io.FileFilter;
import java.io.FileReader;
import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
public class Indexer {
private IndexWriter indexWriter = null ;
/**
* Construct index
* @param directoryPath The directory where the index is located
* @throws IOException
*/
public Indexer (String directoryPath) throws IOException{
Directory directory = FSDirectory.open(Paths.get(directoryPath));
Analyzer analyzer = new StandardAnalyzer();
IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
indexWriter = new IndexWriter (directory, indexWriterConfig);
}
/**
* Close write index
* @throws IOException
*/
public void close () throws IOException{
indexWriter.close();
}
/**
* Convert a file to an index object(document)
* @param file
* @return
* @throws IOException
*/
private Document getDocument (File file) throws IOException{
Document document = new Document();
Field contentsField = new TextField(LuceneConstants.CONTENTS, new FileReader(file));
document.add(contentsField);
Field fileNameField = new StringField(LuceneConstants.FILE_NAME, file.getName(),Store.YES);
document.add(fileNameField);
Field filePathField = new StringField(LuceneConstants.FILE_PATH, file.getCanonicalPath(), Store.YES);
document.add(filePathField);
return document;
}
/**
* Index file
* @param file
* @throws IOException
*/
private void indexFile (File file) throws IOException{
System.out.println("Indexing " +file.getCanonicalPath());
Document document = getDocument(file);
indexWriter.addDocument(document);
}
/**
* Index all files
* @param dataDirPath Indexed data storage directory
* @param fileFilter
* @return
* @throws IOException
*/
public int createIndex (String dataDirPath, FileFilter fileFilter) throws IOException{
File[] files = new File(dataDirPath).listFiles();
for (File file : files) {
if (!file.isDirectory() && !file.isHidden() && file.exists()
&& file.canRead() && fileFilter.accept(file)) {
indexFile(file);
}
}
return indexWriter.numDocs();
}
/**
* Read the entire contents of the file and return the bytes
* @param file
* @return
* @throws IOException
*/
5.4 Searcher.java
package com .cfs .lucene
import java.io .IOException
import java.nio .file .Paths
import org.apache .lucene .analysis .standard .StandardAnalyzer
import org.apache .lucene .document .Document
import org.apache .lucene .index .DirectoryReader
import org.apache .lucene .queryparser .classic .ParseException
import org.apache .lucene .queryparser .classic .QueryParser
import org.apache .lucene .search .IndexSearcher
import org.apache .lucene .search .Query
import org.apache .lucene .search .ScoreDoc
import org.apache .lucene .search .TopDocs
import org.apache .lucene .store .Directory
import org.apache .lucene .store .FSDirectory
public class Searcher {
IndexSearcher indexSearcher = null
QueryParser queryParser = null
Query query = null
Directory directory = null
DirectoryReader directoryReader = null
public Searcher(String directoryPath) throws IOException{
//Store the index in memory
//Directory directory = new RAMDirectory()
//Store an index on disk
directory = FSDirectory.open (Paths.get (directoryPath))
directoryReader = DirectoryReader.open (directory)
indexSearcher = new IndexSearcher(directoryReader)
queryParser = new QueryParser(LuceneConstants.CONTENTS , new StandardAnalyzer())
}
public TopDocs search(String searchQuery) throws ParseException, IOException {
query = queryParser.parse (searchQuery)
return indexSearcher.search (query, LuceneConstants.MAX _SEARCH)
}
public Document getDocument(ScoreDoc scoreDoc) throws IOException {
return indexSearcher.doc (scoreDoc.doc )
}
public void close() throws IOException {
directoryReader.close ()
directory.close ()
}
}
5.5 LuceneTester.java
package com.cfs.lucene;
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
public class LuceneTester {
public static final String basePath = "/home/qingdujun/Documents/LuceneRes/" ;
public static final String indexDir = basePath+"Index" ;
public static final String dataDir = basePath+"Data" ;
private Indexer indexer = null ;
private Searcher searcher = null ;
/**
* Test the function of create index
* @throws IOException
*/
private void createIndex () throws IOException {
indexer = new Indexer(indexDir);
int numIndexed = 0 ;
long startTime = System.currentTimeMillis();
numIndexed = indexer.createIndex(dataDir, new TextFileFilter());
long endTime = System.currentTimeMillis();
indexer.close();
System.out.println(numIndexed+" File indexed, time taken: " +(endTime-startTime)+" ms" );
}
/**
* Test the function of search result
* @param searchQuery What you want to search for
* @throws IOException
* @throws ParseException
*/
private void search (String searchQuery) throws IOException, ParseException {
searcher = new Searcher(indexDir);
long startTime = System.currentTimeMillis();
TopDocs hits = searcher.search(searchQuery);
long endTime = System.currentTimeMillis();
System.out.println(hits.totalHits+" documents found. Time: " +(endTime-startTime)+" ms" );
for (ScoreDoc scoreDoc : hits.scoreDocs) {
Document document = searcher.getDocument(scoreDoc);
System.out.println("File: " +document.get(LuceneConstants.FILE_PATH));
}
searcher.close();
}
public static void main (String[] args) {
LuceneTester luceneTester = new LuceneTester();
try {
luceneTester.createIndex();
luceneTester.search("throws IOException" );
} catch (Exception e) {
e.printStackTrace();
}
}
}
qingdujun
2018-8-30 于 北京 海淀