package com.jadyer.lucene;

import java.io.File;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date; import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version; /**
* 【Lucene3.6.2入门系列】第03节_简述Lucene中常见的搜索功能
* @create Aug 1, 2013 3:54:27 PM
* @author 玄玉<http://blog.csdn.net/jadyer>
*/
public class HelloSearch {
private Directory directory;
private IndexReader reader;
private String[] ids = {"1", "2", "3", "4", "5", "6"};
private String[] names = {"Michael", "Scofield", "Tbag", "Jack", "Jade", "Jadyer"};
private String[] emails = {"aa@jadyer.us", "bb@jadyer.cn", "cc@jadyer.cc", "dd@jadyer.tw", "ee@jadyer.hk", "ff@jadyer.me"};
private String[] contents = {"my java blog is http://blog.csdn.net/jadyer", "my website is http://www.jadyer.cn", "my name is jadyer", "I am JavaDeveloper", "I am from Haerbin", "I like Lucene"};
private int[] attachs = {9,3,5,4,1,2};
private Date[] dates = new Date[ids.length]; public HelloSearch(){
IndexWriter writer = null;
Document doc = null;
SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");
try {
dates[0] = sdf.parse("20120601");
dates[1] = sdf.parse("20120603");
dates[2] = sdf.parse("20120605");
dates[3] = sdf.parse("20120607");
dates[4] = sdf.parse("20120609");
dates[5] = sdf.parse("20120611");
directory = FSDirectory.open(new File("myExample/03_index/"));
writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36)));
writer.deleteAll(); //创建索引之前,先把文档清空掉
for(int i=0; i<ids.length; i++){ //遍历ID来创建文档
doc = new Document();
doc.add(new Field("id", ids[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
doc.add(new Field("name", names[i], Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
doc.add(new Field("email", emails[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(new Field("email", "test"+i+""+i+"@jadyer.com", Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(new Field("content", contents[i], Field.Store.NO, Field.Index.ANALYZED));
doc.add(new NumericField("attach", Field.Store.YES, true).setIntValue(attachs[i])); //为数字加索引(第三个参数指定是否索引)
doc.add(new NumericField("attach", Field.Store.YES, true).setIntValue((i+1)*100)); //假设有多个附件
doc.add(new NumericField("date", Field.Store.YES, true).setLongValue(dates[i].getTime())); //为日期加索引
writer.addDocument(doc);
}
} catch (Exception e) {
e.printStackTrace();
} finally {
if(null != writer){
try {
writer.close();
} catch (IOException ce) {
ce.printStackTrace();
}
}
}
} /**
* 针对分页搜索创建索引
*/
public HelloSearch(boolean pageFlag){
String[] myNames = new String[50];
String[] myContents = new String[50];
for(int i=0; i<50; i++){
myNames[i] = "file(" + i + ")";
myContents[i] = "I love JavaSE, also love Lucene(" + i + ")";
}
IndexWriter writer = null;
Document doc = null;
try {
directory = FSDirectory.open(new File("myExample/03_index/"));
writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36)));
writer.deleteAll();
for(int i=0; i<myNames.length; i++){
doc = new Document();
doc.add(new Field("myname", myNames[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
doc.add(new Field("mycontent", myContents[i], Field.Store.YES, Field.Index.ANALYZED));
writer.addDocument(doc);
}
} catch (IOException e) {
e.printStackTrace();
} finally {
if(null != writer){
try {
writer.close();
} catch (IOException ce) {
ce.printStackTrace();
}
}
}
} /**
* 获取IndexSearcher实例
*/
private IndexSearcher getIndexSearcher(){
try {
if(reader == null){
reader = IndexReader.open(directory);
}else{
//if the index was changed since the provided reader was opened, open and return a new reader; else,return null
//如果当前reader在打开期间index发生改变,则打开并返回一个新的IndexReader,否则返回null
IndexReader ir = IndexReader.openIfChanged(reader);
if(ir != null){
reader.close(); //关闭原reader
reader = ir; //赋予新reader
}
}
return new IndexSearcher(reader);
}catch(Exception e) {
e.printStackTrace();
}
return null; //发生异常则返回null
} /**
* 执行搜索操作
* @param query 搜索的Query对象
*/
private void doSearch(Query query){
IndexSearcher searcher = this.getIndexSearcher();
try {
//第二个参数指定搜索后显示的最多的记录数,其与tds.totalHits没有联系
TopDocs tds = searcher.search(query, 10);
System.out.println("本次搜索到[" + tds.totalHits + "]条记录");
for(ScoreDoc sd : tds.scoreDocs){
Document doc = searcher.doc(sd.doc);
System.out.print("文档编号=" + sd.doc + " 文档权值=" + doc.getBoost() + " 文档评分=" + sd.score + " ");
System.out.print("id=" + doc.get("id") + " email=" + doc.get("email") + " name=" + doc.get("name") + " ");
//获取多个同名域的方式
String[] attachValues = doc.getValues("attach");
for(String attach : attachValues){
System.out.print("attach=" + attach + " ");
}
System.out.println();
}
} catch (IOException e) {
e.printStackTrace();
} finally {
if(null != searcher){
try {
searcher.close(); //记得关闭IndexSearcher
} catch (IOException e) {
e.printStackTrace();
}
}
}
} /**
* 精确匹配搜索
* @param fieldName 域名(相当于表的字段名)
* @param keyWords 搜索的关键字
*/
public void searchByTerm(String fieldName, String keyWords){
Query query = new TermQuery(new Term(fieldName, keyWords));
this.doSearch(query);
} /**
* 基于范围的搜索
* @param fieldName 域名(相当于表的字段名)
* @param start 开始字符
* @param end 结束字符
*/
public void searchByTermRange(String fieldName, String start, String end){
Query query = new TermRangeQuery(fieldName, start, end, true, true); //后面两个参数用于指定开区间或闭区间
this.doSearch(query);
} /**
* 针对数字的搜索
*/
public void searchByNumericRange(String fieldName, int min, int max){
Query query = NumericRangeQuery.newIntRange(fieldName, min, max, true, true);
this.doSearch(query);
} /**
* 基于前缀的搜索
* @see 它是对Field分词后的结果进行前缀查找的结果
*/
public void searchByPrefix(String fieldName, String prefix){
Query query = new PrefixQuery(new Term(fieldName, prefix));
this.doSearch(query);
} /**
* 基于通配符的搜索
* @see *-->任意多个字符
* @see ?-->一个字符
*/
public void searchByWildcard(String fieldName, String wildcard){
Query query = new WildcardQuery(new Term(fieldName, wildcard));
this.doSearch(query);
} /**
* 模糊搜索
* @see 与通配符搜索不同
*/
public void searchByFuzzy(String fieldName, String fuzzy){
Query query = new FuzzyQuery(new Term(fieldName, fuzzy));
this.doSearch(query);
} /**
* 多条件搜索
* @see 本例中搜索name值中以Ja开头,且content中包含am的内容
* @see Occur.MUST------表示此条件必须为true
* @see Occur.MUST_NOT--表示此条件必须为false
* @see Occur.SHOULD----表示此条件非必须
*/
public void searchByBoolean(){
BooleanQuery query = new BooleanQuery();
query.add(new WildcardQuery(new Term("name", "Ja*")), Occur.MUST);
query.add(new TermQuery(new Term("content", "am")), Occur.MUST);
this.doSearch(query);
} /**
* 短语搜索
* @see 很遗憾的是短语查询对中文搜索没有太大的作用,但对英文搜索是很好用的,但它的开销比较大,尽量少用
*/
public void searchByPhrase(){
PhraseQuery query = new PhraseQuery();
query.setSlop(1); //设置跳数
query.add(new Term("content", "am")); //第一个Term
query.add(new Term("content", "Haerbin")); //产生距离之后的第二个Term
this.doSearch(query);
} /**
* 基于QueryParser的搜索
*/
public void searchByQueryParse(){
QueryParser parser = new QueryParser(Version.LUCENE_36, "content", new StandardAnalyzer(Version.LUCENE_36));
Query query = null;
try {
// query = parser.parse("Haerbin"); //搜索content中包含[Haerbin]的记录
// query = parser.parse("I AND Haerbin"); //搜索content中包含[I]和[Haerbin]的记录
// query = parser.parse("Lucene OR Haerbin"); //搜索content中包含[Lucene]或者[Haerbin]的记录
// query = parser.parse("Lucene Haerbin"); //搜索content中包含[Lucene]或者[Haerbin]的记录
// parser.setDefaultOperator(Operator.AND); //将空格的默认操作OR修改为AND
// //1)如果name域在索引时,不进行分词,那么无论这里写成[name:Jadyer]还是[name:jadyer],最后得到的都是0条记录
// //2)由于name原值为大写[J],若索引时不对name分词,除非修改name原值为小写[j],并且搜索[name:jadyer]才能得到记录
// query = parser.parse("name:Jadyer"); //修改搜索域为name=Jadyer的记录
// query = parser.parse("name:Ja*"); //支持通配符
// query = parser.parse("\"I am\""); //搜索content中包含[I am]的记录(注意不能使用parse("content:'I am'"))
// parser.setAllowLeadingWildcard(true); //设置允许[*]或[?]出现在查询字符的第一位,即[name:*de],否则[name:*de]会报异常
// query = parser.parse("name:*de"); //Lucene默认的第一个字符不允许为通配符,因为这样效率比较低
// //parse("+am +name:Jade")--------------搜索content中包括[am]的,并且name=Jade的记录
// //parse("am AND NOT name:Jade")--------搜索content中包括[am]的,并且nam不是Jade的记录
// //parse("(blog OR am) AND name:Jade")--搜索content中包括[blog]或者[am]的,并且name=Jade的记录
// query = parser.parse("-name:Jack +I"); //搜索content中包括[I]的,并且name不是Jack的记录(加减号要放到域说明的前面)
// query = parser.parse("id:[1 TO 3]"); //搜索id值从1到3的记录(TO必须大写,且这种方式没有办法匹配数字)
// query = parser.parse("id:{1 TO 3}"); //搜索id=2的记录
query = parser.parse("name:Jadk~"); //模糊搜索
} catch (ParseException e) {
e.printStackTrace();
}
this.doSearch(query);
} /**
* 普通的分页搜索
* @see 适用于lucene3.5之前
* @param expr 搜索表达式
* @param pageIndex 页码
* @param pageSize 分页大小
*/
public void searchPage(String expr, int pageIndex, int pageSize){
IndexSearcher searcher = this.getIndexSearcher();
QueryParser parser = new QueryParser(Version.LUCENE_36, "mycontent", new StandardAnalyzer(Version.LUCENE_36));
try {
Query query = parser.parse(expr);
TopDocs tds = searcher.search(query, pageIndex*pageSize);
ScoreDoc[] sds = tds.scoreDocs;
for(int i=(pageIndex-1)*pageSize; i<pageIndex*pageSize; i++){
Document doc = searcher.doc(sds[i].doc);
System.out.println("文档编号:" + sds[i].doc + "-->" + doc.get("myname") + "-->" + doc.get("mycontent"));
}
} catch (Exception e) {
e.printStackTrace();
} finally {
if(null != searcher){
try {
searcher.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
} /**
* 基于searchAfter的分页搜索
* @see 适用于Lucene3.5
* @param expr 搜索表达式
* @param pageIndex 页码
* @param pageSize 分页大小
*/
public void searchPageByAfter(String expr, int pageIndex, int pageSize){
IndexSearcher searcher = this.getIndexSearcher();
QueryParser parser = new QueryParser(Version.LUCENE_36, "mycontent", new StandardAnalyzer(Version.LUCENE_36));
try {
Query query = parser.parse(expr);
TopDocs tds = searcher.search(query, (pageIndex-1)*pageSize);
//使用IndexSearcher.searchAfter()搜索,该方法第一个参数为上一页记录中的最后一条记录
if(pageIndex > 1){
tds = searcher.searchAfter(tds.scoreDocs[(pageIndex-1)*pageSize-1], query, pageSize);
}else{
tds = searcher.searchAfter(null, query, pageSize);
}
for(ScoreDoc sd : tds.scoreDocs){
Document doc = searcher.doc(sd.doc);
System.out.println("文档编号:" + sd.doc + "-->" + doc.get("myname") + "-->" + doc.get("mycontent"));
}
} catch (Exception e) {
e.printStackTrace();
} finally {
if(null != searcher){
try {
searcher.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
}

下面是JUnit4.x编写的测试

package com.jadyer.test;

import java.io.File;

import org.junit.Before;
import org.junit.Test; import com.jadyer.lucene.HelloSearch; public class HelloSearchTest {
private HelloSearch hello; @Before
public void init(){
hello = new HelloSearch();
} @Test
public void searchByTerm(){
hello.searchByTerm("content", "my");
} @Test
public void searchByTermRange(){
hello.searchByTermRange("name", "M", "o");
} @Test
public void searchByNumericRange(){
hello.searchByNumericRange("attach", 2, 5);
} @Test
public void searchByPrefix(){
hello.searchByPrefix("content", "b");
} @Test
public void searchByWildcard(){
hello.searchByWildcard("name", "Ja??er");
} @Test
public void searchByFuzzy(){
hello.searchByFuzzy("name", "Jadk");
} @Test
public void searchByBoolean(){
hello.searchByBoolean();
} @Test
public void searchByPhrase(){
hello.searchByPhrase();
} @Test
public void searchByQueryParse(){
hello.searchByQueryParse();
} @Test
public void searchPage(){
for(File file : new File("myExample/03_index/").listFiles()){
file.delete();
}
hello = new HelloSearch(true);
hello.searchPage("mycontent:javase", 2, 10);
} @Test
public void searchPageByAfter(){
for(File file : new File("myExample/03_index/").listFiles()){
file.delete();
}
hello = new HelloSearch(true);
hello.searchPageByAfter("mycontent:javase", 3, 10);
}
}

最新文章

  1. hibernate缓存机制(转)
  2. JQuery中的html(),text(),val()区别
  3. ACM题目————网格动物
  4. 《锋利的jQuery》心得笔记--Three Sections
  5. 在Xcode6中找回失去的模板
  6. 盘点六大在中国复制失败的O2O案例
  7. C#实现数据结构——线性表(下)
  8. Android判读是否安装了某一款APP
  9. hdfs-over-ftp安装与配置
  10. poj 3111 K Best (二分搜索之最大化平均值之01分数规划)
  11. 【转及总结】Bootstrap 框架 栅格布局系统底层设计原理
  12. jq-css、class、属性操作
  13. ZigBee 学习资源
  14. 撩课-Web大前端每天5道面试题-Day22
  15. SICP读书笔记 2.1
  16. jQuery练手:仿新浪微博图片文字列表淡进淡出上下滚动效果
  17. pause和resume
  18. 史上最强网推案例,没有之一【ZW团队实战经典】
  19. 获取 config文件的节点值
  20. spring--注入方式

热门文章

  1. JS操作Json
  2. cocos2dx-2.2.1 免 Cygwin 环境搭建(Win8+VS2013+ADT Bundle+android-ndk-r9c)
  3. Linux压力测试工具
  4. Python字符界面函数库
  5. tsne降维可视化
  6. WSGI、flup、fastcgi、web.py的关系
  7. GitHub超详细图文攻略 - Git客户端下载安装 GitHub提交修改源码工作流程 Git分支 标签 过滤 Git版本工作流
  8. 卷积神经网络CNN总结
  9. centos下从源码安装openssl
  10. 关于在Windows下Composer下载安装Yii2.0
  11. WebStorm2018破解
  12. vue中动态样式不起作用? scoped了解一下
  13. winfrom程序Datagridview列名问题
  14. 20. Web proxies (网页代理 4个)
  15. 斐波那契数列yield表示
  16. gcc编译器命令使用详解
  17. 【2017-04-25】winform公共控件、菜单和工具栏
  18. vscode 使用sublime风格代码
  19. RDD转换成DataFrames
  20. python对列表中的字典进行排序