目前还缺少数值类型和日期类型的模糊查询与精确查询示例,等学会以后再更新。
package com.test;

import java.util.Date;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.xml.builders.NumericRangeFilterBuilder;
import org.apache.lucene.queryparser.xml.builders.NumericRangeQueryBuilder;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;

/**
 * Demonstrates the basic Lucene query types (term, term range, prefix,
 * wildcard, fuzzy, boolean and phrase) against a small in-memory index that
 * is built from the sample arrays below when the object is constructed.
 *
 * <p>Every {@code searchBy*} method prints the total hit count followed by
 * the stored {@code id}, {@code name} and {@code email} of each returned
 * document to standard output.
 */
public class SearcherUtil {
    private Directory directory;
    private IndexReader reader;
    // Not populated or read anywhere in this class.
    private Date[] dates = null;
    private String[] ids = {"1", "2", "3", "4", "5", "6"};
    private String[] emails = {"aa@tom.com", "bb@edu.com", "cc@sina.com", "dd@yaho.com", "ee@qq.com", "ff@163.com"};
    private String[] content = {
        "welcome to 1 room,I like tom",
        "welcome to 2 room",
        "welcome to 3 room",
        "welcome to 4 room,i like qq",
        "welcome to 5 room",
        "welcome to 6 room",
    };
    // Simulated attachment counts; used to demonstrate indexing a numeric field.
    private int[] attachs = {1, 4, 3, 2, 5, 1};
    private String[] names = {"张三", "李四", "王五", "马六", "赵七", "刘八"};
    // Email domain -> boost applied to the "content" field of that document.
    private Map<String, Float> scorsMap = new HashMap<String, Float>();

    /**
     * Creates the in-memory directory, registers the per-domain boosts and
     * builds the index.
     *
     * @throws Exception if the index cannot be written
     */
    public SearcherUtil() throws Exception {
        directory = new RAMDirectory();
        // Boosts described by the indexing comment in index(): tom.com -> 2.0,
        // yaho.com -> 2.5; every other domain falls back to 0.5 there.
        scorsMap.put("tom.com", 2.0f);
        scorsMap.put("yaho.com", 2.5f);
        index();
    }

    /**
     * Returns a searcher over the current state of the index.
     *
     * <p>The reader is opened on first use and afterwards refreshed through
     * {@code DirectoryReader.openIfChanged}; when a new reader is returned the
     * old one is closed and replaced.
     *
     * @return a new {@link IndexSearcher} wrapping the shared reader
     * @throws Exception if the reader cannot be opened or refreshed
     */
    public IndexSearcher getSearcher() throws Exception {
        if (reader == null) {
            reader = DirectoryReader.open(directory);
        } else {
            IndexReader tr = DirectoryReader.openIfChanged((DirectoryReader) reader);
            // Non-null means the index changed and a brand-new reader was returned.
            if (tr != null) {
                reader.close();
                reader = tr;
            }
        }
        return new IndexSearcher(reader);
    }

    /**
     * Exact term query.
     *
     * <p>A term query matches whole indexed terms only: the word {@code tom}
     * can match, the single letter {@code t} cannot.
     *
     * @param field field to search
     * @param name  exact term text
     * @param num   maximum number of hits to return
     * @throws Exception if the search fails
     */
    public void searchByTerm(String field, String name, int num) throws Exception {
        IndexSearcher searcher = getSearcher();
        Query query = new TermQuery(new Term(field, name));
        printHits(searcher, query, num);
    }

    /**
     * Term (string) range query; both bounds are inclusive.
     *
     * @param field field to search
     * @param start lower bound term
     * @param end   upper bound term
     * @param num   maximum number of hits to return
     * @throws Exception if the search fails
     */
    public void searchByTermRange(String field, String start, String end, int num) throws Exception {
        IndexSearcher searcher = getSearcher();
        Query query = new TermRangeQuery(field, new BytesRef(start), new BytesRef(end), true, true);
        printHits(searcher, query, num);
    }

    /**
     * Prefix query.
     *
     * @param field field to search
     * @param value prefix the term must start with
     * @param num   maximum number of hits to return
     * @throws Exception if the search fails
     */
    public void searchByPrefix(String field, String value, int num) throws Exception {
        IndexSearcher searcher = getSearcher();
        Query query = new PrefixQuery(new Term(field, value));
        printHits(searcher, query, num);
    }

    /**
     * Wildcard query.
     *
     * @param field field to search
     * @param value wildcard pattern
     * @param num   maximum number of hits to return
     * @throws Exception if the search fails
     */
    public void searchByWildcard(String field, String value, int num) throws Exception {
        IndexSearcher searcher = getSearcher();
        Query query = new WildcardQuery(new Term(field, value));
        printHits(searcher, query, num);
    }

    /**
     * Fuzzy query on the {@code email} field for the misspelled value
     * {@code aatom.com}. Matching is limited by edit distance, so terms that
     * differ by a few characters can still be found.
     *
     * @param num maximum number of hits to return
     * @throws Exception if the search fails
     */
    public void searchByFuzzy(int num) throws Exception {
        IndexSearcher searcher = getSearcher();
        // Similarity / distance settings can be passed to the FuzzyQuery constructor.
        FuzzyQuery query = new FuzzyQuery(new Term("email", "aatom.com"));
        System.out.println("query.getPrefixLength()" + query.getPrefixLength());
        printHits(searcher, query, num);
    }

    /**
     * Boolean query: {@code email} MUST equal {@code aa@tom.com} and
     * {@code content} SHOULD contain {@code like}.
     *
     * @param num maximum number of hits to return
     * @throws Exception if the search fails
     */
    public void searchByBoolean(int num) throws Exception {
        IndexSearcher searcher = getSearcher();
        BooleanQuery query = new BooleanQuery();
        // Occur.MUST: the clause has to match. Occur.SHOULD: the clause may or
        // may not match. Occur.MUST_NOT: the clause must not match.
        query.add(new TermQuery(new Term("email", "aa@tom.com")), Occur.MUST);
        // Any number of clauses can be stacked, similar to AND / OR in SQL.
        query.add(new TermQuery(new Term("content", "like")), Occur.SHOULD);
        printHits(searcher, query, num);
    }

    /**
     * Phrase query: finds documents whose {@code content} has the term
     * {@code i} followed by {@code like}, with a slop of 1.
     *
     * @param num maximum number of hits to return
     * @throws Exception if the search fails
     */
    public void searchByPhrase(int num) throws Exception {
        IndexSearcher searcher = getSearcher();
        PhraseQuery query = new PhraseQuery();
        query.setSlop(1);
        // First term of the phrase.
        query.add(new Term("content", "i"));
        // Second term; per the original author's note, swapping the two terms
        // makes the phrase no longer match.
        query.add(new Term("content", "like"));
        printHits(searcher, query, num);
    }

    /**
     * Rebuilds the whole index from the sample arrays: existing documents are
     * deleted, one document per id is added, and the writer is closed.
     *
     * @throws Exception if the index cannot be written
     */
    public void index() throws Exception {
        IndexWriter writer = new IndexWriter(directory,
                new IndexWriterConfig(Version.LATEST, new StandardAnalyzer(Version.LATEST)));
        try {
            writer.deleteAll();
            for (int i = 0; i < ids.length; i++) {
                Document doc = new Document();
                // Field.Store.YES: the value is stored in the index so it can be
                //   read back with doc.get(...).
                // Field.Store.NO: the value is not stored (doc.get returns nothing)
                //   but can still be indexed and searched.
                // Field.Index options:
                //   ANALYZED               - tokenize and index; for titles and body text.
                //   NOT_ANALYZED           - index as a single term without tokenizing;
                //                            for ids, names and other exact-match values.
                //   ANALYZED_NO_NORMS      - tokenize but do not store norms.
                //   NOT_ANALYZED_NO_NORMS  - neither tokenize nor store norms.
                //   NO                     - do not index at all.
                doc.add(new Field("id", ids[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
                Field contentField = new Field("content", content[i], Field.Store.NO, Field.Index.ANALYZED);
                Field emailField = new Field("email", emails[i], Field.Store.YES, Field.Index.NOT_ANALYZED);
                doc.add(emailField);
                doc.add(contentField);
                doc.add(new Field("name", names[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                doc.add(new IntField("attachs", attachs[i], Field.Store.YES));

                // Boost the content field by email domain: a registered domain gets
                // its configured boost, anything else gets 0.5. Boosts are set per
                // field here; the author notes that document-level boosts from
                // Lucene 3.5 are no longer available in 4.x.
                String et = emails[i].substring(emails[i].lastIndexOf("@") + 1);
                if (scorsMap.containsKey(et)) {
                    contentField.setBoost(scorsMap.get(et));
                } else {
                    contentField.setBoost(0.5f);
                }
                writer.addDocument(doc);
            }
        } finally {
            // Without close() (or commit()) the index is not visible to readers;
            // closing in finally also releases the writer when indexing fails.
            writer.close();
        }
    }

    /** Runs the query, then prints the hit count and the stored fields of each hit. */
    private void printHits(IndexSearcher searcher, Query query, int num) throws Exception {
        TopDocs tds = searcher.search(query, num);
        System.out.println("一共查询了:" + tds.totalHits);
        for (ScoreDoc sd : tds.scoreDocs) {
            Document doc = searcher.doc(sd.doc);
            System.out.println(doc.get("id") + "id<------->" + doc.get("name") + "<------name" + doc.get("email") + "email<-----");
        }
    }
}
package com.junittest;import org.junit.Before;import org.junit.Test;import com.test.IndexUtil;import com.test.SearcherUtil;public class testSearch { private SearcherUtil su ; @Before //索引方法 public void init() throws Exception { su = new SearcherUtil(); } @Test //精确查询 public void searchByTerm() throws Exception { su.searchByTerm("content", "i", 3); //中文都无法查询出来,估计是使用的分词器的原因,得用中文的分词器 } @Test //范围查询 public void searchByTermRange() throws Exception { su.searchByTermRange("email", "a", "b",10);//这句的意思是:从email这个field中,查询,从以a开头的值开始,一直查到包含b的那条数据结尾,一共查10条 System.out.println("------------------"); su.searchByTermRange("name", "a", "c",10);//数字类型无法使用该查询查询出来, System.out.println("------------------"); su.searchByTermRange("attachs", "1", "3",10); } @Test //前缀查询 public void searchByPrefix() throws Exception { su.searchByPrefix("email", "a",10);//这句的意思是:从email这个field中,查询,从以a开头的记录,一共查10条 } @Test //通配符查询 public void searchByWildcard() throws Exception { //*表示多个字符 ?表示一个字符 su.searchByWildcard("email", "a*",10);//这句的意思是:从email这个field中,查询包含有匹配a+通配符的 记录,一共查10条 } @Test //boolean查询 public void searchByBoolean() throws Exception { //*表示多个字符 ?表示一个字符 su.searchByBoolean(10);//这句的意思是:从email这个field中,查询包含有匹配a+通配符的 记录,一共查10条 } @Test //phrase查询 短语查询 public void searchByPhrase() throws Exception { //*表示多个字符 ?表示一个字符 su.searchByPhrase(10);//这句的意思是:从email这个field中,查询包含有匹配a+通配符的 记录,一共查10条 } @Test //模糊查询 短语查询 public void searchByFuzzy() throws Exception { //*表示多个字符 ?表示一个字符 su.searchByFuzzy(10);//这句的意思是:从email这个field中,查询包含有匹配a+通配符的 记录,一共查10条 }}