package tao.lucene.test;
import java.io.File;
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.After;
import org.junit.BeforeClass;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;
public class Index {
@BeforeClass
public static void setUpBeforeClass() throws Exception {
}
@After
public void tearDown() throws Exception {
}
@Test
public void addDoc() {
try {
Directory dir = FSDirectory.open(new File("E:\\lucenedata"));
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,
new IKAnalyzer());
IndexWriter writer = new IndexWriter(dir, config);
Document doc = new Document();
doc.add(new Field("id", "55", Field.Store.YES, Field.Index.ANALYZED));
doc.add(new Field("content",
"前面总结了很多Lucene上的东西,建立索引、高亮等等这些都是为了查询做准备和服务的,",
Field.Store.YES, Field.Index.ANALYZED));
doc.add(new Field("title", "我的标题列", Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(new NumericField("time").setIntValue(20121012));
writer.addDocument(doc);
System.out.println(writer.maxDoc());
// writer.commit();
writer.close();
} catch (IOException e) {
e.printStackTrace();
}
}
@Test
public void delDoc() {
try {
Directory dir = FSDirectory.open(new File("E:\\lucenedata"));
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,
new IKAnalyzer());
IndexWriter writer = new IndexWriter(dir, config);
System.out.println(writer.numDocs() + "===" + writer.maxDoc());
writer.deleteDocuments(new Term("id", "55"));
// writer.deleteDocuments(new Term("content", "我的文章"));
// writer.deleteDocuments(new Term("content", "我修改了"));
writer.commit();
System.out.println(writer.numDocs() + "===" + writer.maxDoc());
writer.close();
} catch (IOException e) {
e.printStackTrace();
}
}
@Test
public void updateDoc() {
try {
Directory dir = FSDirectory.open(new File("E:\\lucenedata"));
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,
new IKAnalyzer());
IndexWriter writer = new IndexWriter(dir, config);
System.out.println(writer.numDocs() + "===" + writer.maxDoc());
Document doc = new Document();
doc.add(new Field("id", "55", Field.Store.YES, Field.Index.ANALYZED));
doc.add(new Field("content",
"前面总结了很多Lucene上的东西,建立索引、高亮等等这些都是为了查询做准备和服务的,",
Field.Store.YES, Field.Index.ANALYZED));
doc.add(new Field("title", "我的标题列", Field.Store.YES,
Field.Index.NOT_ANALYZED));
doc.add(new NumericField("time").setIntValue(20121012));
writer.updateDocument(new Term("id", "55"), doc);
writer.commit();
System.out.println(writer.numDocs() + "===" + writer.maxDoc());
writer.close();
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* 词条搜索,不分词搜索,对于分词的字段不适用,但不会报错
*/
@Test
public void termForSearchTest() {
try {
Directory dir = FSDirectory.open(new File("E:\\lucenedata"));
IndexReader reader = IndexReader.open(dir);
IndexSearcher search = new IndexSearcher(reader);
Term t = new Term("title", "我的标题列");
Query q = new TermQuery(t);
System.out.println(q.getClass().getCanonicalName());
// q=new TermRangeQuery("content", 0, 100, true, true);
TopDocs tdocs = search.search(q, 10000);
ScoreDoc[] sdocs = tdocs.scoreDocs;
for (ScoreDoc sdoc : sdocs) {
Document doc = search.doc(sdoc.doc);
Explanation ep = search.explain(q, sdoc.doc);
System.out.println(ep);
System.out.println("id==" + doc.get("id"));
System.out.println("content==" + doc.get("content"));
System.out.println("title==" + doc.get("title"));
System.out.println("time==" + doc.get("time"));
}
search.close();
reader.close();
} catch (Exception e) {
e.printStackTrace();
} finally {
}
}
/**
* 分词搜索,使用分词字段
*/
@Test
public void queryParserForSearch() {
try {
Directory dir = FSDirectory.open(new File("E:\\lucenedata"));
IndexReader reader = IndexReader.open(dir);
IndexSearcher search = new IndexSearcher(reader);
QueryParser parser = new QueryParser(Version.LUCENE_35, "content",
new IKAnalyzer());
Query q = parser.parse("lucene建立索引高亮查询");
System.out.println(q.getClass().getCanonicalName());
TopDocs tdocs = search.search(q, 10000);
ScoreDoc[] sdocs = tdocs.scoreDocs;
for (ScoreDoc sdoc : sdocs) {
Document doc = search.doc(sdoc.doc);
Explanation ep = search.explain(q, sdoc.doc);
System.out.println(ep);
System.out.println("id==" + doc.get("id"));
System.out.println("content==" + doc.get("content"));
System.out.println("title==" + doc.get("title"));
System.out.println("time==" + doc.get("time"));
}
search.close();
reader.close();
} catch (Exception e) {
e.printStackTrace();
} finally {
}
}
/**
* 前缀搜索,使用不分词搜索
*/
@Test
public void prefixQuery() {
try {
Directory dir = FSDirectory.open(new File("E:\\lucenedata"));
IndexReader reader = IndexReader.open(dir);
IndexSearcher search = new IndexSearcher(reader);
Term term = new Term("title", "我的");
PrefixQuery q = new PrefixQuery(term);
TopDocs tdocs = search.search(q, 10000);
ScoreDoc[] sdocs = tdocs.scoreDocs;
for (ScoreDoc sdoc : sdocs) {
Document doc = search.doc(sdoc.doc);
Explanation ep = search.explain(q, sdoc.doc);
System.out.println(ep);
System.out.println("id==" + doc.get("id"));
System.out.println("content==" + doc.get("content"));
System.out.println("title==" + doc.get("title"));
System.out.println("time==" + doc.get("time"));
}
search.close();
reader.close();
} catch (Exception e) {
e.printStackTrace();
} finally {
}
}
/**
* 数字搜索,使用不分词字段搜索
*/
@Test
public void numerQuery() {
try {
Directory dir = FSDirectory.open(new File("E:\\lucenedata"));
IndexReader reader = IndexReader.open(dir);
IndexSearcher search = new IndexSearcher(reader);
NumericRangeQuery<Integer> q=NumericRangeQuery.newIntRange("time", 20101010, 20121212, true, true);
TopDocs tdocs = search.search(q, 10000);
ScoreDoc[] sdocs = tdocs.scoreDocs;
for (ScoreDoc sdoc : sdocs) {
Document doc = search.doc(sdoc.doc);
Explanation ep = search.explain(q, sdoc.doc);
System.out.println(ep);
System.out.println("id==" + doc.get("id"));
System.out.println("content==" + doc.get("content"));
System.out.println("title==" + doc.get("title"));
System.out.println("time==" + doc.get("time"));
}
search.close();
reader.close();
} catch (Exception e) {
e.printStackTrace();
} finally {
}
}
/**
* 组合搜索
*/
@Test
public void booleanQuery() {
try {
Directory dir = FSDirectory.open(new File("E:\\lucenedata"));
IndexReader reader = IndexReader.open(dir);
IndexSearcher search = new IndexSearcher(reader);
Term term = new Term("title", "我的标题列");
PrefixQuery pq = new PrefixQuery(term);
NumericRangeQuery<Integer> nq=NumericRangeQuery.newIntRange("time", 20101010, 20121212, true, true);
BooleanQuery bq=new BooleanQuery();
//must必须包含,should不是必须包含,相当于or,must_not一定不包含
bq.add(pq, BooleanClause.Occur.MUST);
bq.add(nq, BooleanClause.Occur.SHOULD);
TopDocs tdocs = search.search(bq, 10000);
ScoreDoc[] sdocs = tdocs.scoreDocs;
for (ScoreDoc sdoc : sdocs) {
Document doc = search.doc(sdoc.doc);
Explanation ep = search.explain(bq, sdoc.doc);
System.out.println(ep);
System.out.println(ep);
System.out.println("id==" + doc.get("id"));
System.out.println("content==" + doc.get("content"));
System.out.println("title==" + doc.get("title"));
System.out.println("time==" + doc.get("time"));
}
search.close();
reader.close();
} catch (Exception e) {
e.printStackTrace();
} finally {
}
}
/**
* 多关键字搜索,可以限制关键字间的距离
*/
@Test
public void phraseQuery() {
try {
Directory dir = FSDirectory.open(new File("E:\\lucenedata"));
IndexReader reader = IndexReader.open(dir);
IndexSearcher search = new IndexSearcher(reader);
PhraseQuery q=new PhraseQuery();
q.setSlop(5);
Term term = new Term("content", "前面");
q.add(term);
Term term1 = new Term("content", "索引");
q.add(term1);
TopDocs tdocs = search.search(q, 10000);
ScoreDoc[] sdocs = tdocs.scoreDocs;
for (ScoreDoc sdoc : sdocs) {
Document doc = search.doc(sdoc.doc);
Explanation ep = search.explain(q, sdoc.doc);
System.out.println(ep);
System.out.println("id==" + doc.get("id"));
System.out.println("content==" + doc.get("content"));
System.out.println("title==" + doc.get("title"));
System.out.println("time==" + doc.get("time"));
}
search.close();
reader.close();
} catch (Exception e) {
e.printStackTrace();
} finally {
}
}
}