Lucene存储对象是以document为存储单元,对象中相关的属性值则存放到Field中;
lucene中所有Field都是IndexableField接口的实现
org.apache.lucene.index.IndexableField
Represents a single field for indexing. IndexWriter consumes Iterable as a document.
IndexableField接口提供了一些方法,主要是对field相关属性的获取,包括
/** 获取field的名称 */
public String name();
/** 获取field的类型fieldType */
public IndexableFieldType fieldType();
/**
*获取当前field的权重(评分值) 只有Field有评分的概念,如果我们想对document进行评分值的设定 必须预先对document中对应的field值进行评分设设定*/ public float boost();
/** 如果此Filed为二进制类型的,返回相应的值*/
public BytesRef binaryValue();
...
/**
* 创建一个用户索引此Field的TokenStream
*/
public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) throws IOException;
所有的Field均是org.apache.lucene.document.Field的子类;
项目中我们常用的Field类型主要有IntField, LongField, FloatField, DoubleField, BinaryDocValuesField, NumericDocValuesField, SortedDocValuesField, StringField, TextField, StoredField.
package com.lucene.field;
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopFieldDocs;
import org.junit.Test;
import com.lucene.index.IndexUtil;
import com.lucene.search.SearchUtil;
public class IntFieldTest {
/**
* 保存一个intField
*/
@Test
public void testIndexIntFieldStored() {
Document document = new Document();
document.add(new IntField("intValue", 30, Field.Store.YES));
//要排序必须加同名的field,且类型为NumericDocValuesField
document.add(new NumericDocValuesField("intValue", 30));
Document document1 = new Document();
document1.add(new IntField("intValue", 40, Field.Store.YES));
document1.add(new NumericDocValuesField("intValue", 40));
IndexWriter writer = null;
try {
writer = IndexUtil.getIndexWriter("intFieldPath", false);
writer.addDocument(document);
writer.addDocument(document1);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}finally{
try {
writer.commit();
writer.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
/**
* 测试intField排序
*/
@Test
public void testIntFieldSort(){
try {
IndexSearcher searcher = SearchUtil.getIndexSearcher("intFieldPath", null);
//构建排序字段
SortField[] sortField = new SortField[1];
sortField[0] = new SortField("intValue",SortField.Type.INT,true);
Sort sort = new Sort(sortField);
//查询所有结果
Query query = new MatchAllDocsQuery();
TopFieldDocs docs = searcher.search(query, 2, sort);
ScoreDoc[] scores = docs.scoreDocs;
//遍历结果
for (ScoreDoc scoreDoc : scores) {
System.out.println(searcher.doc(scoreDoc.doc));;
}
//searcher.search(query, results);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
测试排序结果如下
Document>
Document>
如果修改NumericDocValuesField对应的值,结果会随着其值的大小而改变
package com.lucene.field;
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopFieldDocs;
import org.junit.Test;
import com.lucene.index.IndexUtil;
import com.lucene.search.SearchUtil;
public class LongFieldTest {
/**
* 保存一个longField
*/
@Test
public void testIndexLongFieldStored() {
Document document = new Document();
document.add(new LongField("longValue", 50L, Field.Store.YES));
document.add(new NumericDocValuesField("longValue", 50L));
Document document1 = new Document();
document1.add(new LongField("longValue", 80L, Field.Store.YES));
document1.add(new NumericDocValuesField("longValue", 80L));
IndexWriter writer = null;
try {
writer = IndexUtil.getIndexWriter("longFieldPath", false);
writer.addDocument(document);
writer.addDocument(document1);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}finally{
try {
writer.commit();
writer.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
/**
* 测试longField排序
*/
@Test
public void testLongFieldSort(){
try {
IndexSearcher searcher = SearchUtil.getIndexSearcher("longFieldPath", null);
//构建排序字段
SortField[] sortField = new SortField[1];
sortField[0] = new SortField("longValue",SortField.Type.LONG,true);
Sort sort = new Sort(sortField);
//查询所有结果
Query query = new MatchAllDocsQuery();
TopFieldDocs docs = searcher.search(query, 2, sort);
ScoreDoc[] scores = docs.scoreDocs;
//遍历结果
for (ScoreDoc scoreDoc : scores) {
//System.out.println(searcher.doc(scoreDoc.doc));;
Document doc = searcher.doc(scoreDoc.doc);
System.out.println(doc.getField("longValue").numericValue());
}
//searcher.search(query, results);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
运行结果如下:
Document>
Document>
package com.lucene.field;
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.document.FloatField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopFieldDocs;
import org.junit.Test;
import com.lucene.index.IndexUtil;
import com.lucene.search.SearchUtil;
public class FloatFieldTest {
/**
* 保存一个floatField
*/
@Test
public void testIndexFloatFieldStored() {
Document document = new Document();
document.add(new FloatField("floatValue", 9.1f, Field.Store.YES));
document.add(new FloatDocValuesField("floatValue", 82.0f));
Document document1 = new Document();
document1.add(new FloatField("floatValue", 80.1f, Field.Store.YES));
document1.add(new FloatDocValuesField("floatValue", 80.1f));
IndexWriter writer = null;
try {
writer = IndexUtil.getIndexWriter("floatFieldPath", false);
writer.addDocument(document);
writer.addDocument(document1);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}finally{
try {
writer.commit();
writer.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
/**
* 测试intField排序
*/
@Test
public void testFloatFieldSort(){
try {
IndexSearcher searcher = SearchUtil.getIndexSearcher("floatFieldPath", null);
//构建排序字段
SortField[] sortField = new SortField[1];
sortField[0] = new SortField("floatValue",SortField.Type.FLOAT,true);
Sort sort = new Sort(sortField);
//查询所有结果
Query query = new MatchAllDocsQuery();
TopFieldDocs docs = searcher.search(query, 2, sort);
ScoreDoc[] scores = docs.scoreDocs;
//遍历结果
for (ScoreDoc scoreDoc : scores) {
//System.out.println(searcher.doc(scoreDoc.doc));;
Document doc = searcher.doc(scoreDoc.doc);
System.out.println(doc.getField("floatValue").numericValue());
}
//searcher.search(query, results);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
结果如下:
Document>
Document>
package com.lucene.field;
import java.io.IOException;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.document.FloatField;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.util.BytesRef;
import org.junit.Test;
import com.lucene.index.IndexUtil;
import com.lucene.search.SearchUtil;
public class BinaryDocValuesFieldTest {
/**
* 保存一个BinaryDocValuesField
*/
@Test
public void testIndexLongFieldStored() {
Document document = new Document();
document.add(new BinaryDocValuesField("binaryValue",new BytesRef("1234".getBytes())));
Document document1 = new Document();
document1.add(new BinaryDocValuesField("binaryValue",new BytesRef("2345".getBytes())));
IndexWriter writer = null;
try {
writer = IndexUtil.getIndexWriter("binaryValueFieldPath", false);
writer.addDocument(document);
writer.addDocument(document1);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}finally{
try {
writer.commit();
writer.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
/**
* 测试BinaryDocValuesField排序
*/
@Test
public void testBinaryDocValuesFieldSort(){
try {
IndexSearcher searcher = SearchUtil.getIndexSearcher("binaryValueFieldPath", null);
//构建排序字段
SortField[] sortField = new SortField[1];
sortField[0] = new SortField("binaryValue",SortField.Type.STRING_VAL,true);
Sort sort = new Sort(sortField);
//查询所有结果
Query query = new MatchAllDocsQuery();
TopFieldDocs docs = searcher.search(query, 2, sort);
ScoreDoc[] scores = docs.scoreDocs;
//遍历结果
for (ScoreDoc scoreDoc : scores) {
//System.out.println(searcher.doc(scoreDoc.doc));;
Document doc = searcher.doc(scoreDoc.doc);
System.out.println(doc);
//System.out.println(doc.getField("binaryValue").numericValue());
}
//searcher.search(query, results);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
运行结果:
Document<>
Document<>
为什么这样呢,这是跟BinaryDocValuesField的特性决定的,只索引不存值!
package com.lucene.field;
import java.io.IOException;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.document.FloatField;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.util.BytesRef;
import org.junit.Test;
import com.lucene.index.IndexUtil;
import com.lucene.search.SearchUtil;
public class StringFieldTest {
/**
* 保存一个StringField
*/
@Test
public void testIndexLongFieldStored() {
Document document = new Document();
document.add(new StringField("stringValue","12445", Field.Store.YES));
document.add(new SortedDocValuesField("stringValue", new BytesRef("12445".getBytes())));
Document document1 = new Document();
document1.add(new StringField("stringValue","23456", Field.Store.YES));
document1.add(new SortedDocValuesField("stringValue", new BytesRef("23456".getBytes())));
IndexWriter writer = null;
try {
writer = IndexUtil.getIndexWriter("stringFieldPath", false);
writer.addDocument(document);
writer.addDocument(document1);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}finally{
try {
writer.commit();
writer.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
/**
* 测试StringField排序
*/
@Test
public void testStringFieldSort(){
try {
IndexSearcher searcher = SearchUtil.getIndexSearcher("stringFieldPath", null);
//构建排序字段
SortField[] sortField = new SortField[1];
sortField[0] = new SortField("stringVal",SortField.Type.STRING,true);
Sort sort = new Sort(sortField);
//查询所有结果
Query query = new MatchAllDocsQuery();
TopFieldDocs docs = searcher.search(query, 2, sort);
ScoreDoc[] scores = docs.scoreDocs;
//遍历结果
for (ScoreDoc scoreDoc : scores) {
//System.out.println(searcher.doc(scoreDoc.doc));;
Document doc = searcher.doc(scoreDoc.doc);
System.out.println(doc);
//System.out.println(doc.getField("binaryValue").numericValue());
}
//searcher.search(query, results);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
运行结果如下:
Document>
Document>
package com.lucene.field;
import java.io.IOException;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.document.FloatField;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.util.BytesRef;
import org.junit.Test;
import com.lucene.index.IndexUtil;
import com.lucene.search.SearchUtil;
public class TextFieldTest {
/**
* 保存一个StringField
*/
@Test
public void testIndexLongFieldStored() {
Document document = new Document();
document.add(new TextField("textValue","12345", Field.Store.YES));
document.add(new SortedDocValuesField("textValue", new BytesRef("12345".getBytes())));
Document document1 = new Document();
document1.add(new TextField("textValue","23456", Field.Store.YES));
document1.add(new SortedDocValuesField("textValue", new BytesRef("23456".getBytes())));
IndexWriter writer = null;
try {
writer = IndexUtil.getIndexWriter("textFieldPath", false);
writer.addDocument(document);
writer.addDocument(document1);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}finally{
try {
writer.commit();
writer.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
/**
* 测试StringField排序
*/
@Test
public void testStringFieldSort(){
try {
IndexSearcher searcher = SearchUtil.getIndexSearcher("textFieldPath", null);
//构建排序字段
SortField[] sortField = new SortField[1];
sortField[0] = new SortField("textValue",SortField.Type.STRING,true);
Sort sort = new Sort(sortField);
//查询所有结果
Query query = new MatchAllDocsQuery();
TopFieldDocs docs = searcher.search(query, 2, sort);
ScoreDoc[] scores = docs.scoreDocs;
//遍历结果
for (ScoreDoc scoreDoc : scores) {
//System.out.println(searcher.doc(scoreDoc.doc));;
Document doc = searcher.doc(scoreDoc.doc);
System.out.println(doc);
//System.out.println(doc.getField("binaryValue").numericValue());
}
//searcher.search(query, results);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
运行结果如下:
Document>
Document>
lucene field使用源码
此博文是对近期做lucene索引的总结,今天是第二天了,以后会继续坚持的,大家有问题的话联系本人的Q-Q: 891922381,同时本人新建Q-Q群:106570134(lucene,solr,netty,hadoop),如蒙加入,不胜感激,大家共同探讨,本人争取每日一博,希望大家关注呦