Zoie is a real-time search and indexing system built on Apache Lucene.
It was donated by LinkedIn.com on July 19, 2008, and has been deployed on a large-scale, real-time consumer website, LinkedIn.com, where it handles millions of searches as well as millions of updates daily.
Zoie can be configured via Spring:
<!--
  A DataProvider instance.
  FileDataProvider recurses through a given directory and provides the
  DataConsumer with indexing requests built from the gathered files.
  In this example the provider needs to be started manually, which is done via JMX.
-->
<bean id="dataprovider" class="proj.zoie.impl.indexing.FileDataProvider">
  <constructor-arg value="file:${source.directory}"/>
  <property name="dataConsumer" ref="indexingSystem"/>
</bean>
<!--
  An IndexableInterpreter instance.
  FileIndexableInterpreter converts a text file into a Lucene document;
  it is provided for example purposes only.
-->
<bean id="fileInterpreter" class="proj.zoie.impl.indexing.FileIndexableInterpreter"/>
<!--
  A decorator for an IndexReader instance.
  The default decorator is just a pass-through: the input IndexReader is returned.
-->
<bean id="idxDecorator" class="proj.zoie.impl.indexing.DefaultIndexReaderDecorator"/>
<!-- The Zoie system declaration; it is passed as the DataConsumer to the DataProvider declared above -->
<bean id="indexingSystem"
      class="proj.zoie.impl.indexing.ZoieSystem"
      init-method="start"
      destroy-method="shutdown">
  <!-- disk index directory -->
  <constructor-arg index="0" value="file:${index.directory}"/>
  <!-- sets the interpreter -->
  <constructor-arg index="1" ref="fileInterpreter"/>
  <!-- sets the decorator -->
  <constructor-arg index="2" ref="idxDecorator"/>
  <!-- sets the Analyzer; if null is passed, Lucene's StandardAnalyzer is used -->
  <constructor-arg index="3"><null/></constructor-arg>
  <!-- sets the Similarity; if null is passed, Lucene's DefaultSimilarity is used -->
  <constructor-arg index="4"><null/></constructor-arg>
  <!--
    The next two parameters control how often batched indexing is triggered;
    indexing starts as soon as either of the two conditions is met.
  -->
  <!-- Batch size: how many items to put on the queue before indexing is triggered -->
  <constructor-arg index="5" value="1000"/>
  <!-- Batch delay: how long to wait before indexing is triggered -->
  <constructor-arg index="6" value="300000"/>
  <!-- flag turning real-time indexing on/off -->
  <constructor-arg index="7" value="true"/>
</bean>
<!-- A search service -->
<bean id="mySearchService" class="com.mycompany.search.SearchService">
  <!-- the IndexReader factory that produces the index readers Searchers are built from -->
  <constructor-arg ref="indexingSystem"/>
</bean>
|
This example shows how to set up basic indexing and search
thread 1: (indexing thread)
// Version stamped on every event of a batch; incremented once per batch.
long batchVersion = 0;
while (true) {
  // build a batch of data objects to index
  Data[] data = buildDataEvents(...);

  // construct a collection of indexing events, one per datum,
  // each tagged with the current batch version
  ArrayList<DataEvent<Data>> eventList = new ArrayList<DataEvent<Data>>(data.length);
  for (Data datum : data) {
    eventList.add(new DataEvent<Data>(batchVersion, datum));
  }

  // do indexing: hand the events built above to the indexing system
  indexingSystem.consume(eventList);

  // increment my version
  batchVersion++;
}
|
thread 2: (search thread)
// get the IndexReaders; they are handed back via returnIndexReaders() in the finally block
List<ZoieIndexReader<MyDoNothingFilterIndexReader>> readerList = indexingSystem.getIndexReaders();
try {
  // MyDoNothingFilterIndexReader instances can be obtained by calling
  // ZoieIndexReader.extractDecoratedReaders()
  List<MyDoNothingFilterIndexReader> decoratedReaders = ZoieIndexReader.extractDecoratedReaders(readerList);
  SubReaderAccessor<MyDoNothingFilterIndexReader> subReaderAccessor = ZoieIndexReader.getSubReaderAccessor(decoratedReaders);

  // combine the readers
  MultiReader reader = new MultiReader(readerList.toArray(new IndexReader[readerList.size()]), false);

  // do search
  IndexSearcher searcher = new IndexSearcher(reader);
  Query q = buildQuery("myquery", indexingSystem.getAnalyzer());
  TopDocs docs = searcher.search(q, 10);
  ScoreDoc[] scoreDocs = docs.scoreDocs;

  // convert to UID for each doc
  for (ScoreDoc scoreDoc : scoreDocs) {
    int docid = scoreDoc.doc;
    // locate the sub-reader that holds this doc, and the doc's id within that sub-reader
    SubReaderInfo<MyDoNothingFilterIndexReader> readerInfo = subReaderAccessor.getSubReaderInfo(docid);
    long uid = (long) ((ZoieIndexReader<MyDoNothingFilterIndexReader>) readerInfo.subreader.getInnerReader()).getUID(readerInfo.subdocid);
  }
} finally {
  // return readers, even when the search above throws
  indexingSystem.returnIndexReaders(readerList);
}
|
阅读全文……