配置参考：solrconfig.xml
<?xml version="1.0" encoding="UTF-8" ?>
<!-- solrconfig.xml is explained in detail in section 7.5; see also section 5.3. -->
<config>
  <luceneMatchVersion>4.7</luceneMatchVersion>
  <schemaFactory class="ClassicIndexSchemaFactory"/>

  <!-- Index update handling. -->
  <updateHandler class="solr.DirectUpdateHandler2">
    <maxPendingDeletes>100000</maxPendingDeletes>
  </updateHandler>

  <!-- Query-side settings: clause limit, caches, result windowing and searcher warming. -->
  <query>
    <maxBooleanClauses>1024</maxBooleanClauses>

    <filterCache class="solr.LRUCache"
                 size="512"
                 initialSize="512"
                 autowarmCount="256"/>
    <queryResultCache class="solr.LRUCache"
                      size="512"
                      initialSize="512"
                      autowarmCount="256"/>
    <documentCache class="solr.LRUCache"
                   size="512"
                   initialSize="512"
                   autowarmCount="0"/>

    <enableLazyFieldLoading>true</enableLazyFieldLoading>
    <queryResultWindowSize>50</queryResultWindowSize>
    <queryResultMaxDocsCached>200</queryResultMaxDocsCached>
    <HashDocSet maxSize="3000" loadFactor="0.75"/>

    <!-- Static queries sent through QuerySenderListener on the newSearcher event. -->
    <listener event="newSearcher" class="solr.QuerySenderListener">
      <arr name="queries">
        <lst>
          <str name="q">solr</str>
          <str name="start">0</str>
          <str name="rows">10</str>
        </lst>
        <lst>
          <str name="q">rocks</str>
          <str name="start">0</str>
          <str name="rows">10</str>
        </lst>
        <lst>
          <str name="q">static newSearcher warming query from solrconfig.xml</str>
        </lst>
      </arr>
    </listener>

    <!-- The firstSearcher listener is registered with an empty query list. -->
    <listener event="firstSearcher" class="solr.QuerySenderListener">
      <arr name="queries">
      </arr>
    </listener>

    <useColdSearcher>false</useColdSearcher>
    <maxWarmingSearchers>4</maxWarmingSearchers>
  </query>

  <!-- HTTP request dispatching: remote streaming disabled, upload limits of 2048 KB. -->
  <requestDispatcher handleSelect="true">
    <requestParsers enableRemoteStreaming="false"
                    multipartUploadLimitInKB="2048"
                    formdataUploadLimitInKB="2048"/>
    <httpCaching never304="true">
    </httpCaching>
  </requestDispatcher>

  <!-- Default request handler. -->
  <requestHandler name="standard" class="solr.StandardRequestHandler" default="true">
    <lst name="defaults">
      <str name="echoParams">explicit</str>
    </lst>
  </requestHandler>

  <!-- Data import handler; its import definition is read from tika-data-config.xml. -->
  <requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">
    <lst name="defaults">
      <str name="config">tika-data-config.xml</str>
    </lst>
  </requestHandler>

  <requestHandler name="/search" class="org.apache.solr.handler.component.SearchHandler">
    <lst name="defaults">
      <str name="echoParams">explicit</str>
    </lst>
  </requestHandler>

  <requestHandler name="/update" class="solr.UpdateRequestHandler">
  </requestHandler>

  <requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers"/>

  <!-- Ping handler: the query is fixed via "invariants", so requests cannot override it. -->
  <requestHandler name="/admin/ping" class="solr.PingRequestHandler">
    <lst name="invariants">
      <str name="q">solrpingquery</str>
    </lst>
    <lst name="defaults">
      <str name="echoParams">all</str>
    </lst>
  </requestHandler>

  <admin>
    <defaultQuery>*:*</defaultQuery>
  </admin>
</config>
schema.xml
<?xml version="1.0" encoding="UTF-8" ?>
<!-- schema.xml is explained in detail in section 7.5; see also section 5.2. -->
<schema name="test" version="1.2">
  <types>
    <!-- Primitive (non-analyzed) field types. -->
    <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
    <fieldType name="integer" class="solr.IntField" omitNorms="true"/>
    <fieldType name="long" class="solr.LongField" omitNorms="true"/>
    <fieldType name="float" class="solr.FloatField" omitNorms="true"/>
    <fieldType name="double" class="solr.DoubleField" omitNorms="true"/>

    <!-- Sortable numeric variants. -->
    <fieldType name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true"/>
    <fieldType name="slong" class="solr.SortableLongField" sortMissingLast="true" omitNorms="true"/>
    <fieldType name="sfloat" class="solr.SortableFloatField" sortMissingLast="true" omitNorms="true"/>
    <fieldType name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="true"/>

    <fieldType name="date" class="solr.DateField" sortMissingLast="true" omitNorms="true"/>
    <fieldType name="random" class="solr.RandomSortField" indexed="true"/>

    <!-- Text split on whitespace only, with no further filtering. -->
    <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      </analyzer>
    </fieldType>

    <!--
      General text type. The index and query analyzers use the same chain and
      differ only in the catenateWords / catenateNumbers settings (1 at index
      time, 0 at query time).
    -->
    <fieldType name="text" class="solr.TextField" positionIncrementGap="100">
      <analyzer type="index">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1"
                generateNumberParts="1"
                catenateWords="1"
                catenateNumbers="1"
                catenateAll="0"
                splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1"
                generateNumberParts="1"
                catenateWords="0"
                catenateNumbers="0"
                catenateAll="0"
                splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- Text analyzed with IKAnalyzer; useSmart is true for indexing and false for querying. -->
    <fieldType name="text_ik" class="solr.TextField">
      <analyzer type="index" class="org.wltea.analyzer.lucene.IKAnalyzer" useSmart="true"/>
      <analyzer type="query" class="org.wltea.analyzer.lucene.IKAnalyzer" useSmart="false"/>
    </fieldType>
  </types>

  <fields>
    <!-- NOTE(review): no <uniqueKey> is declared in this schema; confirm whether "id" should be the unique key. -->
    <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false"/>
    <field name="fileName" type="string" indexed="true" stored="true"/>
    <field name="author" type="string" indexed="true" stored="true"/>
    <field name="title" type="string" indexed="true" stored="true"/>
    <field name="size" type="long" indexed="true" stored="true"/>
    <field name="lastModified" type="date" indexed="true" stored="true"/>
    <!-- Indexed but not stored; this is also the default search field declared below. -->
    <field name="text" type="text_ik" indexed="true" stored="false" multiValued="true"/>
  </fields>

  <defaultSearchField>text</defaultSearchField>
  <solrQueryParser defaultOperator="OR"/>
</schema>
tika-data-config.xml
<?xml version="1.0" encoding="UTF-8" ?>
<!-- See section 8.2 for a detailed explanation. -->
<dataConfig>
  <dataSource type="BinFileDataSource"/>
  <document>
    <!--
      Outer entity: lists files under baseDir (recursively) whose names end in
      one of the extensions below. It is not the document root
      (rootEntity="false").
      The fileName value is a regular expression: the dot before the extension
      is escaped and the alternatives are grouped and anchored with "$", so
      only names ending in .doc, .docx, .pdf, .xls or .ppt are selected. Add
      further extensions to the group as needed.
      NOTE(review): the data source above is declared without a name while this
      entity refers to dataSource="binary"; confirm the intended wiring.
    -->
    <entity name="files"
            dataSource="binary"
            rootEntity="false"
            processor="FileListEntityProcessor"
            baseDir="D:/temp"
            fileName=".*\.(doc|docx|pdf|xls|ppt)$"
            recursive="true">
      <field column="fileAbsolutePath" name="id"/>
      <field column="fileSize" name="size"/>
      <field column="fileLastModified" name="lastModified"/>

      <!--
        Inner entity: processes each listed file with TikaEntityProcessor
        (format="text"), addressed by the absolute path from the outer entity.
        NOTE(review): the "file" column is mapped here, inside the Tika entity;
        verify that fileName is actually populated, or whether this mapping
        belongs on the outer "files" entity.
      -->
      <entity name="documentImport"
              processor="TikaEntityProcessor"
              url="${files.fileAbsolutePath}"
              format="text">
        <field column="file" name="fileName"/>
        <field column="Author" name="author" meta="true"/>
        <field column="title" name="title" meta="true"/>
        <field column="text" name="text"/>
      </entity>
    </entity>
  </document>
</dataConfig>
更多精彩内容请关注:http://bbs.superwu.cn