solr5.5.2 增量索引配置方法

  • 经过各种测试,解决N多BUG终于把增量索引配置成功了。
  • 一定要查看solr_home/server/logs/solr.log这个日志文件,这里记录了整个输出信息。以前用的4.7版本日志都输出在cmd窗口的,到了5.5cmd窗口就只有启动时输出的少量信息。如果发现哪里功能不正常了就在这个log文件里查看是否有异常信息输出。
  • 配置定时任务时用到的solr-dataimportscheduler-1.0.jar和其他人修改过的solr-dataimportscheduler-1.1.jar在5.5都会报空指针异常。
  • 可以用的jar包为http://download.csdn.net/detail/ljsososo/9486023#comment 修改过的mydataimportscheduler.jar。
  • 两个主要的配置文件代码如下:
  • managed-schema文件(或者schema.xml)


<schema name="example" version="1.6">

  <!--
    Solr schema for the index. Chinese text fields are analysed with the
    mmseg4j tokenizer. Fields that do not set indexed/stored themselves
    inherit those properties from their fieldType.
  -->
  <fields>
    <field name="text" type="text_mmseg4j_complex" indexed="true" stored="false" multiValued="true"/>
    <field name="_version_" type="string" indexed="true" stored="true"/>
    <!-- Unique key of every document (see <uniqueKey> below). -->
    <field name="id" type="string" required="true"/>
    <field name="doc_type" type="string"/>
    <field name="name" type="text_mmseg4j_maxword"/>
    <field name="address" type="text_mmseg4j_complex"/>
    <field name="abstract" type="text_mmseg4j_complex"/>

    <field name="lat" type="tdouble" indexed="true" stored="true"/>
    <field name="lon" type="tdouble" indexed="true" stored="true"/>
    <field name="actiontime" type="tdate"/>
    <field name="type" type="text_mmseg4j_maxword"/>
    <field name="city" type="text_mmseg4j_complex"/>
    <field name="dam_id" type="int"/>

    <!--
      One sample field per mmseg4j analysis mode. These were originally
      nested inside <types>; Solr's schema loader looks for field
      declarations directly under <schema> or under <fields>, so they are
      declared here to make sure they are registered.
    -->
    <field name="mmseg4j_complex_name" type="text_mmseg4j_complex" indexed="true" stored="true"/>
    <field name="mmseg4j_maxword_name" type="text_mmseg4j_maxword" indexed="true" stored="true"/>
    <field name="mmseg4j_simple_name" type="text_mmseg4j_simple" indexed="true" stored="true"/>
  </fields>

  <uniqueKey>id</uniqueKey>

  <types>
    <!-- Primitive and Trie-based numeric/date types. -->
    <fieldType name="string" class="solr.StrField" sortMissingLast="true"/>
    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
    <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
    <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
    <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>

    <!--
      mmseg4j text types, one per tokenizer mode (complex, max-word, simple).
      Each one applies the same stop-word filter after tokenizing.
      NOTE(review): the complex type uses dicPath="dic" while the other two
      use dicPath="/dic"; confirm which dictionary location is intended.
    -->
    <fieldType name="text_mmseg4j_complex" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="com.chenlb.mmseg4j.solr.MMSegTokenizerFactory" mode="complex" dicPath="dic"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
      </analyzer>
    </fieldType>
    <fieldType name="text_mmseg4j_maxword" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="com.chenlb.mmseg4j.solr.MMSegTokenizerFactory" mode="max-word" dicPath="/dic"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
      </analyzer>
    </fieldType>
    <fieldType name="text_mmseg4j_simple" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="com.chenlb.mmseg4j.solr.MMSegTokenizerFactory" mode="simple" dicPath="/dic"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
      </analyzer>
    </fieldType>
  </types>
</schema>

data-config.xml文件:

<dataConfig>
  <!--
    JDBC connection used by the DataImportHandler.
    NOTE(review): the root account and a plaintext password are embedded
    here; consider a dedicated read-only account for real deployments.
  -->
  <dataSource type="JdbcDataSource"
              driver="com.mysql.jdbc.Driver"
              url="jdbc:mysql://192.168.1.124/isfs_sims"
              user="root"
              password="1234"/>
  <document>
    <!--
      Entity "dam", keyed by dam_id.
        query            : full import, joins tbDam with tbAffiliate and tbCity.
        deltaQuery       : selects the dam_id of rows whose dtLastModified is
                           later than the last index time.
        deltaImportQuery : re-reads one changed row, selected by the dam_id
                           returned from deltaQuery.
    -->
    <entity name="dam" pk="dam_id" transformer="TemplateTransformer"
     query="select a.intId dam_id,b.varName type,a.varName name,c.varName city ,a.varAddress address,a.fltLongitude lon,a.fltLatitude lat,a.varAbstract abstract,dtLastModified actiontime from tbDam a,tbAffiliate b,tbCity c where a.varType=b.intId and a.intCityId=c.intCityId"
     deltaImportQuery="select a.intId dam_id,b.varName type,a.varName name,c.varName city ,a.varAddress address,a.fltLongitude lon,a.fltLatitude lat,a.varAbstract abstract,dtLastModified actiontime from tbDam a,tbAffiliate b,tbCity c where a.intId='${dataimporter.delta.dam_id}' and a.varType=b.intId and a.intCityId=c.intCityId"
     deltaQuery="select intId dam_id from tbDam where dtLastModified > '${dataimporter.last_index_time}'">
            <!-- The document id is built from the entity name prefix and the row's dam_id. -->
            <field column="id"  template="Dam_${dam.dam_id}"/>
            <field column="dam_id" />
            <!-- Constant marker identifying documents produced by this entity. -->
            <field column="doc_type"  template="dam"/>
            <field column="name" />
            <field column="type" />
            <field column="city" />
            <field column="lat" />
            <field column="lon" />
            <field column="address" />
            <field column="abstract" />
            <field column="actiontime" />
    </entity>

  </document>
</dataConfig>

这里涉及到多表的联合查询,所以代码较长。但是这里主要关注的应该是dam_id,这个是entity的主键,因此需要一个field来存这个值,不然作为唯一值的id不能设置template,进一步导致多个entity时,创建或修改索引就会乱七八糟,因为相同的id会直接覆盖。

别忘了在启动solr的容器中修改web.xml,在servlet标签之前加上:




<listener>
  <listener-class>org.apache.solr.handler.dataimport.scheduler.ApplicationListener</listener-class>
</listener>


你可能感兴趣的:(solr)