Solr4从XML导入数据

之前白编辑了……cnblogs 怎么也没给我保存个草稿……这次化繁为简,你照着我的做就 OK 了。

lib 什么的,记得去 E:\solrbase\dist 和 E:\solrbase\lib\ext 下面找,然后拷贝到你的 tomcat 的 lib 下

总共编辑的3个文件,这三个都在你Solr示例的\solr\collection1\conf下..

solrconfig.xml

schema.xml

xml-data-config.xml

下面贴文件内容了

schema.xml,定义你导入的业务数据的结构,类似数据库的表

<?xml version="1.0" encoding="UTF-8" ?>
<!--
    Solr schema describing the documents built by the /dataimport handler.
    It declares the document fields, the unique key, one copyField rule and
    the field types those fields refer to.
-->
<schema name="example" version="1.5">

    <fields>

        <!-- Unique document key (declared as <uniqueKey> below); required and single-valued. -->
        <field name="id" type="string" indexed="true" stored="true"
            required="true" multiValued="false" />
        <!-- Tokenized title; also copied into the catchall "text" field. -->
        <field name="title" type="text_general" indexed="true" stored="true" />

        <!-- Stored only (indexed="false"): returned in results but not searchable. -->
        <field name="image" type="string" indexed="false" stored="true" />
        <field name="value" type="double" indexed="false" stored="true" />

        <!-- Numeric fields that are both indexed and stored. -->
        <field name="price" type="double" indexed="true" stored="true" />
        <field name="rebate" type="double" indexed="true" stored="true" />

        <field name="bought" type="long" indexed="true" stored="true" />

        <!-- Untokenized string fields (exact-value matching). -->
        <field name="city" type="string" indexed="true" stored="true" />
        <field name="sort" type="string" indexed="true" stored="true" />

        <field name="loc" type="string" indexed="true" stored="true" />

        <!-- Date fields. -->
        <field name="startTime" type="date" indexed="true" stored="true" />
        <field name="endTime" type="date" indexed="true" stored="true" />

        <!-- catchall field, containing all other searchable text fields (implemented
            via copyField further on in this schema) -->
        <field name="text" type="text_general" indexed="true" stored="false"
            multiValued="true" />

        <field name="_version_" type="long" indexed="true" stored="true" />

    </fields>

    <!-- Field whose value uniquely identifies a document. -->
    <uniqueKey>id</uniqueKey>

    <!-- Feed the title into the catchall "text" field. -->
    <copyField source="title" dest="text" />

    <types>

        <fieldType name="string" class="solr.StrField"
            sortMissingLast="true" />

        <!-- boolean type: "true" or "false" -->
        <fieldType name="boolean" class="solr.BoolField"
            sortMissingLast="true" />

        <!-- Trie numeric types declared with precisionStep="0". -->
        <fieldType name="int" class="solr.TrieIntField"
            precisionStep="0" positionIncrementGap="0" />
        <fieldType name="float" class="solr.TrieFloatField"
            precisionStep="0" positionIncrementGap="0" />
        <fieldType name="long" class="solr.TrieLongField"
            precisionStep="0" positionIncrementGap="0" />
        <fieldType name="double" class="solr.TrieDoubleField"
            precisionStep="0" positionIncrementGap="0" />

        <!-- Trie numeric types declared with precisionStep="8". -->
        <fieldType name="tint" class="solr.TrieIntField"
            precisionStep="8" positionIncrementGap="0" />
        <fieldType name="tfloat" class="solr.TrieFloatField"
            precisionStep="8" positionIncrementGap="0" />
        <fieldType name="tlong" class="solr.TrieLongField"
            precisionStep="8" positionIncrementGap="0" />
        <fieldType name="tdouble" class="solr.TrieDoubleField"
            precisionStep="8" positionIncrementGap="0" />

        <!-- Trie date types ("date" with precisionStep="0", "tdate" with precisionStep="6"). -->
        <fieldType name="date" class="solr.TrieDateField"
            precisionStep="0" positionIncrementGap="0" />
        <fieldType name="tdate" class="solr.TrieDateField"
            precisionStep="6" positionIncrementGap="0" />

        <!-- Element name spelled "fieldType" like every other type declaration in this file. -->
        <fieldType name="binary" class="solr.BinaryField" />
        <!-- Non-Trie ("p"-prefixed) numeric and date types. -->
        <fieldType name="pint" class="solr.IntField" />
        <fieldType name="plong" class="solr.LongField" />
        <fieldType name="pfloat" class="solr.FloatField" />
        <fieldType name="pdouble" class="solr.DoubleField" />
        <fieldType name="pdate" class="solr.DateField"
            sortMissingLast="true" />
        <fieldType name="random" class="solr.RandomSortField"
            indexed="true" />

        <!-- Text split on whitespace only; no further filtering. -->
        <fieldType name="text_ws" class="solr.TextField"
            positionIncrementGap="100">
            <analyzer>
                <tokenizer class="solr.WhitespaceTokenizerFactory" />
            </analyzer>
        </fieldType>

        <!-- General text: standard tokenizer plus lower-casing, configured
            identically for index time and query time. -->
        <fieldType name="text_general" class="solr.TextField"
            positionIncrementGap="100">
            <analyzer type="index">
                <tokenizer class="solr.StandardTokenizerFactory" />
                <filter class="solr.LowerCaseFilterFactory" />
            </analyzer>
            <analyzer type="query">
                <tokenizer class="solr.StandardTokenizerFactory" />
                <filter class="solr.LowerCaseFilterFactory" />
            </analyzer>
        </fieldType>

        <!-- CJK bigram (see text_ja for a Japanese configuration using morphological
            analysis) -->
        <fieldType name="text_cjk" class="solr.TextField"
            positionIncrementGap="100">
            <analyzer>
                <tokenizer class="solr.StandardTokenizerFactory" />
                <!-- normalize width before bigram, as e.g. half-width dakuten combine -->
                <filter class="solr.CJKWidthFilterFactory" />
                <!-- for any non-CJK -->
                <filter class="solr.LowerCaseFilterFactory" />
                <filter class="solr.CJKBigramFilterFactory" />
            </analyzer>
        </fieldType>

    </types>

</schema>

solrconfig.xml,添加一个 name="/dataimport" 的 requestHandler:

    <!-- Exposes the DataImportHandler at /dataimport; its default "config"
         parameter names the data-config file to load (xml-data-config.xml). -->
    <requestHandler name="/dataimport"
                    class="org.apache.solr.handler.dataimport.DataImportHandler">
        <lst name="defaults">
            <str name="config">xml-data-config.xml</str>
        </lst>
    </requestHandler>

xml-data-config.xml,在 Solr 和 xml 内容之间建立桥梁的配置文件

<!--
    DataImportHandler configuration: reads D:/meituan_hao123.xml with the
    XPathEntityProcessor, emits one row per /urlset/url element, and passes each
    row through the ReplaceLocAddId script and then DateFormatTransformer.
-->
<dataConfig>
    <script>
        <![CDATA[
                /**
                 * Row transformer referenced by the entity below.
                 *
                 * - Derives 'id' from 'loc': the text between '/deal/' and '.html'.
                 * - Rewrites 'startTime' and 'endTime' via com.demo.tuan.DateUtils.format.
                 * - Rewrites 'rebate' when it is present.
                 *
                 * @param row the current row (column name to value map)
                 * @return the same row, modified in place
                 */
                function ReplaceLocAddId(row) {
                    // Fails if 'loc' is missing or has no '/deal/' segment.
                    var locParts = row.get('loc').split('/deal/');
                    var idParts = locParts[1].split('.html');
                    row.put('id', idParts[0]);

                    // NOTE(review): DateUtils is a project class not shown here; its output
                    // presumably matches the dateTimeFormat on the fields below. Confirm.
                    row.put('startTime', com.demo.tuan.DateUtils.format(row.get('startTime')));
                    row.put('endTime', com.demo.tuan.DateUtils.format(row.get('endTime')));

                    // Skip rows without a rebate instead of failing on a null value.
                    var rebate = row.get('rebate');
                    if (rebate != null) {
                        // NOTE(review): the search string is empty, so this replace changes
                        // nothing. A character to strip was probably lost here. Restore it.
                        row.put('rebate', rebate.replace('',''));
                    }

                    return row;
                }
        ]]>
    </script>

    <dataSource type="FileDataSource" encoding="UTF-8" />

    <document>
        <!-- One Solr document per /urlset/url element of the source file. -->
        <entity name="collection1" pk="loc"
            url="D:/meituan_hao123.xml"
            processor="XPathEntityProcessor" forEach="/urlset/url"
            transformer="script:ReplaceLocAddId,DateFormatTransformer">

            <!-- Source of the document id (see ReplaceLocAddId). -->
            <field column="loc" xpath="/urlset/url/loc" commonField="true" />

            <field column="city" xpath="/urlset/url/data/display/city"
                commonField="true" />
            <field column="sort" xpath="/urlset/url/data/display/sort"
                commonField="true" />

            <field column="title" xpath="/urlset/url/data/display/title"
                commonField="true" />
            <field column="image" xpath="/urlset/url/data/display/image"
                commonField="true" />

            <field column="value" xpath="/urlset/url/data/display/value"
                commonField="true" />
            <field column="price" xpath="/urlset/url/data/display/price"
                commonField="true" />
            <field column="rebate" xpath="/urlset/url/data/display/rebate"
                commonField="true" />
            <field column="bought" xpath="/urlset/url/data/display/bought"
                commonField="true" />

            <!-- dateTimeFormat is the pattern DateFormatTransformer uses for these columns. -->
            <field column="startTime" xpath="/urlset/url/data/display/startTime"
                dateTimeFormat="yyyy-MM-dd HH:mm:ss" commonField="true" />
            <field column="endTime" xpath="/urlset/url/data/display/endTime"
                dateTimeFormat="yyyy-MM-dd HH:mm:ss" commonField="true" />

        </entity>
    </document>
</dataConfig>

 

-----OVER------

你可能感兴趣的:(Solr)