solr4.0.0学习(二) 数据库导入clob与blob为索引

导入clob很简单,用自带的ClobTransformer即可;但是blob好像没有提供现成的方法,所以改了一下源码,重新编译并替换class文件,竟然成功了。

先把配置文件贴上

SCHEMA.XML

 

<?xml version="1.0" ?>

<!-- Index schema named "test": declares three field types and six fields. -->
<schema name="test" version="1.1">

  <types>

   <!-- Untokenized string type backed by solr.StrField; norms are omitted. -->
   <fieldtype name="string"  class="solr.StrField" sortMissingLast="true" omitNorms="true"/>



   <!-- Text type whose index-time and query-time analyzers are configured identically:
        standard tokenizer, stop-word removal using stopwords.txt, then lower-casing. -->
   <fieldType name="standard" class="solr.TextField" positionIncrementGap="100">

	  <analyzer type="index">

		<tokenizer class="solr.StandardTokenizerFactory"/>

		<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />

		<filter class="solr.LowerCaseFilterFactory"/>

	  </analyzer>

	  <analyzer type="query">

		<tokenizer class="solr.StandardTokenizerFactory"/>

		<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />

		<filter class="solr.LowerCaseFilterFactory"/>

	  </analyzer>

	</fieldType>



	<!-- Text type that hands all analysis to org.wltea.analyzer.lucene.IKAnalyzer.
	     NOTE(review): this is not a solr.* class, so presumably its jar has to be
	     added to the core's classpath separately; confirm against the deployment. -->
	<fieldType name="ik" class="solr.TextField">   

       <analyzer class="org.wltea.analyzer.lucene.IKAnalyzer"/>   

	</fieldType>



  </types>



 <!-- blogId is stored but not indexed; every other field is both indexed and stored
      and uses the "ik" type. No uniqueKey is declared in this schema. -->
 <fields>   

  <field name="blogId"      type="string"   indexed="false"  stored="true"  multiValued="false"/>

  <field name="blogTitle"   type="ik"   indexed="true"  stored="true"  multiValued="false" /> 

  <field name="blogAuthorName"   type="ik"   indexed="true"  stored="true"  multiValued="false" /> 

  <!-- Target of the BLOG_CONTENT column mapping in db-data-config.xml. -->
  <field name="blogContent"   type="ik"   indexed="true"  stored="true"  multiValued="false" /> 

  <field name="TITLE"   type="ik"   indexed="true"  stored="true"  /> 

  <field name="TEXT"   type="ik"   indexed="true"  stored="true"  /> 

 </fields>

 <!-- Field searched when a query names no field. -->
 <defaultSearchField>blogTitle</defaultSearchField>

 <!-- Query terms are combined with OR unless the query specifies an operator. -->
 <solrQueryParser defaultOperator="OR"/>



</schema>




这里的field只用到了blogContent一个。

 

SOLRCONFIG.XML

 

<?xml version="1.0" encoding="UTF-8" ?>

<!-- Minimal core configuration: default search handler, XML update handler,
     admin handlers, the data-import handler and a field-analysis handler. -->
<config>

  <luceneMatchVersion>LUCENE_34</luceneMatchVersion>

  <!-- Directory factory is taken from the solr.directoryFactory property,
       falling back to solr.StandardDirectoryFactory when it is not set. -->
  <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.StandardDirectoryFactory}"/>

  <updateHandler class="solr.DirectUpdateHandler2" />



  <!-- Remote streaming is disabled; multipart uploads are limited to 2048 KB. -->
  <requestDispatcher handleSelect="true" >

    <requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048" />

  </requestDispatcher>

  

  <!-- "standard" is marked as the default request handler. -->
  <requestHandler name="standard" class="solr.StandardRequestHandler" default="true" />

  <requestHandler name="/update" class="solr.XmlUpdateRequestHandler" />

  <requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers" />

      

 <!-- Registers DataImportHandler at /dataimport; its default "config" parameter
      points at db-data-config.xml. --> 

       <requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler"> 

               <lst name="defaults"> 

              <str name="config">db-data-config.xml</str> 

             </lst> 

       </requestHandler> 



  <admin>

    <defaultQuery>solr</defaultQuery>

  </admin>

  <!-- NOTE(review): unlockOnStartup and lockType are declared directly under <config>;
       confirm that this Solr version reads them here rather than only inside an
       index-configuration section. -->
  <unlockOnStartup>true</unlockOnStartup>

	<lockType>simple</lockType>

  <!-- Field-analysis handler, declared with startup="lazy". -->
  <requestHandler name="/analysis/field" 

                  startup="lazy"

                  class="solr.FieldAnalysisRequestHandler" />



</config>




db-data-config.xml

 

 

<!-- DataImportHandler configuration: one JDBC source and one entity that imports
     the BLOG_CONTENT column into the blogContent field. -->
<dataConfig> 

<!-- NOTE(review): the data source named "f1" is not referenced by any entity
     visible in this file; confirm whether it is still needed. -->
<dataSource name="f1" type="FieldStreamDataSource"/>

 <!-- Unnamed JDBC data source using the Oracle thin driver; the user name and
      password are written inline. -->
 <dataSource driver="oracle.jdbc.driver.OracleDriver"  

 url="jdbc:oracle:thin:@127.0.0.1:1521:orcl" user="HT" password="HT"/> 

 <document> 

		<!-- Each row of the query is passed through ClobTransformer. --> 
		<entity name="blog" query="SELECT BLOG_CONTENT from  TB_ENT_BLOG" transformer="ClobTransformer"> 

				 <!-- clob="true" is the flag ClobTransformer checks before converting this column;
				      the converted value is indexed as blogContent. --> 
				 <field column="BLOG_CONTENT" name="blogContent" clob="true"/> 

		</entity>

 </document> 

</dataConfig> 


然后修改了ClobTransformer.java,使其同时支持BLOB格式。

 


package org.apache.solr.handler.dataimport;



import java.io.BufferedReader;

import java.io.IOException;

import java.io.InputStream;

import java.io.InputStreamReader;

import java.io.Reader;

import java.sql.Blob;

import java.sql.Clob;

import java.sql.SQLException;

import java.util.ArrayList;

import java.util.List;

import java.util.Map;



public class ClobTransformer extends Transformer

{

  public static final String CLOB = "clob";



  public Object transformRow(Map<String, Object> aRow, Context context)

  {

    for (Map map : context.getAllEntityFields()) {

      if ("true".equals(map.get("clob"))) {

        String column = (String)map.get("column");

        String srcCol = (String)map.get("sourceColName");

        if (srcCol == null)

          srcCol = column;

        Object o = aRow.get(srcCol);

        if ((o instanceof List)) {

          List inputs = (List)o;

          List results = new ArrayList();

          for (Object input : inputs) {

            if ((input instanceof Clob)) {

              Clob clob = (Clob)input;

              results.add(readFromClob(clob));

            }else if(input instanceof Blob){

            	Blob blob = (Blob)input;

            	results.add(readFromBlob(blob));

            }

          }

          aRow.put(column, results);

        }

        else if ((o instanceof Clob)) {

          Clob clob = (Clob)o;

          aRow.put(column, readFromClob(clob));

        }else if(o instanceof Blob){

        	Blob blob = (Blob)o;

        	aRow.put(column, readFromBlob(blob));

        }

      }

    }

    return aRow;

  }



  private String readFromBlob(Blob blob) {

	  try{

		  	InputStream is = blob.getBinaryStream();

			BufferedReader br = new BufferedReader(new InputStreamReader(is));

			String str = "";

			String res = "";

			while((str=br.readLine())!=null){

				res += str;

			}

			return res;

	  }catch (Exception e) {

		  e.printStackTrace();

		  return "";

	}

}



private String readFromClob(Clob clob) {

    Reader reader = null;

	try {

		reader = clob.getCharacterStream();

	} catch (SQLException e1) {

		e1.printStackTrace();

	}

    StringBuilder sb = new StringBuilder();

    char[] buf = new char[1024];

    try

    {

      int len;

      while ((len = reader.read(buf)) != -1)

        sb.append(buf, 0, len);

    }

    catch (IOException e) {

      DataImportHandlerException.wrapAndThrow(500, e);

    }

    return sb.toString();

  }

}

 

这里加了一个readFromBlob方法,加了两个else if。异常的处理很粗糙。

这样替换class文件后,导入索引就正常了。在query页面查询 "*:*",response中会出现所有blob内容。

如果response中没有blob字段,或者该字段显示为对象地址,都说明导入出错了。



你可能感兴趣的:(Solr)