solr3.6实时索引定时器实现

企业要求数据表的数据更新后能够实时的被搜索引擎搜索到,查找solr的DataImport的文档提到了一个定时器实现这种实时要求的解决方案

实现方法:

1 配置监听器

web.xml

  <listener>
    <listener-class>
      org.apache.solr.handler.dataimport.scheduler.ApplicationListener
    </listener-class>
  </listener>

2 引入jar文件

注:如果用的是jre6, 官方下载的jar文件要重新编译,貌似是版本不兼容

jar文件包括三个类

(1) 监听器ApplicationListener

 package org.apache.solr.handler.dataimport.scheduler;


import java.util.Calendar;
import java.util.Date;
import java.util.Timer;

import javax.servlet.ServletContext;
import javax.servlet.ServletContextEvent;
import javax.servlet.ServletContextListener;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public  class ApplicationListener  implements ServletContextListener {

         private  static  final Logger logger = LoggerFactory.getLogger(ApplicationListener. class);

        @Override
         public  void contextDestroyed(ServletContextEvent servletContextEvent) {
                ServletContext servletContext = servletContextEvent.getServletContext();

                 //  get our timer from the context
                Timer timer = (Timer)servletContext.getAttribute("timer");

                 //  cancel all active tasks in the timers queue
                 if (timer !=  null)
                        timer.cancel();

                 //  remove the timer from the context
                servletContext.removeAttribute("timer");

        }

        @Override
         public  void contextInitialized(ServletContextEvent servletContextEvent) {
                ServletContext servletContext = servletContextEvent.getServletContext();
                 try{
                         //  create the timer and timer task objects
                        Timer timer =  new Timer();
                        HTTPPostScheduler task =  new HTTPPostScheduler(servletContext.getServletContextName(), timer);

                         //  get our interval from HTTPPostScheduler
                         int interval = task.getIntervalInt();

                         //  get a calendar to set the start time (first run)
                        Calendar calendar = Calendar.getInstance();

                         //  set the first run to now + interval (to avoid fireing while the app/server is starting)
                        calendar.add(Calendar.MINUTE, interval);
                        Date startTime = calendar.getTime();

                         //  schedule the task
                        timer.scheduleAtFixedRate(task, startTime, 1000 * 60 * interval);

                         //  save the timer in context
                        servletContext.setAttribute("timer", timer);

                }  catch (Exception e) {
                         if(e.getMessage().endsWith("disabled")){
                                logger.info("Schedule disabled");
                        } else{
                                logger.error("Problem initializing the scheduled task: ", e);
                        }
                }
        }

}

(2)定时任务HTTPPostScheduler

package org.apache.solr.handler.dataimport.scheduler;

import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Timer;
import java.util.TimerTask;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
 * TimerTask that periodically sends an HTTP POST to one or more Solr cores'
 * DataImport handler, driven by settings read from dataimport.properties.
 */
public  class HTTPPostScheduler  extends TimerTask {
         // "1" enables the sync; any other value disables scheduling.
         private String syncEnabled;
         // Core names split out of the comma-separated `cores` property.
         private String[] syncCores;
         // Solr host name or IP (defaults to "localhost" in fixParams).
         private String server;
         // Solr port as a string (defaults to "8080" in fixParams).
         private String port;
         // Web application context name (defaults to the servlet context name).
         private String webapp;
         // URL remainder, e.g. "/dataimport?command=delta-import&...".
         private String params;
         // Schedule interval in minutes, kept as the raw property string.
         private String interval;
         // Raw comma-separated core list as read from the properties file.
         private String cores;
         // Loader/holder for the global dataimport.properties values.
         private SolrDataImportProperties p;
         // True when no cores are configured (single-core deployment).
         private  boolean singleCore;

         private  static  final Logger logger = LoggerFactory.getLogger(HTTPPostScheduler. class);

         public HTTPPostScheduler(String webAppName, Timer t)  throws Exception{
                 // load properties from global dataimport.properties
                p =  new SolrDataImportProperties();
                reloadParams();
                fixParams(webAppName);

                 if(!syncEnabled.equals("1"))  throw  new Exception("Schedule disabled");

                 if(syncCores ==  null || (syncCores.length == 1 && syncCores[0].isEmpty())){
                        singleCore =  true;
                        logger.info("<index update process> Single core identified in dataimport.properties");
                } else{
                        singleCore =  false;
                        logger.info("<index update process> Multiple cores identified in dataimport.properties. Sync active for: " + cores);
                }
        }

         private  void reloadParams(){
                p.loadProperties( true);
                syncEnabled = p.getProperty(SolrDataImportProperties.SYNC_ENABLED);
                cores           = p.getProperty(SolrDataImportProperties.SYNC_CORES);
                server          = p.getProperty(SolrDataImportProperties.SERVER);
                port            = p.getProperty(SolrDataImportProperties.PORT);
                webapp          = p.getProperty(SolrDataImportProperties.WEBAPP);
                params          = p.getProperty(SolrDataImportProperties.PARAMS);
                interval        = p.getProperty(SolrDataImportProperties.INTERVAL);
                syncCores       = cores !=  null ? cores.split(",") :  null;
        }

         private  void fixParams(String webAppName){
                 if(server ==  null || server.isEmpty())  server = "localhost";
                 if(port ==  null || port.isEmpty())              port = "8080";
                 if(webapp ==  null || webapp.isEmpty())  webapp = webAppName;
                 if(interval ==  null || interval.isEmpty() || getIntervalInt() <= 0) interval = "30";
        }

         public  void run() {
                 try{
                         //  check mandatory params
                         if(server.isEmpty() || webapp.isEmpty() || params ==  null || params.isEmpty()){
                                logger.warn("<index update process> Insuficient info provided for data import");
                                logger.info("<index update process> Reloading global dataimport.properties");
                                reloadParams();

                         //  single-core
                        } else  if(singleCore){
                                prepUrlSendHttpPost();

                         //  multi-core
                        } else  if(syncCores.length == 0 || (syncCores.length == 1 && syncCores[0].isEmpty())){
                                logger.warn("<index update process> No cores scheduled for data import");
                                logger.info("<index update process> Reloading global dataimport.properties");
                                reloadParams();

                        } else{
                                 for(String core : syncCores){
                                        prepUrlSendHttpPost(core);
                                }
                        }
                } catch(Exception e){
                        logger.error("Failed to prepare for sendHttpPost", e);
                        reloadParams();
                }
        }


         private  void prepUrlSendHttpPost(){
                String coreUrl = "http://" + server + ":" + port + "/" + webapp + params;
                sendHttpPost(coreUrl,  null);
        }

         private  void prepUrlSendHttpPost(String coreName){
                String coreUrl = "http://" + server + ":" + port + "/" + webapp + "/" + coreName + params;
                sendHttpPost(coreUrl, coreName);
        }


         private  void sendHttpPost(String completeUrl, String coreName){
                DateFormat df =  new SimpleDateFormat("dd.MM.yyyy HH:mm:ss SSS");
                Date startTime =  new Date();

                 //  prepare the core var
                String core = coreName ==  null ? "" : "[" + coreName + "] ";

                logger.info(core + "<index update process> Process started at .............. " + df.format(startTime));

                 try{

                    URL url =  new URL(completeUrl);
                    HttpURLConnection conn = (HttpURLConnection)url.openConnection();

                    conn.setRequestMethod("POST");
                    conn.setRequestProperty("type", "submit");
                    conn.setDoOutput( true);

                         //  Send HTTP POST
                    conn.connect();

                    logger.info(core + "<index update process> Request method\t\t\t" + conn.getRequestMethod());
                    logger.info(core + "<index update process> Succesfully connected to server\t" + server);
                    logger.info(core + "<index update process> Using port\t\t\t" + port);
                    logger.info(core + "<index update process> Application name\t\t\t" + webapp);
                    logger.info(core + "<index update process> URL params\t\t\t" + params);
                    logger.info(core + "<index update process> Full URL\t\t\t\t" + conn.getURL());
                    logger.info(core + "<index update process> Response message\t\t\t" + conn.getResponseMessage());
                    logger.info(core + "<index update process> Response code\t\t\t" + conn.getResponseCode());

                     // listen for change in properties file if an error occurs
                     if(conn.getResponseCode() != 200){
                        reloadParams();
                    }

                    conn.disconnect();
                    logger.info(core + "<index update process> Disconnected from server\t\t" + server);
                    Date endTime =  new Date();
                    logger.info(core + "<index update process> Process ended at ................ " + df.format(endTime));
                } catch(MalformedURLException mue){
                        logger.error("Failed to assemble URL for HTTP POST", mue);
                } catch(IOException ioe){
                        logger.error("Failed to connect to the specified URL while trying to send HTTP POST", ioe);
                } catch(Exception e){
                        logger.error("Failed to send HTTP POST", e);
                }
        }

         public  int getIntervalInt() {
                 try{
                         return Integer.parseInt(interval);
                } catch(NumberFormatException e){
                        logger.warn("Unable to convert 'interval' to number. Using default value (30) instead", e);
                         return 30;  // return default in case of error
                }
        }

(3) 属性文件类SolrDataImportProperties

package org.apache.solr.handler.dataimport.scheduler;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Properties;

import org.apache.solr.core.SolrResourceLoader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Loads and exposes the scheduler settings from the dataimport.properties file
 * located in the Solr config directory (solr.home/conf).
 */
public  class SolrDataImportProperties {
         // Backing store for the loaded key/value pairs; null until loadProperties succeeds.
         private Properties properties;

         // Property keys recognized in dataimport.properties.
         public  static  final String SYNC_ENABLED         = "syncEnabled";
         public  static  final String SYNC_CORES           = "syncCores";
         public  static  final String SERVER               = "server";
         public  static  final String PORT                 = "port";
         public  static  final String WEBAPP               = "webapp";
         public  static  final String PARAMS               = "params";
         public  static  final String INTERVAL             = "interval";

         private  static  final Logger logger = LoggerFactory.getLogger(SolrDataImportProperties. class);

         // Loading is deferred to an explicit loadProperties() call by the scheduler;
         // eager loading in the constructor was intentionally disabled.
         public SolrDataImportProperties(){
//               loadProperties(true);
        }

         public  void loadProperties( boolean force){
                 try{
                        SolrResourceLoader loader =  new SolrResourceLoader( null);
                        logger.info("Instance dir = " + loader.getInstanceDir());

                        String configDir = loader.getConfigDir();
                        configDir = SolrResourceLoader.normalizeDir(configDir);
                         if(force || properties ==  null){
                                properties =  new Properties();

                                String dataImportPropertiesPath = configDir + "\\dataimport.properties";

                                FileInputStream fis =  new FileInputStream(dataImportPropertiesPath);
                                properties.load(fis);
                        }
                } catch(FileNotFoundException fnfe){
                        logger.error("Error locating DataImportScheduler dataimport.properties file", fnfe);
                } catch(IOException ioe){
                        logger.error("Error reading DataImportScheduler dataimport.properties file", ioe);
                } catch(Exception e){
                        logger.error("Error loading DataImportScheduler properties", e);
                }
        }

         public String getProperty(String key){
                 return properties.getProperty(key);
        }

3 在solr.home的文件夹下建立conf 文件夹

 属性文件dataimport.properties放在该文件夹下

我的属性文件内容配置如下

#################################################
#                                               #
#       dataimport scheduler properties         #
#                                               #
#################################################

#  to sync or not to sync
#  1 - active ;  anything else - inactive
syncEnabled=1

#  which cores to schedule
#  in a multi-core environment you can decide which cores you want syncronized
#  leave empty or comment it out if using single-core deployment
syncCores=core0

#  solr server name or IP address
#   [ defaults to localhost if empty ]
server=localhost

#  solr server port
#   [ defaults to 8080 if empty ]
port=8080

#  application name/context
#   [ defaults to current ServletContextListener's context (app) name ]
webapp=solr

#  URL params  [ mandatory ]
#  remainder of URL
params=/dataimport?command=delta-import&clean=false&commit=true

#  params=/dataimport?command=delta-import&clean=false&commit=true

#  schedule interval
#  number of minutes between two runs
#   [ defaults to 30 if empty ]

interval=10

你可能感兴趣的:(Solr)